Coverage for src / lilbee / crawler / discovery.py: 100%
7 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""URL discovery: build backend-neutral concurrency and filter specs from ``cfg``."""
3from __future__ import annotations
5from lilbee.core.config import cfg
6from lilbee.crawler.models import ConcurrencySpec, FilterSpec
def build_concurrency_spec() -> ConcurrencySpec:
    """Read the crawl-concurrency settings off ``cfg`` and pack them into a spec.

    Returns:
        A ``ConcurrencySpec`` whose fields are populated from the
        ``cfg.crawl_*`` attributes as they are at call time.
    """
    settings = cfg  # local alias; every value below is read from the same object
    spec = ConcurrencySpec(
        # Request throttling.
        semaphore_count=settings.crawl_concurrent_requests,
        mean_delay=settings.crawl_mean_delay,
        max_delay_range=settings.crawl_max_delay_range,
        # Retry behaviour.
        retry_on_rate_limit=settings.crawl_retry_on_rate_limit,
        retry_base_delay_min=settings.crawl_retry_base_delay_min,
        retry_base_delay_max=settings.crawl_retry_base_delay_max,
        retry_max_backoff=settings.crawl_retry_max_backoff,
        retry_max_attempts=settings.crawl_retry_max_attempts,
    )
    return spec
def build_filter_spec(*, include_subdomains: bool) -> FilterSpec:
    """Combine the exclude patterns from ``cfg`` with the caller's subdomain flag.

    Args:
        include_subdomains: Passed through unchanged to the spec's
            ``include_subdomains`` field.

    Returns:
        A ``FilterSpec`` carrying a fresh list of ``cfg.crawl_exclude_patterns``
        and the given ``include_subdomains`` value.
    """
    # Build a new list so the spec never holds the very object stored on cfg.
    patterns = [*cfg.crawl_exclude_patterns]
    return FilterSpec(
        exclude_patterns=patterns,
        include_subdomains=include_subdomains,
    )