Coverage for src / lilbee / crawler / discovery.py: 100%

7 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""URL discovery: build backend-neutral concurrency and filter specs from ``cfg``.""" 

2 

3from __future__ import annotations 

4 

5from lilbee.core.config import cfg 

6from lilbee.crawler.models import ConcurrencySpec, FilterSpec 

7 

8 

def build_concurrency_spec() -> ConcurrencySpec:
    """Build a ``ConcurrencySpec`` from the current ``cfg`` crawl settings.

    Each ``cfg.crawl_*`` attribute is read at call time and passed to the
    matching ``ConcurrencySpec`` keyword argument.
    """
    spec = ConcurrencySpec(
        # Request pacing.
        semaphore_count=cfg.crawl_concurrent_requests,
        mean_delay=cfg.crawl_mean_delay,
        max_delay_range=cfg.crawl_max_delay_range,
        # Retry settings.
        retry_on_rate_limit=cfg.crawl_retry_on_rate_limit,
        retry_base_delay_min=cfg.crawl_retry_base_delay_min,
        retry_base_delay_max=cfg.crawl_retry_base_delay_max,
        retry_max_backoff=cfg.crawl_retry_max_backoff,
        retry_max_attempts=cfg.crawl_retry_max_attempts,
    )
    return spec

21 

22 

def build_filter_spec(*, include_subdomains: bool) -> FilterSpec:
    """Build a ``FilterSpec`` from ``cfg`` plus the caller-supplied flag.

    ``include_subdomains`` is keyword-only and is forwarded unchanged.
    """
    # Copy into a fresh list so the spec does not share the sequence
    # object held by ``cfg``.
    patterns = list(cfg.crawl_exclude_patterns)
    return FilterSpec(
        exclude_patterns=patterns,
        include_subdomains=include_subdomains,
    )