Coverage for src / lilbee / crawler / __init__.py: 100%
11 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Web crawling: fetch pages as markdown and save them to the documents directory."""
3from __future__ import annotations
5import os
7from lilbee.crawler.bootstrap import (
8 CrawlerBackendError,
9 CrawlerBrowserError,
10 bootstrap_chromium,
11 chromium_installed,
12 crawler_browsers_path,
13)
14from lilbee.crawler.crawl4ai_fetcher import crawler_available
15from lilbee.crawler.fetcher import WebFetcher
16from lilbee.crawler.models import (
17 CancelToken,
18 ConcurrencySpec,
19 CrawlResult,
20 FetchedPage,
21 FilterSpec,
22)
23from lilbee.crawler.runner import (
24 crawl_and_save,
25 crawl_recursive,
26 crawl_single,
27)
28from lilbee.crawler.save import (
29 METADATA_FLUSH_INTERVAL,
30 CrawlMeta,
31 content_hash,
32 load_crawl_metadata,
33 save_crawl_metadata,
34 url_to_filename,
35)
36from lilbee.crawler.url_filter import (
37 get_blocked_networks,
38 is_url,
39 require_valid_crawl_url,
40 validate_crawl_url,
41)
43__all__ = [
44 "METADATA_FLUSH_INTERVAL",
45 "CancelToken",
46 "ConcurrencySpec",
47 "CrawlMeta",
48 "CrawlResult",
49 "CrawlerBackendError",
50 "CrawlerBrowserError",
51 "FetchedPage",
52 "FilterSpec",
53 "WebFetcher",
54 "bootstrap_chromium",
55 "chromium_installed",
56 "content_hash",
57 "crawl_and_save",
58 "crawl_recursive",
59 "crawl_single",
60 "crawler_available",
61 "crawler_browsers_path",
62 "get_blocked_networks",
63 "is_url",
64 "load_crawl_metadata",
65 "require_valid_crawl_url",
66 "save_crawl_metadata",
67 "url_to_filename",
68 "validate_crawl_url",
69]
71# Pin Playwright's browser cache so install and launch agree on Chromium's
72# location, regardless of wheel vs frozen-binary layout.
73os.environ.setdefault("PLAYWRIGHT_BROWSERS_PATH", str(crawler_browsers_path()))