Coverage for src / lilbee / crawler / models.py: 100%
33 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Backend-agnostic value types crossing the runner/fetcher seam."""
3from __future__ import annotations
5import threading
6from dataclasses import dataclass, field
7from typing import TypeAlias
@dataclass
class CrawlResult:
    """Outcome of crawling a single URL.

    This is the high-level result surfaced to lilbee callers
    (CLI, MCP, HTTP, TUI). The adapter produces ``FetchedPage``
    and the orchestration layer converts it to ``CrawlResult``
    when returning up to the caller.

    Attributes:
        url: The crawled URL. Required; the only field without a default.
        markdown: Markdown text for the page. Defaults to ``""``.
        success: Outcome flag. Defaults to ``True``.
        error: Optional error text. Defaults to ``None``. Presumably only
            populated when ``success`` is ``False``; nothing in this class
            enforces that, so confirm against the producer.
    """

    url: str
    markdown: str = ""
    success: bool = True
    error: str | None = None
@dataclass
class FetchedPage:
    """Single page produced by a ``WebFetcher`` backend.

    Distinct from :class:`CrawlResult` so the adapter surface
    stays narrow and neutral: just the bytes we needed out of
    the underlying SDK's response object.

    Attributes:
        url: The fetched URL. Required; the only field without a default.
        markdown: Markdown text for the page. Defaults to ``""``.
        success: Outcome flag. Defaults to ``True``.
        error: Optional error text. Defaults to ``None``.
        links: Link strings attached to the page (presumably the links
            discovered on it; confirm against the adapter). Each instance
            gets its own fresh empty list.
    """

    url: str
    markdown: str = ""
    success: bool = True
    error: str | None = None
    # default_factory gives every instance its own list rather than a shared one.
    links: list[str] = field(default_factory=list)
@dataclass
class ConcurrencySpec:
    """Backend-agnostic concurrency + rate-limit knobs.

    The crawl4ai adapter translates these into ``RateLimiter`` and
    ``SemaphoreDispatcher`` calls; a future adapter with its own
    BFS loop maps them onto ``asyncio.Semaphore`` + retry logic.

    Every field has a default, so ``ConcurrencySpec()`` is valid. This
    class performs no validation; units and exact semantics of the delay
    and retry fields are defined by the consuming adapter (presumably
    seconds -- confirm there).
    """

    semaphore_count: int = 1
    mean_delay: float = 0.0
    max_delay_range: float = 0.0
    retry_on_rate_limit: bool = False
    retry_base_delay_min: float = 0.0
    retry_base_delay_max: float = 0.0
    retry_max_backoff: float = 0.0
    retry_max_attempts: int = 0
@dataclass
class FilterSpec:
    """Backend-agnostic filter settings applied to discovered links.

    Pure Python data; each adapter decides how to plug the settings
    into its own filter pipeline.

    Attributes:
        exclude_patterns: Pattern strings to exclude. The pattern syntax
            is not defined here; it is up to the consuming adapter. Each
            instance gets its own fresh empty list.
        include_subdomains: Defaults to ``False``.
    """

    # default_factory gives every instance its own list rather than a shared one.
    exclude_patterns: list[str] = field(default_factory=list)
    include_subdomains: bool = False
73CancelToken: TypeAlias = threading.Event
74"""Cancellation handle the orchestration layer passes to a fetcher.
76An already-``set()`` event means "stop as soon as you can". The
77crawl4ai adapter polls it in both its streaming loop and its BFS
78strategy's ``should_cancel`` hook; a future adapter can poll it
79in whatever granularity it supports.
80"""