Coverage for src / lilbee / server / handlers / crawl.py: 100%
23 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
1"""Crawl streaming handler."""
3from __future__ import annotations
5import asyncio
6from collections.abc import AsyncGenerator
7from pathlib import Path
9from lilbee.core.config.enums import CrawlRenderMode
10from lilbee.server.handlers.sse import SseStream, sse_done, sse_error
async def crawl_stream(
    url: str,
    depth: int | None = None,
    max_pages: int | None = None,
    render_mode: CrawlRenderMode | None = None,
) -> AsyncGenerator[str, None]:
    """Run a crawl in a background task and yield its progress as SSE events.

    The crawl itself is delegated to ``lilbee.crawler.crawl_and_save``, which
    receives this stream's progress callback and cancel event. Every event
    produced by ``SseStream.drain`` is forwarded to the consumer unchanged.
    The progress event names are chosen by the crawler, not here (expected:
    ``crawl_start`` / ``crawl_page`` / ``crawl_done``, preceded by
    ``setup_start`` / ``setup_progress`` / ``setup_done`` when the Chromium
    bootstrap runs on first use).

    After the drain finishes, and only if the stream was not cancelled and
    the task completed, exactly one closing event is yielded:

    * an error event carrying ``str(exc)`` if the crawl task raised, or
    * a done event whose payload is ``{"files_written": [...]}`` with each
      returned path converted to ``str``.

    Args:
        url: Address to start crawling from.
        depth: Forwarded to ``crawl_and_save`` as ``depth``.
        max_pages: Forwarded to ``crawl_and_save`` as ``max_pages``.
        render_mode: Forwarded to ``crawl_and_save`` as ``render_mode``.

    Yields:
        Events as strings, in the order they are produced.
    """
    stream = SseStream()

    async def _crawl_worker() -> list[Path]:
        # Imported at call time rather than at module import time.
        from lilbee.crawler import crawl_and_save

        # Per the crawler's contract, crawl_and_save performs the Chromium
        # bootstrap itself on first use and reports setup_* progress through
        # the same on_progress callback, so those events reach the stream
        # ahead of any crawl_* events.
        try:
            written = await crawl_and_save(
                url,
                depth=depth,
                max_pages=max_pages,
                on_progress=stream.callback,
                cancel=stream.cancel,
                render_mode=render_mode,
            )
        finally:
            # Always enqueue the None sentinel, on success or failure.
            stream.queue.put_nowait(None)
        return written

    worker = asyncio.create_task(_crawl_worker())
    async for chunk in stream.drain(worker, "Crawl stream"):
        yield chunk

    # No closing event when the stream was cancelled, or when the task is
    # unfinished or was itself cancelled.
    if stream.cancel.is_set() or not worker.done() or worker.cancelled():
        return

    failure = worker.exception()
    if failure is not None:
        yield sse_error(str(failure))
        return

    yield sse_done({"files_written": [str(path) for path in worker.result()]})