Coverage for src / lilbee / server / handlers / crawl.py: 100%
22 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Crawl streaming handler."""
3from __future__ import annotations
5import asyncio
6from collections.abc import AsyncGenerator
7from pathlib import Path
9from lilbee.server.handlers.sse import SseStream, sse_done, sse_error
async def crawl_stream(
    url: str, depth: int | None = None, max_pages: int | None = None
) -> AsyncGenerator[str, None]:
    """Stream crawl progress as SSE events.

    Emits crawl_start, crawl_page, crawl_done events, then a final done event
    with the list of files written. On error emits crawl_error.
    Sets a cancel event on client disconnect so the crawl stops between pages.

    On first use, Chromium isn't installed yet. The stream inlines
    setup_start/progress/done events before the crawl begins so a stream
    consumer can render a matching 'setup' progress indicator.
    """
    sse = SseStream()

    async def _execute() -> list[Path]:
        # Deferred import: crawl_and_save is only needed once a crawl
        # actually runs.
        from lilbee.crawler import crawl_and_save

        # crawl_and_save runs the Chromium bootstrap itself on first use,
        # relaying setup_* events through the same on_progress callback
        # so the SSE stream carries them before any crawl_* events.
        try:
            return await crawl_and_save(
                url, depth=depth, max_pages=max_pages, on_progress=sse.callback, cancel=sse.cancel
            )
        finally:
            # Sentinel marks the producer side as finished so the drain
            # loop can terminate even when the crawl raised or was cancelled.
            sse.queue.put_nowait(None)

    task = asyncio.create_task(_execute())
    async for event in sse.drain(task, "Crawl stream"):
        yield event

    # Guard clauses: emit nothing further if the client disconnected, or the
    # crawl task has not finished, or it was cancelled outright.
    if sse.cancel.is_set() or not task.done() or task.cancelled():
        return
    err = task.exception()
    if err is not None:
        yield sse_error(str(err))
        return
    files = task.result()
    yield sse_done({"files_written": [str(f) for f in files]})