Coverage for src/lilbee/server/routes/documents.py: 100%
49 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
1"""Document management route handlers: add, list, remove, sync."""
3from __future__ import annotations
5from litestar import Request, Response, get, post
6from litestar.exceptions import ValidationException
7from litestar.params import Parameter
8from litestar.response import Stream
9from pydantic import BaseModel, Field
11from lilbee.server import handlers
12from lilbee.server.auth import read_only
13from lilbee.server.models import (
14 AddRequest,
15 DocumentListResponse,
16 DocumentRemoveResponse,
17 SyncRequest,
18)
class RemoveRequest(BaseModel):
    """JSON payload accepted by ``POST /api/documents/remove``."""

    # Source names of the documents to remove; validation rejects lists
    # longer than 100 entries.
    names: list[str] = Field(max_length=100)
    # Forwarded to the delete handler as ``delete_files``; off by default.
    # NOTE(review): presumably also deletes the underlying files -- confirm
    # against handlers.delete_documents.
    delete_files: bool = False
@post("/api/sync")
async def sync_route(data: SyncRequest | None = None) -> Stream:
    """Stream SSE progress events while re-indexing changed documents.

    The request body is optional; without one, OCR is left unset (``None``)
    and both flags below are off.

    ``{"force_rebuild": true}`` wipes the store and re-ingests every file
    under the current ``cfg.embedding_model``. It is the recovery path after
    a ``PUT /api/models/embedding`` that returned ``reindex_required=true``.

    ``{"retry_skipped": true}`` is the lighter option: it retries the files
    that failed a previous sync without dropping the store.
    """
    if data:
        ocr_override = data.enable_ocr
        rebuild = data.force_rebuild
        retry = data.retry_skipped
    else:
        # No body supplied: fall back to the defaults for every option.
        ocr_override, rebuild, retry = None, False, False

    events = handlers.sync_stream(
        enable_ocr=ocr_override,
        force_rebuild=rebuild,
        retry_skipped=retry,
    )
    return Stream(events, media_type="text/event-stream")
@post("/api/add")
async def add_route(data: AddRequest) -> Stream:
    """Ingest files into the knowledge base, reporting progress over SSE.

    The request is validated up front so that a bad path is reported as a
    ``ValidationException`` instead of surfacing mid-stream. On success the
    response is a ``text/event-stream`` with status 201.
    """
    try:
        handlers.validate_add_paths(data.model_dump())
    except ValueError as err:
        raise ValidationException(str(err)) from err

    # The model is dumped a second time so the stream receives its own dict,
    # independent of the one handed to validation.
    progress = handlers.add_files_stream(data.model_dump())
    return Stream(progress, media_type="text/event-stream", status_code=201)
@get("/api/documents")
@read_only
async def documents_list_route(
    search: str = Parameter(query="search", default=""),
    # ``ge=0`` mirrors the lower bound on ``offset``: without it a negative
    # limit passed validation and was handed to the handler unchecked.
    limit: int = Parameter(query="limit", default=50, ge=0, le=1000),
    offset: int = Parameter(query="offset", default=0, ge=0),
) -> DocumentListResponse:
    """List indexed documents with metadata, paginated and searchable.

    Query parameters:
        search: Search string forwarded to the handler; empty by default.
        limit: Page size, between 0 and 1000 inclusive (default 50).
        offset: Non-negative starting position (default 0).

    Returns:
        The ``DocumentListResponse`` produced by ``handlers.list_documents``.
    """
    return await handlers.list_documents(search=search, limit=limit, offset=offset)
@post("/api/documents/remove")
async def documents_remove_route(data: RemoveRequest) -> DocumentRemoveResponse:
    """Delete the named documents from the knowledge base.

    Documents are identified by source name; ``data.delete_files`` is passed
    through to the handler unchanged.
    """
    outcome = await handlers.delete_documents(
        data.names,
        delete_files=data.delete_files,
    )
    return outcome
@get("/api/export")
@read_only
async def export_route(
    fmt: str = Parameter(query="format", default=""),
    source: str = Parameter(query="source", default=""),
) -> Response[bytes]:
    """Serve the per-page text dataset as a file download (parquet by default).

    ``?format=`` is handed to the exporter as-is; an empty ``?source=`` is
    passed on as ``None``. A ``DatasetError`` from the exporter is re-raised
    as a ``ValidationException``.
    """
    from lilbee.app.dataset import DatasetError, export_to_bytes

    source_filter = source if source else None
    try:
        dataset = export_to_bytes(fmt, source_filter)
    except DatasetError as err:
        raise ValidationException(str(err)) from err

    # The download name uses the format reported by the exporter, not the
    # raw query value.
    disposition = f'attachment; filename="pages.{dataset.fmt}"'
    return Response(
        content=dataset.data,
        media_type="application/octet-stream",
        headers={"content-disposition": disposition},
    )
@post("/api/import")
async def import_route(
    request: Request,
    fmt: str = Parameter(query="format", default=""),
) -> Stream:
    """Ingest an uploaded per-page dataset, streaming SSE progress events.

    The raw request body is the dataset itself. Because no filename is
    available to infer the format from, ``?format=parquet|jsonl`` must be
    given. Uploads are bounded by the server's body-size limit; larger
    datasets go through the path-based CLI/MCP import instead.
    """
    from lilbee.app.dataset import DatasetError, require_format

    # Check the format before reading the body so a bad request fails early.
    try:
        require_format(fmt)
    except DatasetError as err:
        raise ValidationException(str(err)) from err

    raw_dataset = await request.body()
    events = handlers.import_stream(raw_dataset, fmt)
    return Stream(events, media_type="text/event-stream", status_code=201)