Coverage for src / lilbee / server / routes / documents.py: 100%

49 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-06-28 01:01 +0000

1"""Document management route handlers: add, list, remove, sync.""" 

2 

3from __future__ import annotations 

4 

5from litestar import Request, Response, get, post 

6from litestar.exceptions import ValidationException 

7from litestar.params import Parameter 

8from litestar.response import Stream 

9from pydantic import BaseModel, Field 

10 

11from lilbee.server import handlers 

12from lilbee.server.auth import read_only 

13from lilbee.server.models import ( 

14 AddRequest, 

15 DocumentListResponse, 

16 DocumentRemoveResponse, 

17 SyncRequest, 

18) 

19 

20 

class RemoveRequest(BaseModel):
    """Payload accepted by ``POST /api/documents/remove``."""

    # Required; a single request may carry at most 100 source names.
    names: list[str] = Field(..., max_length=100)
    # Optional flag forwarded to the delete handler; off unless requested.
    delete_files: bool = Field(default=False)

26 

27 

@post("/api/sync")
async def sync_route(data: SyncRequest | None = None) -> Stream:
    """Stream SSE progress events while changed documents are re-indexed.

    The request body is optional. When it is present its ``enable_ocr``,
    ``force_rebuild`` and ``retry_skipped`` fields are forwarded to the sync
    handler; when it is absent the handler receives ``None``, ``False`` and
    ``False`` respectively.

    ``{"force_rebuild": true}`` wipes the store and re-ingests every file under
    the current ``cfg.embedding_model``; it is the recovery path after a
    ``PUT /api/models/embedding`` that returned ``reindex_required=true``.
    ``{"retry_skipped": true}`` is the lighter path: it retries the files that
    failed a previous sync without dropping the store.
    """
    # Defaults used when the client sends no body.
    options = {"enable_ocr": None, "force_rebuild": False, "retry_skipped": False}
    if data:
        options = {
            "enable_ocr": data.enable_ocr,
            "force_rebuild": data.force_rebuild,
            "retry_skipped": data.retry_skipped,
        }
    progress_events = handlers.sync_stream(**options)
    return Stream(progress_events, media_type="text/event-stream")

47 

48 

@post("/api/add")
async def add_route(data: AddRequest) -> Stream:
    """Stream SSE progress events while files are added to the knowledge base.

    The request is validated up front so that a bad path surfaces as a
    ``ValidationException`` before any streaming response is started.
    Responds with status 201.
    """
    try:
        handlers.validate_add_paths(data.model_dump())
    except ValueError as exc:
        raise ValidationException(str(exc)) from exc

    # A fresh dump is handed to the streaming handler, independent of the
    # dict that was passed to the validator.
    progress_events = handlers.add_files_stream(data.model_dump())
    return Stream(
        progress_events,
        media_type="text/event-stream",
        status_code=201,
    )

61 

62 

@get("/api/documents")
@read_only
async def documents_list_route(
    search: str = Parameter(query="search", default=""),
    # Bounded on both sides: a negative page size is rejected during request
    # validation (as a negative offset already is) instead of reaching the
    # handler.
    limit: int = Parameter(query="limit", default=50, ge=0, le=1000),
    offset: int = Parameter(query="offset", default=0, ge=0),
) -> DocumentListResponse:
    """List indexed documents with metadata, paginated and searchable.

    Query parameters:
        search: Filter string passed to the list handler; empty by default.
        limit: Page size, between 0 and 1000 inclusive; defaults to 50.
        offset: Number of entries to skip, non-negative; defaults to 0.

    Returns:
        The ``DocumentListResponse`` produced by ``handlers.list_documents``.
    """
    return await handlers.list_documents(search=search, limit=limit, offset=offset)

72 

73 

@post("/api/documents/remove")
async def documents_remove_route(data: RemoveRequest) -> DocumentRemoveResponse:
    """Delete the named sources from the knowledge base.

    Forwards ``data.names`` and the ``delete_files`` flag to
    ``handlers.delete_documents`` and returns its result unchanged.
    """
    source_names = data.names
    outcome = await handlers.delete_documents(
        source_names,
        delete_files=data.delete_files,
    )
    return outcome

78 

79 

@get("/api/export")
@read_only
async def export_route(
    fmt: str = Parameter(query="format", default=""),
    source: str = Parameter(query="source", default=""),
) -> Response[bytes]:
    """Serve the per-page text dataset as a downloadable file (parquet by default).

    ``?format=`` is passed to the exporter as given; an empty ``?source=`` is
    passed as ``None``. A ``DatasetError`` from the exporter is re-raised as a
    ``ValidationException``.
    """
    from lilbee.app.dataset import DatasetError, export_to_bytes

    source_filter = source if source else None
    try:
        exported = export_to_bytes(fmt, source_filter)
    except DatasetError as exc:
        raise ValidationException(str(exc)) from exc

    body = exported.data
    # The download name carries the format reported by the exporter.
    disposition = f'attachment; filename="pages.{exported.fmt}"'
    return Response(
        content=body,
        media_type="application/octet-stream",
        headers={"content-disposition": disposition},
    )

98 

99 

@post("/api/import")
async def import_route(
    request: Request,
    fmt: str = Parameter(query="format", default=""),
) -> Stream:
    """Stream SSE progress events while an uploaded per-page dataset is imported.

    The raw request body is the dataset itself. ``?format=parquet|jsonl`` is
    required because there is no filename to infer the format from; an
    unacceptable value is reported as a ``ValidationException`` before the body
    is read. Uploads are bounded by the server's body-size limit; larger
    datasets use the path-based CLI/MCP import. Responds with status 201.
    """
    from lilbee.app.dataset import DatasetError, require_format

    try:
        require_format(fmt)
    except DatasetError as exc:
        raise ValidationException(str(exc)) from exc

    raw_dataset = await request.body()
    progress_events = handlers.import_stream(raw_dataset, fmt)
    return Stream(
        progress_events,
        media_type="text/event-stream",
        status_code=201,
    )

121 )