Coverage for src/lilbee/server/models.py: 100%

1"""Request and response models for the lilbee HTTP API.

3Typed pydantic models so Litestar's OpenAPI schema has field-level detail.

4"""

6from __future__ import annotations

8from typing import Any, Literal

10from pydantic import BaseModel, Field, field_validator

12from lilbee.catalog.types import ModelSource, ModelTask

13from lilbee.data.store import SearchScope

14from lilbee.runtime.hardware import FitLevel, SizeVariantInfo

# The ``chunk_type`` values that ``_validate_chunk_type`` hands back unchanged.
_VALID_CHUNK_TYPES = frozenset((SearchScope.RAW.value, SearchScope.WIKI.value))

def _validate_chunk_type(value: str | None) -> str | None:
    """Normalise a ``chunk_type`` received at the HTTP boundary.

    Mirrors the CLI/MCP behaviour:

    * ``None`` and the UI-side ``"both"`` both mean "no filter" and are
      returned as ``None``;
    * ``"raw"`` and ``"wiki"`` are returned unchanged;
    * any other value is rejected.

    Raises:
        ValueError: if ``value`` is not one of the accepted values.
    """
    if value is None or value == SearchScope.BOTH.value:
        return None
    if value in _VALID_CHUNK_TYPES:
        return value
    raise ValueError(
        f"chunk_type must be one of 'raw', 'wiki', 'both', or omitted; got {value!r}"
    )

class AskRequest(BaseModel):
    """Request body for /api/ask.

    ``top_k`` must lie in ``0..100`` and defaults to ``0``.
    ``chunk_type`` is checked by ``_validate_chunk_type`` when the model is
    validated; the value stored on the model is whatever that helper returns.
    """

    question: str
    # Bounded on both sides: the upper limit was already enforced, and a
    # negative result count is rejected here rather than passed downstream.
    top_k: int = Field(default=0, ge=0, le=100)
    options: dict[str, Any] | None = None
    chunk_type: str | None = None

    @field_validator("chunk_type")
    @classmethod
    def _check_chunk_type(cls, v: str | None) -> str | None:
        """Delegate ``chunk_type`` validation to the shared module helper."""
        return _validate_chunk_type(v)

class ChatRequest(BaseModel):
    """Request body for /api/chat.

    ``top_k`` must lie in ``0..100`` and defaults to ``0``.
    ``chunk_type`` is checked by ``_validate_chunk_type`` when the model is
    validated; the value stored on the model is whatever that helper returns.
    """

    question: str
    # ``ChatMessage`` is declared further down in this module, so this
    # annotation is a forward reference.
    history: list[ChatMessage] = []
    # Bounded on both sides: the upper limit was already enforced, and a
    # negative result count is rejected here rather than passed downstream.
    top_k: int = Field(default=0, ge=0, le=100)
    options: dict[str, Any] | None = None
    chunk_type: str | None = None

    @field_validator("chunk_type")
    @classmethod
    def _check_chunk_type(cls, v: str | None) -> str | None:
        """Delegate ``chunk_type`` validation to the shared module helper."""
        return _validate_chunk_type(v)

class SyncRequest(BaseModel):
    """Request body for /api/sync.

    With neither flag set the sync is incremental.

    ``force_rebuild``
        Full drop-and-reingest, equivalent to ``lilbee rebuild``. Intended for
        recovering after an embedding-model switch, when the store refuses
        search or ingest because ``cfg.embedding_model`` no longer matches the
        persisted vectors.

    ``retry_skipped``
        The lighter recovery path: clears the markers for files that failed a
        previous sync (Tesseract timeout, decode failure, no usable text) so
        this sync attempts them again, without dropping the existing store.
    """

    enable_ocr: bool | None = None
    force_rebuild: bool = False
    retry_skipped: bool = False

class AddRequest(BaseModel):
    """Body accepted by /api/add."""

    paths: list[str]
    force: bool = False
    enable_ocr: bool | None = None
    ocr_timeout: float | None = None

class SetModelRequest(BaseModel):
    """Body accepted by /api/models/chat."""

    model: str

class SourceContentResponse(BaseModel):
    """JSON body returned by ``GET /api/source`` when ``raw=0``.

    ``markdown`` is empty for binary content types.
    """

    markdown: str
    content_type: str
    title: str | None = None

102

103

class ChatMessage(BaseModel):
    """One turn of a chat conversation."""

    # Only these two speaker roles are accepted.
    role: Literal["user", "assistant"]
    content: str

109

110

class CleanedChunk(BaseModel):
    """Search-result chunk with the vector stripped and distance renamed."""

    source: str
    content_type: str
    chunk: str
    distance: float | None = None
    relevance_score: float | None = None
    page_start: int = 0
    page_end: int = 0
    line_start: int = 0
    line_end: int = 0
    chunk_index: int = 0
    # Path relative to the vault. Populated when ``cfg.vault_base`` is set and
    # the source file lives inside the vault; absent when the server runs
    # headless or the source cannot be resolved as a vault file. Lets clients
    # open the source in a native editor rather than fetching ``/api/source``.
    vault_path: str | None = None

129

130

class StatusSourceInfo(BaseModel):
    """One indexed source as listed in a status response."""

    filename: str
    file_hash: str
    chunk_count: int
    ingested_at: str

138

139

class StatusConfigInfo(BaseModel):
    """The configuration section of a status response.

    All four role-bound model fields (chat, embedding, vision, reranker) are
    included so plugins/TUI can show what is active per role without a second
    round trip.
    """

    documents_dir: str
    data_dir: str
    chat_model: str
    embedding_model: str
    vision_model: str = ""
    reranker_model: str = ""
    enable_ocr: bool | None = None

154

155

class StatusResponse(BaseModel):
    """Payload returned by GET /api/status."""

    command: str = "status"
    config: StatusConfigInfo
    sources: list[StatusSourceInfo]
    total_chunks: int

163

164

class HealthResponse(BaseModel):
    """Payload returned by /api/health."""

    status: str
    version: str

170

171

class AskResponse(BaseModel):
    """Payload shared by /api/ask and /api/chat responses."""

    answer: str
    sources: list[CleanedChunk]

177

178

class SetModelResponse(BaseModel):
    """Payload returned by PUT /api/models/{chat|embedding|vision|reranker}.

    ``reindex_required`` mirrors the flag of the same name on
    ``ConfigUpdateResponse``. It is ``True`` only when the new embedding model
    differs from the model that built the persisted vector store. The chat,
    vision, and reranker handlers always return ``False`` because their
    changes do not invalidate stored vectors.
    """

    model: str
    reindex_required: bool = False

190

191

class ConfigUpdateResponse(BaseModel):
    """Payload returned by PATCH /api/config."""

    updated: list[str]
    reindex_required: bool

197

198

class CrawlRequest(BaseModel):
    """Request body for /api/crawl.

    ``depth``
        ``null`` / omitted means whole-site, unbounded recursion; ``0`` means
        the single URL only; a positive int is the maximum depth.

    ``max_pages``
        ``null`` / omitted means no cap; a positive int is an explicit page
        cap.
    """

    url: str
    depth: int | None = Field(default=None, ge=0)
    max_pages: int | None = Field(default=None, ge=1)

210

211

class DocumentInfo(BaseModel):
    """One indexed document as it appears in a list response."""

    filename: str
    chunk_count: int = 0
    ingested_at: str = ""

218

219

class DocumentListResponse(BaseModel):
    """Payload returned by GET /api/documents."""

    documents: list[DocumentInfo]
    total: int
    limit: int
    offset: int
    has_more: bool = False

228

229

class DocumentRemoveResponse(BaseModel):
    """Payload returned by POST /api/documents/remove."""

    removed: list[str]
    not_found: list[str]

235

236

class ConfigResponse(BaseModel):
    """Payload returned by GET /api/config."""

    # No fields are declared here; extra keys are allowed through.
    model_config = {"extra": "allow"}

241

242

class ModelsShowResponse(BaseModel):
    """Payload returned by POST /api/models/show."""

    # No fields are declared here; extra keys are allowed through.
    model_config = {"extra": "allow"}

247

248

class CatalogEntryResponse(BaseModel):
    """One model row in the catalog browser.

    ``fit`` and ``size_variants`` hold hardware-fit data computed on the
    server, so clients (TUI, plugin) can render fit chips and size strips
    without probing local memory themselves. ``fit`` is ``None`` when the
    row's footprint cannot be assessed against host memory (e.g. a future
    cloud-only entry whose weights live off-host).
    """

    hf_repo: str
    gguf_filename: str
    task: ModelTask
    display_name: str
    param_count: str
    size_gb: float
    min_ram_gb: float
    description: str
    quality_tier: str
    featured: bool
    downloads: int
    installed: bool
    source: ModelSource
    fit: FitLevel | None = None
    size_variants: list[SizeVariantInfo] = []

274

275

class ModelsCatalogResponse(BaseModel):
    """Payload returned by GET /api/models/catalog."""

    total: int
    limit: int
    offset: int
    models: list[CatalogEntryResponse]
    has_more: bool = False

284

285

class InstalledModelEntry(BaseModel):
    """One installed model."""

    name: str
    source: ModelSource

291

292

class ModelsInstalledResponse(BaseModel):
    """Payload returned by GET /api/models/installed."""

    models: list[InstalledModelEntry]

297

298

class ModelsDeleteResponse(BaseModel):
    """Payload returned by DELETE /api/models/{model}."""

    deleted: bool
    model: str
    freed_gb: float

305

306

class ExternalModelsResponse(BaseModel):
    """Payload returned by GET /api/models/external."""

    models: list[str]
    error: str | None = None

312

313

class SyncSummary(BaseModel):
    """Sync result embedded inside an add-files response."""

    added: list[str] = []
    updated: list[str] = []
    removed: list[str] = []
    unchanged: int = 0
    failed: list[str] = []
    skipped: list[str] = []

323

324

class AddSummary(BaseModel):
    """Summary produced by the add-files handler."""

    copied: list[str]
    skipped: list[str]
    errors: list[str]
    sync: SyncSummary | None = None

332

333

class WikiPageSummary(BaseModel):
    """Wiki page summary used by list endpoints."""

    slug: str
    title: str = ""
    page_type: str = "unknown"
    source_count: int = 0
    created_at: str = ""

342

343

class WikiCitationRecord(BaseModel):
    """Citation record from the store, as used in reverse lookup responses."""

    wiki_source: str = ""
    wiki_chunk_index: int = 0
    citation_key: str = ""
    claim_type: str = "fact"
    source_filename: str = ""
    source_hash: str = ""
    page_start: int = 0
    page_end: int = 0
    line_start: int = 0
    line_end: int = 0
    excerpt: str = ""
    created_at: str = ""

359

360

class WikiPageDetail(BaseModel):
    """The full content of one wiki page."""

    slug: str
    title: str = ""
    content: str = ""

367

368

class WikiCitationsResult(BaseModel):
    """The citations attached to one wiki page."""

    slug: str
    citations: list[WikiCitationRecord] = []

374

375

class WikiLintIssueItem(BaseModel):
    """One lint finding reported against a wiki page."""

    wiki_source: str = ""
    issue_type: str = ""
    severity: str = ""
    message: str = ""

383

384

class WikiLintResult(BaseModel):
    """Outcome of a full wiki lint run."""

    issues: list[WikiLintIssueItem] = []
    errors: int = 0
    warnings: int = 0

391

392

class WikiPruneRecordResponse(BaseModel):
    """One pruning action."""

    wiki_source: str
    action: str
    reason: str

399

400

class WikiPruneResult(BaseModel):
    """Outcome of a wiki pruning run."""

    records: list[WikiPruneRecordResponse] = []
    archived: int = 0
    flagged: int = 0

407

408

class WikiBuildResult(BaseModel):
    """Outcome of a full wiki build/update."""

    paths: list[str] = []
    entities: int = 0
    count: int = 0

415

416

class WikiStatusResult(BaseModel):
    """Status counters for the wiki layer."""

    wiki_enabled: bool
    summaries: int = 0
    drafts: int = 0
    pages: int = 0
    lint_errors: int = 0
    lint_warnings: int = 0

426

427

class WikiSynthesizeResult(BaseModel):
    """Outcome of generating synthesis pages for cross-source concept clusters."""

    paths: list[str] = []
    count: int = 0

433

434

class DraftInfoResponse(BaseModel):
    """Metadata for one wiki draft; mirrors ``DraftInfo.to_dict()``.

    ``pending_kind`` is ``None`` for drift drafts and ``"parse"`` or
    ``"collision"`` for batched-generation markers.
    """

    slug: str
    path: str
    drift_ratio: float | None = None
    faithfulness_score: float | None = None
    bad_title: bool = False
    published_path: str | None = None
    published_exists: bool = False
    mtime: float = 0.0
    pending_kind: str | None = None

451

452

class WikiDraftDiffResponse(BaseModel):
    """Unified diff between a draft and its published counterpart."""

    slug: str
    diff: str

458

459

class WikiDraftAcceptResponse(BaseModel):
    """Result of accepting a draft: where it landed and how many chunks reindexed.

    ``slug`` is the slug the content was published under, while
    ``requested_slug`` is the slug the client asked to accept. They differ for
    PENDING-COLLISION drafts: the request slug carries a
    ``-collision-<hash>`` suffix that is stripped on publish.
    """

    slug: str
    requested_slug: str
    moved_to: str
    reindexed_chunks: int

473

474

class WikiDraftRejectResponse(BaseModel):
    """Result of rejecting a draft."""

    slug: str

Coverage for src/lilbee/server/models.py: 100%

236 statements