Coverage for src / lilbee / cli / commands / wiki.py: 100%

261 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""Wiki layer commands: build, update, lint, citations, status, prune, synthesize, drafts.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6from typing import TYPE_CHECKING, Any 

7 

8import typer 

9from rich.table import Table 

10 

11from lilbee.app.services import get_services 

12from lilbee.cli import theme 

13from lilbee.cli.app import ( 

14 apply_overrides, 

15 console, 

16 data_dir_option, 

17 global_option, 

18) 

19from lilbee.cli.helpers import json_output 

20from lilbee.cli.tui import messages as msg 

21from lilbee.core.config import cfg 

22from lilbee.wiki.shared import ( 

23 WikiSubdir, 

24) 

25 

26if TYPE_CHECKING: 

27 from lilbee.wiki.entity_extractor import ExtractedEntity 

28 

29 

# Root Typer app for the ``wiki`` command group. The help text lists the
# subcommands actually registered on this app.
wiki_app = typer.Typer(
    help="Wiki layer commands: build, update, lint, citations, status, prune, synthesize, drafts."
)

31 

# The citations table caps each excerpt at ``_CITATION_EXCERPT_MAX_CHARS``
# visible characters. Longer excerpts are cut early enough that the appended
# ``...`` still fits inside that cap.
_CITATION_EXCERPT_MAX_CHARS = 60
_CITATION_EXCERPT_TRUNCATE_AT = _CITATION_EXCERPT_MAX_CHARS - len("...")

# Dry-run NER rows list at most ``_NER_DRY_RUN_PREVIEW_LIMIT`` sources each;
# ``", ..."`` is appended when additional sources were left out.
_NER_DRY_RUN_PREVIEW_LIMIT = 3

41 

42 

def _count_md_files(directory: Path) -> int:
    """Return how many ``*.md`` entries exist anywhere under ``directory``.

    The search is recursive. A path that does not exist counts as zero.
    """
    if directory.exists():
        return sum(1 for _ in directory.rglob("*.md"))
    return 0

48 

49 

def _fail_wiki_disabled() -> None:
    """Report that the wiki layer is disabled, using the active output mode.

    JSON mode gets an ``{"error": ...}`` payload; otherwise the message is
    printed to the console. The function only reports; it does not exit.
    """
    if not cfg.json_mode:
        console.print(msg.CMD_WIKI_DISABLED)
        return
    json_output({"error": msg.CMD_WIKI_DISABLED})

56 

57 

@wiki_app.command(name="lint")
def wiki_lint(
    wiki_source: str = typer.Argument("", help="Wiki page path (empty = lint all)."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Lint wiki pages for stale citations, missing sources, and unmarked claims."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.lint import lint_all as _lint_all
    from lilbee.wiki.lint import lint_wiki_page

    store = get_services().store
    # A named page is linted on its own; an empty argument lints everything.
    issues = lint_wiki_page(wiki_source, store) if wiki_source else _lint_all(store).issues

    if cfg.json_mode:
        payload = {
            "command": "wiki_lint",
            "issues": [found.to_dict() for found in issues],
            "total": len(issues),
        }
        json_output(payload)
        return

    if not issues:
        console.print("No issues found.")
        return

    table = Table(title="Wiki Lint Issues")
    table.add_column("Page", style=theme.ACCENT)
    table.add_column("Severity")
    table.add_column("Message")
    for found in issues:
        level = found.severity.value
        # Errors use the error colour; every other severity uses the warning colour.
        tint = theme.ERROR if level == "error" else theme.WARNING
        table.add_row(found.wiki_source, f"[{tint}]{level}[/{tint}]", found.message)
    console.print(table)

99 

100 

@wiki_app.command(name="citations")
def wiki_citations(
    wiki_source: str = typer.Argument(..., help="Wiki page path, e.g. wiki/summaries/doc.md."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Show citations for a wiki page."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    # Local import: escapes square brackets so free text is not parsed as Rich markup.
    from rich.markup import escape

    records = get_services().store.get_citations_for_wiki(wiki_source)

    if cfg.json_mode:
        # JSON output carries the raw, untruncated records.
        json_output(
            {
                "command": "wiki_citations",
                "wiki_source": wiki_source,
                "citations": [dict(r) for r in records],
                "total": len(records),
            }
        )
        return

    # The page path is user input; escape it before embedding it in markup.
    safe_source = escape(wiki_source)
    if not records:
        console.print(f"No citations found for [{theme.ACCENT}]{safe_source}[/{theme.ACCENT}]")
        return

    table = Table(title=f"Citations: {safe_source}")
    table.add_column("Key", style=theme.ACCENT)
    table.add_column("Source")
    table.add_column("Type", style=theme.MUTED)
    table.add_column("Excerpt", max_width=_CITATION_EXCERPT_MAX_CHARS)
    for rec in records:
        excerpt = rec["excerpt"]
        if len(excerpt) > _CITATION_EXCERPT_MAX_CHARS:
            # Cut early so the excerpt plus "..." stays within the column cap.
            excerpt = excerpt[:_CITATION_EXCERPT_TRUNCATE_AT] + "..."
        # Escape after truncating so a slice can never split an escape sequence.
        # Excerpts and filenames are document-derived and may contain "[...]",
        # which Rich would otherwise swallow as a style tag or reject outright.
        table.add_row(
            rec["citation_key"],
            escape(rec["source_filename"]),
            rec["claim_type"],
            escape(excerpt),
        )
    console.print(table)

140 

141 

@wiki_app.command(name="status")
def wiki_status(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Show wiki layer status: page counts and lint summary."""
    apply_overrides(data_dir=data_dir, use_global=use_global)

    wiki_root = cfg.data_root / cfg.wiki_dir
    if not wiki_root.exists():
        if cfg.json_mode:
            # Emit every key the populated payload has, so JSON consumers can
            # read the same fields whether or not the wiki directory exists.
            # "issues" is retained for callers of the earlier payload shape.
            json_output(
                {
                    "wiki_enabled": cfg.wiki,
                    WikiSubdir.SUMMARIES: 0,
                    WikiSubdir.DRAFTS: 0,
                    "pages": 0,
                    "lint_errors": 0,
                    "lint_warnings": 0,
                    "issues": 0,
                }
            )
            return
        console.print("Wiki directory does not exist yet. Run sync with wiki enabled.")
        return

    summaries = _count_md_files(wiki_root / WikiSubdir.SUMMARIES)
    drafts = _count_md_files(wiki_root / WikiSubdir.DRAFTS)

    from lilbee.wiki.lint import lint_all as _lint_all

    # Status always runs a full lint pass to report error and warning totals.
    report = _lint_all(get_services().store)

    if cfg.json_mode:
        json_output(
            {
                "wiki_enabled": cfg.wiki,
                WikiSubdir.SUMMARIES: summaries,
                WikiSubdir.DRAFTS: drafts,
                "pages": summaries + drafts,
                "lint_errors": report.error_count,
                "lint_warnings": report.warning_count,
            }
        )
        return

    color = "green" if cfg.wiki else "red"
    label = "enabled" if cfg.wiki else "disabled"
    console.print(f"Wiki: [{color}]{label}[/{color}]")
    console.print(f" Summaries: [{theme.LABEL}]{summaries}[/{theme.LABEL}]")
    console.print(f" Drafts: [{theme.LABEL}]{drafts}[/{theme.LABEL}]")
    if report.error_count or report.warning_count:
        console.print(
            f" Lint: [{theme.ERROR}]{report.error_count} error(s)[/{theme.ERROR}], "
            f"[{theme.WARNING}]{report.warning_count} warning(s)[/{theme.WARNING}]"
        )
    else:
        console.print(" Lint: all clean")

190 

191 

@wiki_app.command(name="synthesize")
def wiki_synthesize(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Generate synthesis pages for concept clusters spanning 3+ sources."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    if not cfg.wiki:
        # Report the disabled wiki and stop before loading the generation module.
        _fail_wiki_disabled()
        return
    from lilbee.wiki.generation import generate_synthesis_pages

    services = get_services()
    generated = generate_synthesis_pages(services.provider, services.store, services.clusterer)

    if cfg.json_mode:
        payload = {
            "command": "wiki_synthesize",
            "paths": [str(page) for page in generated],
            "count": len(generated),
        }
        json_output(payload)
        return

    if generated:
        console.print(f"Generated [{theme.LABEL}]{len(generated)}[/{theme.LABEL}] synthesis pages:")
        for page in generated:
            console.print(f" {page}")
        return

    console.print("No synthesis pages generated (need 3+ sources per cluster).")

224 

225 

@wiki_app.command(name="prune")
def wiki_prune(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Prune stale and orphaned wiki pages."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.prune import prune_wiki

    outcome = prune_wiki(get_services().store)

    if cfg.json_mode:
        payload = {
            "command": "wiki_prune",
            "records": [entry.to_dict() for entry in outcome.records],
            "archived": outcome.archived_count,
            "flagged": outcome.flagged_count,
        }
        json_output(payload)
        return

    if not outcome.records:
        console.print("No pages pruned.")
        return

    table = Table(title="Wiki Prune Results")
    table.add_column("Page", style=theme.ACCENT)
    table.add_column("Action")
    table.add_column("Reason")
    for entry in outcome.records:
        action = entry.action.value
        # Archived pages use the error colour; any other action uses the warning colour.
        tint = theme.ERROR if action == "archived" else theme.WARNING
        table.add_row(entry.wiki_source, f"[{tint}]{action}[/{tint}]", entry.reason)
    console.print(table)

261 

262 

@wiki_app.command(name="build")
def wiki_build(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help=(
            "Run extraction only; skip every LLM call. Prints the NER entity candidates. "
            "LLM-curated concept pages require a build call and are not shown in dry-run."
        ),
    ),
) -> None:
    """Build the concept and entity wiki across all ingested sources."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    if not cfg.wiki:
        _fail_wiki_disabled()
        return

    if dry_run:
        from lilbee.data.store import SearchChunk
        from lilbee.wiki.entity_extractor import get_entity_extractor

        services = get_services()
        # Collect the stored chunks of every source, source by source.
        chunks: list[SearchChunk] = [
            chunk
            for record in services.store.get_sources()
            for chunk in services.store.get_chunks_by_source(record["filename"])
        ]
        extractor = get_entity_extractor(cfg.wiki_entity_mode, services.provider, cfg)
        _wiki_build_dry_run_output(extractor.extract(chunks))
        return

    from lilbee.wiki import run_full_build

    result = run_full_build(cfg)

    if cfg.json_mode:
        # The build result's fields are merged into the JSON payload as-is.
        json_output({"command": "wiki_build", **result})
        return

    pages = result["paths"]
    if pages:
        console.print(
            f"Generated [{theme.LABEL}]{result['count']}[/{theme.LABEL}] "
            f"wiki pages from {result['entities']} extracted records:"
        )
        for page in pages:
            console.print(f" {page}")
        return

    console.print("No concept or entity pages generated.")

314 

315 

# Attached to every dry-run result (JSON payload and console output) so that
# users know why no concept pages are listed.
_DRY_RUN_CONCEPT_NOTE = (
    "Note: LLM-curated concepts are not shown in --dry-run. Run `lilbee wiki build` "
    "to see which concepts the LLM proposes."
)

320 

321 

def _wiki_build_dry_run_output(entities: list[ExtractedEntity]) -> None:
    """Print the dry-run extraction result as JSON or as a console table.

    No LLM is called here. Only NER entity candidates are listed; a closing
    note tells the user that LLM-curated concepts are missing from dry-run
    output and how to obtain them.
    """
    rows: list[dict[str, Any]] = []
    for entity in entities:
        rows.append(
            {
                "slug": entity.slug,
                "label": entity.label,
                "kind": entity.kind.value,
                "type_hint": entity.type_hint,
                "mentions": len(entity.chunk_refs),
                # Unique source names, sorted for stable output.
                "sources": sorted({ref.source for ref in entity.chunk_refs}),
            }
        )

    if cfg.json_mode:
        payload = {
            "command": "wiki_build",
            "dry_run": True,
            "entities": rows,
            "count": len(rows),
            "note": _DRY_RUN_CONCEPT_NOTE,
        }
        json_output(payload)
        return

    muted_note = f"[{theme.MUTED}]{_DRY_RUN_CONCEPT_NOTE}[/{theme.MUTED}]"

    if not rows:
        console.print("No candidate entities extracted. Run sync first.")
        console.print(muted_note)
        return

    table = Table(title=f"Wiki build dry-run ({len(rows)} NER entity candidates)")
    table.add_column("Slug", style=theme.ACCENT)
    table.add_column("Kind", style=theme.MUTED)
    table.add_column("Type")
    table.add_column("Mentions")
    table.add_column("Sources")
    for row in rows:
        names: list[str] = row["sources"]
        # Show only the first few sources and flag when more were omitted.
        preview = ", ".join(names[:_NER_DRY_RUN_PREVIEW_LIMIT])
        if len(names) > _NER_DRY_RUN_PREVIEW_LIMIT:
            preview += ", ..."
        table.add_row(
            str(row["slug"]),
            str(row["kind"]),
            str(row["type_hint"]),
            str(row["mentions"]),
            preview,
        )
    console.print(table)
    console.print(
        f"Dry run: [{theme.LABEL}]{len(rows)}[/{theme.LABEL}] candidate entities. "
        "No LLM calls were made."
    )
    console.print(muted_note)

381 

382 

@wiki_app.command(name="update")
def wiki_update(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Refresh the concept and entity wiki after an ingest.

    Currently a full rebuild. The incremental touched-slug regeneration
    lands in the ingest-hook task and will re-route this command then.
    """
    # Delegates to the build command with dry-run off; the build command
    # applies the overrides and checks whether the wiki is enabled.
    wiki_build(
        data_dir=data_dir,
        use_global=use_global,
        dry_run=False,
    )

394 

395 

# Nested command group: its commands are reached as ``wiki drafts <command>``.
drafts_app = typer.Typer(
    help="Review wiki drafts: list, diff, accept, reject.",
)
wiki_app.add_typer(
    drafts_app,
    name="drafts",
)

398 

399 

@drafts_app.command(name="list")
def wiki_drafts_list(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """List pending wiki drafts with drift, faithfulness, and pairing info."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.drafts import PendingKind, list_drafts

    pending = list_drafts(cfg.data_root / cfg.wiki_dir)

    if cfg.json_mode:
        payload = {
            "command": "wiki_drafts_list",
            "drafts": [draft.to_dict() for draft in pending],
            "total": len(pending),
        }
        json_output(payload)
        return

    if not pending:
        console.print("No drafts pending review.")
        return

    table = Table(title="Wiki Drafts")
    table.add_column("Slug", style=theme.ACCENT)
    table.add_column("Kind", style=theme.MUTED)
    table.add_column("Drift")
    table.add_column("Faithfulness")
    table.add_column("Published?", style=theme.MUTED)
    for draft in pending:
        # Drafts with no recorded kind are shown as drift drafts.
        kind = draft.pending_kind or PendingKind.DRIFT
        # Metrics that were never computed render as "-".
        drift_cell = "-" if draft.drift_ratio is None else f"{draft.drift_ratio:.0%}"
        faith_cell = (
            "-" if draft.faithfulness_score is None else f"{draft.faithfulness_score:.2f}"
        )
        published_cell = "yes" if draft.published_exists else "no"
        table.add_row(draft.slug, kind, drift_cell, faith_cell, published_cell)
    console.print(table)

439 

440 

@drafts_app.command(name="diff")
def wiki_drafts_diff(
    slug: str = typer.Argument(..., help="Draft slug (e.g. chevrolet)."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Show a unified diff of the draft against its published counterpart."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.drafts import diff_draft

    wiki_root = cfg.data_root / cfg.wiki_dir
    try:
        diff = diff_draft(slug, wiki_root)
    except FileNotFoundError as exc:
        # Report the failure in the active output mode, then exit with status 1.
        if cfg.json_mode:
            json_output({"error": str(exc)})
        else:
            console.print(f"[{theme.ERROR}]{exc}[/{theme.ERROR}]")
        raise typer.Exit(1) from None

    if cfg.json_mode:
        json_output({"command": "wiki_drafts_diff", "slug": slug, "diff": diff})
        return
    # The diff is raw page text. Markdown uses square brackets (e.g. links),
    # which Rich would parse as style tags: the bracketed text is dropped or a
    # MarkupError is raised. Print the diff literally instead.
    console.print(diff or "(no differences)", markup=False)

465 

466 

@drafts_app.command(name="accept")
def wiki_drafts_accept(
    slug: str = typer.Argument(..., help="Draft slug to accept."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Overwrite the published page with the draft and re-index its chunks."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.drafts import accept_draft

    wiki_root = cfg.data_root / cfg.wiki_dir
    try:
        outcome = accept_draft(slug, wiki_root, get_services().store)
    except FileNotFoundError as exc:
        # Report the failure in the active output mode, then exit with status 1.
        if not cfg.json_mode:
            console.print(f"[{theme.ERROR}]{exc}[/{theme.ERROR}]")
        else:
            json_output({"error": str(exc)})
        raise typer.Exit(1) from None

    if not cfg.json_mode:
        console.print(
            f"Accepted [{theme.ACCENT}]{slug}[/{theme.ACCENT}] -> "
            f"{outcome.moved_to} ({outcome.reindexed_chunks} chunks re-indexed)"
        )
        return
    json_output({"command": "wiki_drafts_accept", **outcome.to_dict()})

494 

495 

@drafts_app.command(name="reject")
def wiki_drafts_reject(
    slug: str = typer.Argument(..., help="Draft slug to reject."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Delete the draft file. Does not touch the published page or index."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.drafts import reject_draft

    wiki_root = cfg.data_root / cfg.wiki_dir
    try:
        reject_draft(slug, wiki_root)
    except FileNotFoundError as exc:
        # Report the failure in the active output mode, then exit with status 1.
        if not cfg.json_mode:
            console.print(f"[{theme.ERROR}]{exc}[/{theme.ERROR}]")
        else:
            json_output({"error": str(exc)})
        raise typer.Exit(1) from None

    if not cfg.json_mode:
        console.print(f"Rejected [{theme.ACCENT}]{slug}[/{theme.ACCENT}]")
        return
    json_output({"command": "wiki_drafts_reject", "slug": slug})