Coverage for src / lilbee / wiki / ingest.py: 100%
38 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Wiki post-ingest hook: regenerate pages touched by a recent sync."""
3from __future__ import annotations
5import asyncio
6import logging
8from lilbee.app.services import get_services
9from lilbee.core.config import cfg
11log = logging.getLogger(__name__)
async def incremental_update(changed_sources: set[str]) -> None:
    """Refresh the wiki pages affected by *changed_sources*.

    Does nothing when ``cfg.wiki`` is falsy or *changed_sources* is empty.
    Otherwise every chunk of every source in the store is run through the
    configured entity extractor, and an entity is selected for
    regeneration when its page file is missing under the wiki root or
    when any of its chunk references points at a changed source.

    If more entities are selected than ``cfg.wiki_ingest_update_cap``
    allows, nothing is rebuilt: a warning is logged and a "skipped" entry
    is appended to the wiki log. Otherwise the selected entities are
    rebuilt, the wiki index is updated and the result is logged.
    """
    if not (cfg.wiki and changed_sources):
        return

    # Imported only after the guard, so a no-op call never loads these.
    from lilbee.data.store import SearchChunk
    from lilbee.wiki import append_wiki_log, build_wiki, update_wiki_index
    from lilbee.wiki.entity_extractor import EntityKind, get_entity_extractor
    from lilbee.wiki.shared import WikiLogAction, WikiSubdir

    svc = get_services()
    extractor = get_entity_extractor(cfg.wiki_entity_mode, svc.provider, cfg)

    # Flatten the chunks of every known source into a single list.
    chunks: list[SearchChunk] = [
        chunk
        for record in svc.store.get_sources()
        for chunk in svc.store.get_chunks_by_source(record["filename"])
    ]
    # Extraction runs in a worker thread rather than on the event loop.
    entities = await asyncio.to_thread(extractor.extract, chunks)

    wiki_root = cfg.data_root / cfg.wiki_dir

    def _needs_regeneration(entity) -> bool:
        """Return True if the entity's page is missing or cites a changed source."""
        # The original notes say the extractor only emits ENTITY kind today;
        # the CONCEPT branch stays so a future extractor that emits
        # CONCEPT again is still routed to the right directory.
        if entity.kind is EntityKind.CONCEPT:
            subdir = WikiSubdir.CONCEPTS
        else:
            subdir = WikiSubdir.ENTITIES
        page_path = wiki_root / subdir / f"{entity.slug}.md"
        # A missing page short-circuits: chunk refs are not inspected.
        return (not page_path.exists()) or any(
            ref.source in changed_sources for ref in entity.chunk_refs
        )

    touched = [entity for entity in entities if _needs_regeneration(entity)]
    if not touched:
        return

    touched_count = len(touched)
    cap = cfg.wiki_ingest_update_cap
    if touched_count > cap:
        # Deliberately a warning: per the original notes the default
        # LILBEE_LOG_LEVEL is WARNING, so an info-level message would be
        # dropped and `lilbee sync` would give no hint that the cap tripped.
        log.warning(
            "Wiki auto-update skipped: %d pages touched (cap %d). "
            "Run 'lilbee wiki update' to refresh.",
            touched_count,
            cap,
        )
        append_wiki_log(
            WikiLogAction.INGEST,
            f"skipped: {touched_count} pages exceeds cap {cap}",
        )
        return

    # extract_concepts=False keeps an incremental sync from churning
    # concept slugs; concept curation is left to an explicit,
    # user-invoked full `lilbee wiki build`.
    pages = await asyncio.to_thread(
        build_wiki,
        touched,
        svc.provider,
        svc.store,
        cfg,
        extract_concepts=False,
    )
    update_wiki_index()
    source_list = ", ".join(sorted(changed_sources))
    append_wiki_log(
        WikiLogAction.INGEST,
        f"{len(pages)} pages regenerated for {source_list}",
    )