Coverage for src / lilbee / wiki / persistence.py: 100%
67 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Disk-write side effects for wiki page generation.
3Owns the orchestrator that lands a generated page on disk plus the
4draft-routing helpers (drift redirects, PENDING markers for parse
5failures, collision markers for duplicate concept slugs). Higher-level
6code in :mod:`lilbee.wiki.page` calls into here for the publish step;
7the actual ``write_page`` lives there to keep file-handling close to
8content assembly.
9"""
11from __future__ import annotations
13import logging
14from pathlib import Path
16from lilbee.core.config import Config
17from lilbee.data.store import CitationRecord, Store
18from lilbee.wiki.index import append_wiki_log, update_wiki_index
19from lilbee.wiki.shared import (
20 PENDING_MARKER_KEYWORD_COLLISION,
21 PENDING_MARKER_KEYWORD_PARSE,
22 PageTarget,
23 WikiLogAction,
24)
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Pending-marker conventions: the drafts listing surface
# (``lilbee.wiki.drafts``) scans for these prefixes to classify a
# draft as PARSE or COLLISION instead of a drift-routed regen. The
# keyword phrases live in ``wiki.shared`` so the writer (generation) and
# the reader (drafts) stay in sync on the exact wording.
_PENDING_PARSE_MARKER_PREFIX = f"<!-- {PENDING_MARKER_KEYWORD_PARSE}"
_PENDING_COLLISION_MARKER_PREFIX = f"<!-- {PENDING_MARKER_KEYWORD_COLLISION}"

# Minimum number of ``/``-split parts a wiki source needs for its subdir
# (the part at index 1) to exist. A well-formed
# ``<wiki_dir>/<subdir>/<slug>.md`` source always meets this; anything
# shorter is a malformed wiki source and has no subdir.
_WIKI_SOURCE_MIN_PARTS = 2
def divert_to_drafts(
    new_content: str,
    drafts_dir: Path,
    slug: str,
    change_ratio: float,
    diff_text: str,
) -> Path:
    """Park regenerated content in the drafts directory rather than overwriting.

    The draft lands at ``<drafts_dir>/<slug>.md`` (missing parent directories
    are created) and starts with a ``DRIFT`` HTML comment stating what
    fraction of the content changed. A warning that includes *diff_text* is
    logged.

    Returns the path of the draft that was written.
    """
    target = drafts_dir / f"{slug}.md"
    target.parent.mkdir(parents=True, exist_ok=True)

    drift_note = (
        f"<!-- DRIFT: {change_ratio:.0%} content changed - flagged for human review -->\n\n"
    )
    target.write_text("".join((drift_note, new_content)), encoding="utf-8")

    percent_changed = change_ratio * 100
    log.warning(
        "Drift detected for %s (%.0f%% changed), diverted to drafts. Diff:\n%s",
        slug,
        percent_changed,
        diff_text,
    )
    return target
def subdir_from_wiki_source(wiki_source: str) -> str | None:
    """Extract the subdir segment (``summaries``, ``concepts``, ...) of *wiki_source*.

    *wiki_source* is the ``<wiki_dir>/<subdir>/<slug>.md`` path stored in
    citations and chunks. The subdir is the second ``/``-separated segment;
    None is returned when the path has fewer than two segments.
    """
    segments = wiki_source.split("/")
    if len(segments) < _WIKI_SOURCE_MIN_PARTS:
        # Too short to carry a subdir at index 1.
        return None
    return segments[1]
def persist_and_finalize(
    content: str,
    target: PageTarget,
    verified: list[CitationRecord],
    source_names: list[str],
    store: Store,
    config: Config,
) -> Path:
    """Land a generated page on disk and bring the store, index and log up to date.

    Steps, in order:

    1. Write the page via ``write_page`` (passing ``config.wiki_drift_threshold``).
    2. Stamp every record in *verified* with ``target.wiki_source`` (the
       records are mutated in place), then replace the stored citations for
       that wiki source with them.
    3. Index the page content via ``index_wiki_page``.
    4. When ``config.wiki_prune_raw`` is set, delete each name in
       *source_names* from the store.
    5. Refresh the wiki index and append a ``GENERATED`` entry to the wiki log.

    Returns the path returned by ``write_page``.
    """
    # Imported here, not at module top, to break the import cycle:
    # lilbee.wiki.page reaches this module through persist_and_finalize.
    from lilbee.wiki.page import index_wiki_page, write_page

    written_path = write_page(
        target.wiki_root,
        target.subdir,
        target.slug,
        content,
        config.wiki_drift_threshold,
    )

    for citation in verified:
        citation["wiki_source"] = target.wiki_source
    # Drop the previous citations for this page before adding the new set.
    store.delete_citations_for_wiki(target.wiki_source)
    store.add_citations(verified)

    index_wiki_page(content, target.wiki_source, store)

    if config.wiki_prune_raw:
        for raw_name in source_names:
            store.delete_by_source(raw_name)

    update_wiki_index(config)
    log_entry = f"{target.page_type} page for {target.label} -> {target.subdir}/{target.slug}.md"
    append_wiki_log(WikiLogAction.GENERATED, log_entry, config)
    return written_path
def write_pending_marker(
    drafts_dir: Path,
    slug: str,
    marker_line: str,
    frontmatter: str = "",
) -> Path:
    """Create a PENDING marker page at ``<drafts_dir>/<slug>.md``.

    *marker_line* is the leading HTML comment that both identifies the
    marker kind and carries its context (source, label). It is written
    first, followed by a newline. When *frontmatter* is non-empty it is
    appended after one blank line, preserving minimal metadata for the
    drafts surface to round-trip (e.g. ``bad_title``-style fields).

    *drafts_dir* is created if missing. Returns the path of the marker file.
    """
    drafts_dir.mkdir(parents=True, exist_ok=True)

    if frontmatter:
        text = marker_line + "\n\n" + frontmatter
    else:
        text = marker_line + "\n"

    marker_path = drafts_dir / f"{slug}.md"
    marker_path.write_text(text, encoding="utf-8")
    return marker_path
def delete_pending_marker_if_present(drafts_dir: Path, slug: str) -> bool:
    """Delete an existing PENDING marker for *slug*; return whether one was removed.

    The candidate is ``<drafts_dir>/<slug>.md``. It is removed only when its
    first line starts with the PARSE or COLLISION pending-marker prefix; any
    other draft at that path is left untouched.

    Returns False, without deleting anything, when the file does not exist,
    cannot be read, is not valid UTF-8, is empty, or does not start with a
    pending-marker prefix. Returns True after a marker has been unlinked.

    Match is slug-equality (not fuzzy): an LLM that rephrases a
    label on retry (``brake system`` → ``braking system``) leaves
    the old marker behind for the user to drain via ``wiki drafts
    reject``. Documented limitation; follow-up if the pattern
    matters.
    """
    draft_path = drafts_dir / f"{slug}.md"
    if not draft_path.is_file():
        return False
    try:
        body = draft_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError, so it needs its
        # own entry here. The marker writers in this module always emit
        # UTF-8, so a file that fails to decode is not a pending marker.
        return False
    # A non-empty string always yields at least one line from splitlines().
    first_line = body.splitlines()[0] if body else ""
    marker_prefixes = (_PENDING_PARSE_MARKER_PREFIX, _PENDING_COLLISION_MARKER_PREFIX)
    if not first_line.startswith(marker_prefixes):
        return False
    draft_path.unlink()
    return True
def divert_concept_collision(
    *,
    slug: str,
    source: str,
    first_source: str,
    content: str,
    drafts_dir: Path,
) -> Path:
    """Hold the losing concept page at ``drafts/<slug>-collision-<hash>.md``.

    The page already written by *first_source* is not touched. The draft
    starts with a collision marker comment naming both sources, followed by
    *content*. The ``<hash>`` suffix comes from ``short_source_hash(source)``;
    it is stable per source, so a retry on the same two sources lands at the
    same draft path and the user can iterate without marker sprawl.

    *drafts_dir* is created if missing. A warning is logged. Returns the path
    of the draft file.
    """
    # Imported here, not at module top, to break the import cycle:
    # lilbee.wiki.batch imports persist_and_finalize and
    # divert_concept_collision from this module.
    from lilbee.wiki.batch import short_source_hash

    source_hash = short_source_hash(source)
    draft_name = f"{slug}-collision-{source_hash}.md"
    header = (
        f"{_PENDING_COLLISION_MARKER_PREFIX} with source {first_source}, "
        f"content from {source} held for review -->\n\n"
    )

    drafts_dir.mkdir(parents=True, exist_ok=True)
    collision_path = drafts_dir / draft_name
    collision_path.write_text(header + content, encoding="utf-8")

    log.warning(
        "Concept slug collision: %s already written by %s; diverted %s's version to %s",
        slug,
        first_source,
        source,
        collision_path,
    )
    return collision_path