Coverage for src / lilbee / wiki / persistence.py: 100%

67 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""Disk-write side effects for wiki page generation. 

2 

3Owns the orchestrator that lands a generated page on disk plus the 

4draft-routing helpers (drift redirects, PENDING markers for parse 

5failures, collision markers for duplicate concept slugs). Higher-level 

6code in :mod:`lilbee.wiki.page` calls into here for the publish step; 

7the actual ``write_page`` lives there to keep file-handling close to 

8content assembly. 

9""" 

10 

11from __future__ import annotations 

12 

13import logging 

14from pathlib import Path 

15 

16from lilbee.core.config import Config 

17from lilbee.data.store import CitationRecord, Store 

18from lilbee.wiki.index import append_wiki_log, update_wiki_index 

19from lilbee.wiki.shared import ( 

20 PENDING_MARKER_KEYWORD_COLLISION, 

21 PENDING_MARKER_KEYWORD_PARSE, 

22 PageTarget, 

23 WikiLogAction, 

24) 

25 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Pending-marker conventions: the drafts listing surface
# (``lilbee.wiki.drafts``) scans for these prefixes to classify a
# draft as PARSE or COLLISION instead of a drift-routed regen. The
# keyword phrases live in ``wiki.shared`` so writer (gen) and reader
# (drafts) stay in sync on the exact wording.
# Within this module the prefixes are compared against the first line
# of a draft before a marker is deleted, and the collision prefix opens
# the marker comment written for a diverted concept page.
_PENDING_PARSE_MARKER_PREFIX = f"<!-- {PENDING_MARKER_KEYWORD_PARSE}"
_PENDING_COLLISION_MARKER_PREFIX = f"<!-- {PENDING_MARKER_KEYWORD_COLLISION}"

# Minimum number of ``/``-split parts a wiki source needs before a subdir
# is read from it. Sources look like ``<wiki_dir>/<subdir>/<slug>.md`` and
# the subdir is taken from index 1, so two parts is the smallest split for
# which that index exists. Anything shorter is a malformed wiki source and
# has no subdir.
_WIKI_SOURCE_MIN_PARTS = 2

39 

40 

def divert_to_drafts(
    new_content: str,
    drafts_dir: Path,
    slug: str,
    change_ratio: float,
    diff_text: str,
) -> Path:
    """Park regenerated content in the drafts directory instead of replacing the page.

    The draft lands at ``<drafts_dir>/<slug>.md`` (parent directories are
    created as needed) and opens with an HTML comment stating what share of
    the content changed, flagging it for human review. A warning that
    includes *diff_text* is logged.

    Returns the path of the draft file that was written.
    """
    target = drafts_dir.joinpath(f"{slug}.md")
    target.parent.mkdir(parents=True, exist_ok=True)

    # The leading comment is what marks this draft as a drift diversion.
    header = f"<!-- DRIFT: {change_ratio:.0%} content changed - flagged for human review -->\n\n"
    target.write_text(f"{header}{new_content}", encoding="utf-8")

    percent_changed = change_ratio * 100
    log.warning(
        "Drift detected for %s (%.0f%% changed), diverted to drafts. Diff:\n%s",
        slug,
        percent_changed,
        diff_text,
    )
    return target

60 

61 

def subdir_from_wiki_source(wiki_source: str) -> str | None:
    """Extract the subdir segment (``summaries``, ``concepts``, ...) from *wiki_source*.

    *wiki_source* is the ``<wiki_dir>/<subdir>/<slug>.md`` path stored in
    citations and chunks. The value is split on ``/`` and the second
    segment is returned; ``None`` is returned when the split yields fewer
    than ``_WIKI_SOURCE_MIN_PARTS`` segments.
    """
    segments = wiki_source.split("/")
    if len(segments) < _WIKI_SOURCE_MIN_PARTS:
        # Too short to carry a subdir slot at all.
        return None
    return segments[1]

71 

72 

def persist_and_finalize(
    content: str,
    target: PageTarget,
    verified: list[CitationRecord],
    source_names: list[str],
    store: Store,
    config: Config,
) -> Path:
    """Publish a generated page and bring the store, index and log in line with it.

    Steps, in order:

    1. Write the page through ``write_page`` using the configured drift
       threshold.
    2. Stamp every record in *verified* with the page's ``wiki_source``
       (the records are mutated in place), then replace the citations
       stored for that page with them.
    3. Index the page content under the same ``wiki_source``.
    4. When ``config.wiki_prune_raw`` is set, delete each entry of
       *source_names* from the store.
    5. Refresh the wiki index and append a ``GENERATED`` log entry.

    Returns the path handed back by ``write_page``.
    """
    # circular: page -> persistence via persist_and_finalize
    from lilbee.wiki.page import index_wiki_page, write_page

    written_path = write_page(
        target.wiki_root,
        target.subdir,
        target.slug,
        content,
        config.wiki_drift_threshold,
    )

    # Old citations for this page are dropped before the fresh set is added.
    for citation in verified:
        citation["wiki_source"] = target.wiki_source
    store.delete_citations_for_wiki(target.wiki_source)
    store.add_citations(verified)

    index_wiki_page(content, target.wiki_source, store)

    if config.wiki_prune_raw:
        for raw_source in source_names:
            store.delete_by_source(raw_source)

    update_wiki_index(config)
    summary = f"{target.page_type} page for {target.label} -> {target.subdir}/{target.slug}.md"
    append_wiki_log(WikiLogAction.GENERATED, summary, config)
    return written_path

106 

107 

def write_pending_marker(
    drafts_dir: Path,
    slug: str,
    marker_line: str,
    frontmatter: str = "",
) -> Path:
    """Create (or overwrite) the PENDING marker page ``<drafts_dir>/<slug>.md``.

    *marker_line* is the leading HTML comment that identifies the marker
    kind and carries its context (source, label); it is written first,
    followed by a newline. When *frontmatter* is non-empty it is appended
    after one blank line, preserving minimal metadata for the drafts
    surface to round-trip (e.g. ``bad_title``-style fields).

    *drafts_dir* is created if it does not exist. Returns the marker path.
    """
    drafts_dir.mkdir(parents=True, exist_ok=True)

    # Joining on "\n" inserts the blank line between marker and frontmatter.
    sections = [f"{marker_line}\n"]
    if frontmatter:
        sections.append(frontmatter)

    marker_path = drafts_dir / f"{slug}.md"
    marker_path.write_text("\n".join(sections), encoding="utf-8")
    return marker_path

128 

129 

def delete_pending_marker_if_present(drafts_dir: Path, slug: str) -> bool:
    """Remove ``<drafts_dir>/<slug>.md`` when it is a PENDING marker.

    Returns ``True`` only when a file was deleted. ``False`` is returned
    when the file is missing, cannot be read (``OSError``), or its first
    line does not start with one of the pending-marker prefixes, so drafts
    of any other kind are left untouched.

    Matching is by exact slug, not fuzzy: if an LLM rephrases a label on
    retry (``brake system`` → ``braking system``) the old marker stays
    behind for the user to drain via ``wiki drafts reject``. Documented
    limitation; follow-up if the pattern matters.
    """
    candidate = drafts_dir / f"{slug}.md"
    if not candidate.is_file():
        return False

    try:
        text = candidate.read_text(encoding="utf-8")
    except OSError:
        return False

    # Only the opening line decides whether this draft is a PENDING marker.
    opening = next(iter(text.splitlines()), "")
    pending_prefixes = (_PENDING_PARSE_MARKER_PREFIX, _PENDING_COLLISION_MARKER_PREFIX)
    if opening.startswith(pending_prefixes):
        candidate.unlink()
        return True
    return False

154 

155 

def divert_concept_collision(
    *,
    slug: str,
    source: str,
    first_source: str,
    content: str,
    drafts_dir: Path,
) -> Path:
    """Hold the losing concept page at ``drafts/<slug>-collision-<hash>.md``.

    Nothing is written to the winning source's page here; only the draft
    file is created. The draft opens with a collision marker comment naming
    *first_source* (the source that already owns the slug) and *source*
    (whose content is being held), followed by *content*.

    ``<hash>`` is ``short_source_hash(source)``. The original notes describe
    it as the first 8 hex digits of sha256 of the source filename, stable
    per source, so a retry on the same two sources lands at the same draft
    path instead of piling up markers.

    Returns the path of the draft that was written.
    """
    # circular: persistence -> batch via short_source_hash (batch imports
    # persist_and_finalize / divert_concept_collision from persistence).
    from lilbee.wiki.batch import short_source_hash

    draft_name = f"{slug}-collision-{short_source_hash(source)}.md"
    header = (
        f"{_PENDING_COLLISION_MARKER_PREFIX} with source {first_source}, "
        f"content from {source} held for review -->\n\n"
    )

    drafts_dir.mkdir(parents=True, exist_ok=True)
    held_path = drafts_dir / draft_name
    held_path.write_text(header + content, encoding="utf-8")

    log.warning(
        "Concept slug collision: %s already written by %s; diverted %s's version to %s",
        slug,
        first_source,
        source,
        held_path,
    )
    return held_path