Coverage for src / lilbee / wiki / browse.py: 100%

95 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""Wiki browse: shared page listing, reading, and resolution logic.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import dataclass, field 

6from datetime import date, datetime 

7from pathlib import Path 

8from typing import Any 

9 

10from lilbee.core.security import validate_path_within 

11from lilbee.wiki.grammar import CODE_FENCE_RE, H1_RE 

12from lilbee.wiki.index import parse_source_count 

13from lilbee.wiki.shared import ( 

14 SUBDIR_TO_TYPE, 

15 WIKI_CONTENT_SUBDIRS, 

16 WikiSubdir, 

17 parse_frontmatter, 

18) 

19 

# Wiki paths under the root take the form ``<subdir>/<slug>.md``; only paths
# with at least this many components carry a meaningful page type.
_WIKI_PATH_MIN_PARTS = 2

23 

24 

@dataclass
class WikiPageInfo:
    """Summary metadata describing one wiki page in a listing."""

    # Root-relative slug with the ``.md`` suffix stripped (see _slug_from_path).
    slug: str
    # Display title resolved from frontmatter, body H1, or the file stem.
    title: str
    # Page type derived from the page's subdirectory ("unknown" when unresolvable).
    page_type: str
    # Value produced by parse_source_count for the page body.
    source_count: int
    # ISO-formatted generation timestamp, or the raw frontmatter value as a string.
    created_at: str

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict suitable for JSON responses."""
        field_names = ("slug", "title", "page_type", "source_count", "created_at")
        return {name: getattr(self, name) for name in field_names}

44 

45 

@dataclass
class WikiPageContent:
    """A wiki page's full markdown text plus its parsed frontmatter."""

    # Slug the page was requested under.
    slug: str
    # Resolved display title.
    title: str
    # Raw markdown text, frontmatter included.
    content: str
    # Parsed frontmatter mapping; a fresh empty dict per instance by default.
    frontmatter: dict[str, Any] = field(default_factory=dict)

54 

55 

def list_md_files(directory: Path) -> list[Path]:
    """Markdown files directly inside *directory*, sorted by path.

    A missing path or a non-directory yields an empty list instead of raising.
    """
    if directory.is_dir():
        return sorted(directory.glob("*.md"))
    return []

61 

62 

63def _page_type_from_path(path: Path, wiki_root: Path) -> str: 

64 """Determine page type from its location relative to wiki root.""" 

65 try: 

66 relative = path.relative_to(wiki_root) 

67 except ValueError: 

68 return "unknown" 

69 parts = relative.parts 

70 if len(parts) >= _WIKI_PATH_MIN_PARTS: 

71 return SUBDIR_TO_TYPE.get(parts[0], "unknown") 

72 return "unknown" 

73 

74 

75def _slug_from_path(path: Path, wiki_root: Path) -> str: 

76 """Build a URL slug from a wiki page path.""" 

77 relative = path.relative_to(wiki_root) 

78 return str(relative.with_suffix("")).replace("\\", "/") 

79 

80 

def _extract_h1_title(text: str) -> str | None:
    """First top-level heading in the markdown body, or None.

    Lines inside fenced code blocks are skipped so that ``# comment`` lines in
    code samples are never mistaken for headings.
    """
    inside_code = False
    for raw_line in text.splitlines():
        if CODE_FENCE_RE.match(raw_line):
            # A fence line toggles code-block state and is never itself a title.
            inside_code = not inside_code
        elif not inside_code and (match := H1_RE.match(raw_line)):
            return match.group(1).strip()
    return None

93 

94 

95def _resolve_page_title(fm: dict[str, Any], text: str, path: Path) -> str: 

96 """Pick a page title. Frontmatter wins; body H1 beats slug-title-case fallback. 

97 

98 Wiki generation does not emit a frontmatter title today, so without the H1 

99 step every page would render as the slug (e.g. 'Cv Manual' for cv-manual.md). 

100 """ 

101 if (fm_title := fm.get("title")) is not None: 

102 return str(fm_title) 

103 if (h1 := _extract_h1_title(text)) is not None: 

104 return h1 

105 return path.stem.replace("-", " ").title() 

106 

107 

def build_page_info(path: Path, wiki_root: Path) -> WikiPageInfo:
    """Read the markdown file at *path* and assemble its WikiPageInfo summary."""
    text = path.read_text(encoding="utf-8")
    fm = parse_frontmatter(text)
    raw_at = fm.get("generated_at", "")
    # yaml.safe_load hands back datetime/date objects for date-like strings;
    # normalize everything to a plain string for JSON-friendly output.
    if isinstance(raw_at, (datetime, date)):
        created_at = raw_at.isoformat()
    else:
        created_at = str(raw_at)
    return WikiPageInfo(
        slug=_slug_from_path(path, wiki_root),
        title=_resolve_page_title(fm, text, path),
        page_type=_page_type_from_path(path, wiki_root),
        source_count=parse_source_count(text),
        created_at=created_at,
    )

126 

127 

def find_page(wiki_root: Path, slug: str) -> Path | None:
    """Resolve *slug* to an existing page path under *wiki_root*, else None.

    The candidate is checked with validate_path_within before any filesystem
    access so slugs containing ``..`` cannot escape the wiki root.
    """
    candidate = wiki_root / f"{slug}.md"
    try:
        validate_path_within(candidate, wiki_root)
    except ValueError:
        # Traversal attempt — treat it the same as a missing page.
        return None
    if candidate.is_file():
        return candidate
    return None

139 

140 

141def _list_md_files_recursive(directory: Path) -> list[Path]: 

142 """Sorted markdown files under *directory* at any depth.""" 

143 if not directory.is_dir(): 

144 return [] 

145 return sorted(directory.rglob("*.md")) 

146 

147 

def list_pages(wiki_root: Path) -> list[WikiPageInfo]:
    """List all wiki pages under summaries/ and synthesis/ at any nesting depth."""
    return [
        build_page_info(md_path, wiki_root)
        for subdir in WIKI_CONTENT_SUBDIRS
        for md_path in _list_md_files_recursive(wiki_root / subdir)
    ]

155 

156 

def list_draft_pages(wiki_root: Path) -> list[WikiPageInfo]:
    """List draft pages that failed the quality gate (recurses into per-source dirs)."""
    drafts_dir = wiki_root / WikiSubdir.DRAFTS
    return [build_page_info(p, wiki_root) for p in _list_md_files_recursive(drafts_dir)]

163 

164 

def read_page(wiki_root: Path, slug: str) -> WikiPageContent | None:
    """Load a page's raw markdown, parsed frontmatter, and resolved title.

    Returns None if the page does not exist or the slug escapes wiki_root
    (both cases are handled by find_page).
    """
    page_path = find_page(wiki_root, slug)
    if page_path is None:
        return None
    text = page_path.read_text(encoding="utf-8")
    frontmatter = parse_frontmatter(text)
    return WikiPageContent(
        slug=slug,
        title=_resolve_page_title(frontmatter, text, page_path),
        content=text,
        frontmatter=frontmatter,
    )