Coverage for src / lilbee / wiki / browse.py: 100%
95 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Wiki browse: shared page listing, reading, and resolution logic."""
3from __future__ import annotations
5from dataclasses import dataclass, field
6from datetime import date, datetime
7from pathlib import Path
8from typing import Any
10from lilbee.core.security import validate_path_within
11from lilbee.wiki.grammar import CODE_FENCE_RE, H1_RE
12from lilbee.wiki.index import parse_source_count
13from lilbee.wiki.shared import (
14 SUBDIR_TO_TYPE,
15 WIKI_CONTENT_SUBDIRS,
16 WikiSubdir,
17 parse_frontmatter,
18)
# Wiki paths under the root take the form ``<subdir>/<slug>.md``. A path
# relative to the wiki root needs at least this many components (the
# subdirectory plus the file name) before its first component is looked up
# as a page type; anything shorter is reported as "unknown" by
# ``_page_type_from_path``.
_WIKI_PATH_MIN_PARTS = 2
@dataclass
class WikiPageInfo:
    """Lightweight listing record describing one wiki page."""

    # Path of the page relative to the wiki root, without the ``.md`` suffix.
    slug: str
    # Human-readable title shown for the page.
    title: str
    # Page category string ("unknown" when it cannot be determined).
    page_type: str
    # Number of sources parsed from the page text.
    source_count: int
    # Creation timestamp as a string (ISO format when parsed from a date).
    created_at: str

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain dict that can be dumped to JSON."""
        exported = ("slug", "title", "page_type", "source_count", "created_at")
        return {key: getattr(self, key) for key in exported}
@dataclass
class WikiPageContent:
    """A wiki page's raw text together with its parsed frontmatter."""

    # Slug the page was requested under.
    slug: str
    # Resolved display title for the page.
    title: str
    # Page text exactly as read from disk.
    content: str
    # Parsed frontmatter mapping; every instance gets its own empty dict
    # by default so instances never share mutable state.
    frontmatter: dict[str, Any] = field(default_factory=dict)
def list_md_files(directory: Path) -> list[Path]:
    """List the ``*.md`` files directly inside *directory*, in sorted order.

    Subdirectories are not searched. A missing path, or one that is not a
    directory, yields an empty list instead of raising.
    """
    return sorted(directory.glob("*.md")) if directory.is_dir() else []
def _page_type_from_path(path: Path, wiki_root: Path) -> str:
    """Map a page path to its type using its top-level wiki subdirectory.

    Returns "unknown" when *path* is not under *wiki_root*, when the
    relative path has fewer than ``_WIKI_PATH_MIN_PARTS`` components, or
    when the first component has no entry in ``SUBDIR_TO_TYPE``.
    """
    try:
        segments = path.relative_to(wiki_root).parts
    except ValueError:
        # The path lies outside the wiki root, so no subdirectory applies.
        return "unknown"
    if len(segments) < _WIKI_PATH_MIN_PARTS:
        return "unknown"
    return SUBDIR_TO_TYPE.get(segments[0], "unknown")
def _slug_from_path(path: Path, wiki_root: Path) -> str:
    """Build a URL slug from a wiki page path.

    The slug is the path relative to *wiki_root* with the final suffix
    removed and path separators rendered as forward slashes.

    Args:
        path: Location of the page; must be under *wiki_root*.
        wiki_root: Root directory of the wiki.

    Returns:
        The slug, e.g. ``summaries/cv-manual`` for
        ``<wiki_root>/summaries/cv-manual.md``.

    Raises:
        ValueError: If *path* is not located under *wiki_root*.
    """
    relative = path.relative_to(wiki_root)
    # as_posix() converts only real path separators. A blanket
    # str.replace("\\", "/") would also rewrite literal backslashes inside
    # POSIX file names, yielding a slug that no longer maps back to the file.
    return relative.with_suffix("").as_posix()
def _extract_h1_title(text: str) -> str | None:
    """Find the first top-level markdown heading that is outside a code fence.

    Returns the stripped first capture group of ``H1_RE`` for the first
    matching line, or None when no such line exists.
    """
    inside_code = False
    for raw_line in text.splitlines():
        if CODE_FENCE_RE.match(raw_line):
            # Every fence marker flips the state; the marker line itself is
            # never considered as a heading.
            inside_code = not inside_code
        elif not inside_code:
            heading = H1_RE.match(raw_line)
            if heading:
                return heading.group(1).strip()
    return None
def _resolve_page_title(fm: dict[str, Any], text: str, path: Path) -> str:
    """Choose the display title for a page.

    Precedence: a non-None frontmatter ``title`` (stringified), then the
    first H1 heading in the body, then the file stem with hyphens turned
    into spaces and title-cased.

    Wiki generation does not emit a frontmatter title today, so without the
    H1 step every page would render as the slug (e.g. 'Cv Manual' for
    cv-manual.md).
    """
    explicit = fm.get("title")
    if explicit is not None:
        return str(explicit)

    heading = _extract_h1_title(text)
    if heading is not None:
        return heading

    # Last resort: derive a readable title from the file name.
    return path.stem.replace("-", " ").title()
def build_page_info(path: Path, wiki_root: Path) -> WikiPageInfo:
    """Build a WikiPageInfo from a markdown file on disk.

    Args:
        path: Markdown file to summarize; must be located under *wiki_root*.
        wiki_root: Root directory of the wiki, used to derive slug and type.

    Returns:
        Summary metadata for the page. ``created_at`` comes from the
        ``generated_at`` frontmatter key and is ``""`` when that key is
        missing or null.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
        ValueError: If *path* is not located under *wiki_root*.
    """
    text = path.read_text(encoding="utf-8")
    fm = parse_frontmatter(text)
    raw_at = fm.get("generated_at")
    if raw_at is None:
        # A missing key and an explicit null (``generated_at:``) are treated
        # alike; str(None) would otherwise leak the literal "None".
        created_at = ""
    elif isinstance(raw_at, (datetime, date)):
        # yaml.safe_load returns datetime/date objects for date-like strings
        created_at = raw_at.isoformat()
    else:
        created_at = str(raw_at)
    return WikiPageInfo(
        slug=_slug_from_path(path, wiki_root),
        title=_resolve_page_title(fm, text, path),
        page_type=_page_type_from_path(path, wiki_root),
        source_count=parse_source_count(text),
        created_at=created_at,
    )
def find_page(wiki_root: Path, slug: str) -> Path | None:
    """Locate the markdown file for *slug* under *wiki_root*.

    The candidate ``<wiki_root>/<slug>.md`` is checked with
    ``validate_path_within`` so that a slug cannot be used for path
    traversal. Returns None when that check raises ValueError or when the
    candidate is not an existing file.
    """
    target = wiki_root / f"{slug}.md"
    try:
        validate_path_within(target, wiki_root)
    except ValueError:
        # Rejected by the containment check: treat as "no such page".
        return None
    if not target.is_file():
        return None
    return target
def _list_md_files_recursive(directory: Path) -> list[Path]:
    """List every ``*.md`` file below *directory*, at any depth, sorted.

    A missing path, or one that is not a directory, yields an empty list.
    """
    return sorted(directory.rglob("*.md")) if directory.is_dir() else []
def list_pages(wiki_root: Path) -> list[WikiPageInfo]:
    """Collect page summaries for every content subdirectory of the wiki.

    Each subdirectory named in ``WIKI_CONTENT_SUBDIRS`` is searched at any
    nesting depth; results keep subdirectory order, then sorted path order.
    """
    return [
        build_page_info(md_path, wiki_root)
        for section in WIKI_CONTENT_SUBDIRS
        for md_path in _list_md_files_recursive(wiki_root / section)
    ]
def list_draft_pages(wiki_root: Path) -> list[WikiPageInfo]:
    """Collect summaries of draft pages (those that failed the quality gate).

    The drafts subdirectory is searched recursively, so pages stored in
    per-source directories are included.
    """
    drafts_dir = wiki_root / WikiSubdir.DRAFTS
    draft_paths = _list_md_files_recursive(drafts_dir)
    return [build_page_info(md_path, wiki_root) for md_path in draft_paths]
def read_page(wiki_root: Path, slug: str) -> WikiPageContent | None:
    """Load the page identified by *slug* along with its parsed frontmatter.

    Returns None when ``find_page`` cannot resolve the slug, i.e. the page
    does not exist or the slug escapes *wiki_root*.
    """
    page_path = find_page(wiki_root, slug)
    if page_path is None:
        return None

    raw = page_path.read_text(encoding="utf-8")
    meta = parse_frontmatter(raw)
    return WikiPageContent(
        slug=slug,
        title=_resolve_page_title(meta, raw, page_path),
        content=raw,
        frontmatter=meta,
    )