Coverage for src/lilbee/core/config/model.py: 100% (392 statements)

coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""The :class:`Config` dataclass and the ``cfg`` singleton. 

2 

3The settings sources, TOML parser, and the resilient builder that falls 

4back to defaults on stale-config validation failures live here too. Every 

5``from lilbee.core.config import cfg`` resolves through ``lilbee.core.config.__init__`` 

6to the same instance defined at module bottom. 

7""" 

8 

9import logging 

10import os 

11from pathlib import Path 

12from typing import Any, ClassVar 

13 

14from pydantic import Field, ValidationInfo, field_validator, model_validator 

15from pydantic_settings import BaseSettings, SettingsConfigDict 

16 

17from .defaults import ( 

18 DEFAULT_ALLOWED_NER_LABELS, 

19 DEFAULT_CORS_ORIGIN_REGEX, 

20 DEFAULT_CRAWL_EXCLUDE_PATTERNS, 

21 DEFAULT_GENERAL_SYSTEM_PROMPT, 

22 DEFAULT_IGNORE_DIRS, 

23 DEFAULT_RAG_SYSTEM_PROMPT, 

24) 

25from .enums import ChatMode, ClustererBackend, KvCacheType, WikiEntityMode 

26from .parsing import parse_bool 

27from .validators import ConfigField 

28 

29log = logging.getLogger(__name__) 

30 

31# Sentinel for unset Path-typed fields. ``Field(default=Path())`` produces an 

32# instance equal to this, so the model_validator can distinguish "user passed 

33# the default" from "user explicitly set a value". 

34_UNSET_PATH = Path() 

35 

36 

37class Config(BaseSettings): 

38 """Runtime configuration: one singleton instance, mutated by CLI overrides.""" 

39 

40 model_config = SettingsConfigDict( 

41 env_prefix="LILBEE_", 

42 validate_assignment=True, 

43 arbitrary_types_allowed=True, 

44 extra="ignore", 

45 ) 

46 

47 # Paths: resolved from env/defaults in model_validator(mode='before') 

48 data_root: Path = Field(default=Path()) 

49 # Writable so plugin-managed servers can pivot storage to a vault path on 

50 # first boot; rebuild the index after migrating. 

51 documents_dir: Path = ConfigField(default=Path(), writable=True) 

52 data_dir: Path = Field(default=Path()) 

53 lancedb_dir: Path = Field(default=Path()) 

54 models_dir: Path = Field(default=Path()) 

55 # Markdown vault root; when set, search results carry a vault-relative 

56 # ``vault_path`` so a host UI can deep-link into the vault. 

57 vault_base: Path | None = ConfigField(default=None, writable=True) 

58 

59 chat_model: str = Field(default="Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf", min_length=1) 

60 embedding_model: str = Field( 

61 default="nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_K_M.gguf", 

62 min_length=1, 

63 ) 

64 # Vision OCR model for scanned PDFs and image-only pages. Empty = disabled; 

65 # there is no cross-role fallback onto the chat model even if multimodal. 

66 vision_model: str = ConfigField(default="", public=True) 

67 embedding_dim: int = Field(default=768, ge=1) 

68 chunk_size: int = ConfigField(default=512, ge=64, writable=True, reindex=True) 

69 chunk_overlap: int = ConfigField(default=100, ge=0, writable=True, reindex=True) 

70 max_embed_chars: int = Field(default=2000, ge=1) 

71 top_k: int = ConfigField(default=8, ge=1, writable=True) 

72 max_distance: float = ConfigField(default=0.65, ge=0.0, writable=True) 

73 # Floor for hybrid-search relevance scores (0.0 = no filtering). lilbee 

74 # surfaces LanceDB's raw RRF sum, not a normalized score: with K=60 a 

75 # chunk ranked first in both the vector and FTS lists tops out near 

76 # 1/61 + 1/61 ~= 0.033, so any positive floor above that silently drops 

77 # every result. Keep this at 0.0 unless the RRF scores are normalized first. 

78 min_relevance_score: float = ConfigField(default=0.0, ge=0.0, writable=True) 
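    # A minimal sketch of the RRF sum described above (hypothetical helper,
    # assuming LanceDB's default K=60 and 1-based ranks in the vector and
    # FTS lists):
    #
    #     def rrf_sum(ranks: list[int], k: int = 60) -> float:
    #         return sum(1.0 / (k + r) for r in ranks)
    #
    #     rrf_sum([1, 1])  # ~= 0.0328, the practical ceiling of the raw score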

    adaptive_threshold: bool = Field(default=False)
    rag_system_prompt: str = ConfigField(
        default=DEFAULT_RAG_SYSTEM_PROMPT, min_length=1, writable=True
    )
    general_system_prompt: str = ConfigField(
        default=DEFAULT_GENERAL_SYSTEM_PROMPT, min_length=1, writable=True
    )
    chat_mode: str = ConfigField(default=ChatMode.SEARCH.value, writable=True)
    ignore_dirs: frozenset[str] = Field(default=DEFAULT_IGNORE_DIRS)
    # OCR for scanned PDFs via vision-capable chat model.
    # None = auto-detect (use OCR if chat model is vision-capable).
    # True = force OCR regardless of detection.
    # False = disable OCR entirely.
    enable_ocr: bool | None = ConfigField(default=None, writable=True)
    # Per-page timeout in seconds for vision OCR (0 = no limit).
    ocr_timeout: float = ConfigField(default=120.0, ge=0.0, writable=True)
    # Outer wall-clock budget for the streamed pool drain: load grace plus
    # per_page * pages. Tune up for slow hardware (M1 Pro vision is
    # ~5min/page) or down for fast hardware. ocr_timeout still governs the
    # per-page expectation that drives the total budget.
    vision_load_budget_s: float = ConfigField(default=300.0, ge=0.0, writable=True)

    # Tesseract fallback wall-clock timeout per file, seconds. 0 = no cap.
    tesseract_timeout: float = ConfigField(default=60.0, ge=0.0, writable=True)
    semantic_chunking: bool = ConfigField(default=False, writable=True)
    topic_threshold: float = ConfigField(default=0.75, ge=0.0, le=1.0, writable=True)
    server_host: str = "127.0.0.1"
    server_port: int = Field(default=0, ge=0, le=65535)
    cors_origins: list[str] = Field(default_factory=list)
    cors_origin_regex: str = Field(default=DEFAULT_CORS_ORIGIN_REGEX)
    # Seconds between SSE heartbeat events when the producer queue is idle.
    # Must stay well below the plugin's STREAM_IDLE_TIMEOUT_MS (120s) so a
    # single long-running vision OCR page can't starve the client into aborting.
    sse_heartbeat_interval: float = ConfigField(default=30.0, ge=0.0, writable=True)
    json_mode: bool = False
    temperature: float | None = ConfigField(default=0.1, ge=0.0, writable=True)
    top_p: float | None = ConfigField(default=0.9, ge=0.0, le=1.0, writable=True)
    top_k_sampling: int | None = ConfigField(default=40, ge=1, writable=True)
    # 1.1 is llama.cpp's default. Leaving this at None caused n-gram loops
    # ("tire tire tire...") on some open-weights models.
    repeat_penalty: float | None = ConfigField(default=1.1, ge=0.0, writable=True)
    num_ctx: int | None = ConfigField(default=None, ge=1, writable=True)
    max_tokens: int | None = ConfigField(default=4096, ge=1, writable=True)
    seed: int | None = ConfigField(default=None, writable=True)
    llm_provider: str = ConfigField(default="auto", writable=True)
    remote_base_url: str = ConfigField(default="http://localhost:11434", writable=True)
    llm_api_key: str = ConfigField(default="", writable=True, write_only=True)
    openrouter_api_key: str = ConfigField(default="", writable=True, write_only=True)
    gemini_api_key: str = ConfigField(default="", writable=True, write_only=True)
    anthropic_api_key: str = ConfigField(default="", writable=True, write_only=True)
    openai_api_key: str = ConfigField(default="", writable=True, write_only=True)
    mistral_api_key: str = ConfigField(default="", writable=True, write_only=True)
    deepseek_api_key: str = ConfigField(default="", writable=True, write_only=True)

    # Retrieval quality knobs.

    # Max chunks per source in top-k; prevents one large file monopolizing results.
    diversity_max_per_source: int = ConfigField(default=3, ge=1, writable=True)

    # MMR relevance/diversity tradeoff; 0 = max diversity, 1 = pure relevance
    # (Carbonell & Goldstein 1998).
    mmr_lambda: float = ConfigField(default=0.5, ge=0.0, le=1.0, writable=True)

    # Extra candidates retrieved for MMR reranking (multiplies top_k).
    candidate_multiplier: int = ConfigField(default=3, ge=1, writable=True)

    # LLM-generated alternative queries for expansion. 0 disables.
    query_expansion_count: int = ConfigField(default=3, ge=0, writable=True)

    # Skip LLM expansion when tokenized query length ≤ this. The LLM round-trip
    # dominates latency on small local models; short queries already have strong
    # BM25/vector signal. Concept-graph expansion still runs. 0 disables the skip.
    expansion_short_query_tokens: int = ConfigField(default=2, ge=0, writable=True)

    # Cosine-distance step when adaptive-widening retry kicks in.
    adaptive_threshold_step: float = ConfigField(default=0.2, gt=0.0, writable=True)

    # Reject expansion variants below expansion_similarity_threshold.
    expansion_guardrails: bool = ConfigField(default=True, writable=True)

    # Min cosine similarity between question and variant embeddings.
    expansion_similarity_threshold: float = ConfigField(default=0.5, ge=0.0, le=1.0, writable=True)

    # Sigmoid-normalized BM25 score above which query expansion is skipped.
    expansion_skip_threshold: float = Field(default=0.8, ge=0.0, le=1.0)

    # Min BM25 top-1 vs top-2 gap to skip expansion.
    expansion_skip_gap: float = Field(default=0.15, ge=0.0, le=1.0)

    # Chunks included in LLM context after adaptive selection.
    max_context_sources: int = ConfigField(default=6, ge=1, writable=True)

    # HyDE (Gao et al. 2022): hypothetical-answer embedding search. +~500ms.
    hyde: bool = ConfigField(default=False, writable=True)

    # HyDE result weight relative to real-doc search (0.0-1.0).
    hyde_weight: float = ConfigField(default=0.7, ge=0.0, le=1.0, writable=True)

    # HyDE prompt template. Must contain {question} placeholder.
    hyde_prompt: str = (
        "Write a 50-100 word passage that directly answers this question as if "
        "it were an excerpt from a real document. Do not include any preamble, "
        "just write the passage.\n\nQuestion: {question}"
    )
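    # Formatting sketch (hypothetical call site; the real consumer lives
    # elsewhere): the template is filled with str.format, so it must keep
    # the {question} placeholder intact:
    #
    #     prompt = cfg.hyde_prompt.format(question="What changed in Q3?")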

    # Reranker model ref. Empty disables reranking. Native GGUFs use
    # llama-cpp rank pooling; hosted refs (cohere/voyage/jina/together/hf-tei)
    # need the backend extra.
    reranker_model: str = ConfigField(default="", public=True)

    # Candidate count sent to the reranker.
    rerank_candidates: int = ConfigField(default=60, ge=1, writable=True, public=True)

    # Date-range filter; only fires when a temporal keyword is detected.
    temporal_filtering: bool = ConfigField(default=True, writable=True)

    # If True, emit <think>…</think> content as separate SSE reasoning events;
    # if False, strip it silently.
    show_reasoning: bool = ConfigField(default=False, writable=True)

    # Maximum reasoning characters before lilbee forces the model to answer.
    # Per-model overrides apply on top of this default. Approx N/4 tokens.
    # 0 disables the cap (unlimited reasoning; accept the runaway-loop risk).
    max_reasoning_chars: int = ConfigField(default=64_000, ge=0, writable=True)

    # Web crawling.

    # Optional global ceilings. None = no ceiling.
    crawl_max_depth: int | None = ConfigField(default=None, ge=0, writable=True)
    crawl_max_pages: int | None = ConfigField(default=None, ge=1, writable=True)

    # Per-URL fetch timeout, seconds.
    crawl_timeout: int = ConfigField(default=30, ge=1, writable=True)

    # 0 = unlimited, default = CPU count.
    crawl_max_concurrent: int = Field(default=0, ge=0)

    # Seconds between periodic syncs during crawl. 0 = sync only at end.
    crawl_sync_interval: int = ConfigField(default=30, ge=0, writable=True)

    # Per-request delay + jitter (defaults chosen to be gentler than crawl4ai's).
    crawl_mean_delay: float = ConfigField(default=0.5, ge=0.0, writable=True)
    crawl_max_delay_range: float = ConfigField(default=0.5, ge=0.0, writable=True)

    # In-flight requests per crawl.
    crawl_concurrent_requests: int = ConfigField(default=3, ge=1, writable=True)

    # Per-domain rate-limiter that backs off on HTTP 429/503 and retries.
    crawl_retry_on_rate_limit: bool = ConfigField(default=True, writable=True)
    crawl_retry_base_delay_min: float = ConfigField(default=1.0, ge=0.0, writable=True)
    crawl_retry_base_delay_max: float = ConfigField(default=3.0, ge=0.0, writable=True)
    crawl_retry_max_backoff: float = ConfigField(default=30.0, ge=0.0, writable=True)
    crawl_retry_max_attempts: int = ConfigField(default=3, ge=0, writable=True)

    # Regex patterns dropped at link-discovery time. Defaults block CMS
    # scaffolding (WordPress admin, archives, tracking params, etc.).
    crawl_exclude_patterns: list[str] = ConfigField(
        default_factory=lambda: list(DEFAULT_CRAWL_EXCLUDE_PATTERNS),
        writable=True,
    )

    # Fraction of GPU/unified memory reserved for loaded models.
    gpu_memory_fraction: float = ConfigField(default=0.75, ge=0.1, le=1.0, writable=True)

    # Seconds a model stays loaded after last use. 0 = unload immediately.
    model_keep_alive: int = ConfigField(default=300, ge=0, writable=True)

    # Per-call deadline for one pool round-trip (send + recv). Embed batches
    # larger than this on slow machines surface as TimeoutError; raise for
    # heavy ingest jobs.
    worker_pool_call_timeout_s: float = ConfigField(default=300.0, gt=0.0, writable=True)

    # Spawn every configured role at startup instead of on first use. Trades
    # a slower TUI mount (~1-3s per worker, cold-started in parallel) for a
    # responsive first interaction. Roles whose model is unset are skipped,
    # so a setup with only chat + embed never spawns rerank or vision.
    # Set to false for headless / scripted use where the first call doesn't
    # need to be fast.
    worker_pool_eager_start: bool = ConfigField(default=True, writable=True)

    # Idle worker reap. A worker that has been quiet for this many seconds
    # is shut down to free RAM/VRAM; the next request respawns it.
    # ``0`` disables reaping (workers stay up until TUI exit).
    worker_pool_max_idle_s: float = ConfigField(default=300.0, ge=0.0, writable=True)

    # Upper bound for the dynamic n_ctx picker. The picker chooses the
    # largest 256-multiple ctx that fits in available memory and the
    # model's training window; this caps it at a sane ceiling.
    num_ctx_max: int = ConfigField(default=16384, ge=512, writable=True)

    # Flash attention. None (default) = on with TypeError fallback for
    # older llama-cpp-python builds, True = force on, False = off.
    # Resolves the 'padding V cache to 1024' warning on models with
    # uneven per-layer V dims (e.g. Gemma3) and saves ~25% KV memory.
    flash_attention: bool | None = ConfigField(default=None, writable=True)

    # KV cache element type. q8_0 / q4_0 halve or quarter cache memory
    # but require flash attention to be enabled.
    kv_cache_type: KvCacheType = ConfigField(default=KvCacheType.F16, writable=True)

    # Number of model layers to offload to GPU. None (default) = all
    # layers, 0 = CPU only, positive int = partial offload. Useful when a
    # discrete GPU has less VRAM than the model needs.
    n_gpu_layers: int | None = ConfigField(default=None, writable=True)

    # GPU device picker for dual-GPU machines (typical laptop case:
    # discrete NVIDIA + integrated Intel/AMD). The Vulkan backend
    # enumerates every adapter the system exposes and may pick the
    # integrated one first, producing stalls or OOMs that look like
    # llama.cpp bugs. Setting ``gpu_devices`` constrains visibility
    # before llama_cpp loads, pinning inference to the chosen device(s).
    #
    # Accepts a comma-separated list of device indexes ("0", "1",
    # "0,1") and applies it to every backend simultaneously:
    # ``GGML_VK_VISIBLE_DEVICES`` for Vulkan, ``CUDA_VISIBLE_DEVICES``
    # for CUDA, ``HIP_VISIBLE_DEVICES`` / ``ROCR_VISIBLE_DEVICES`` for
    # ROCm. Setting one variable that the active backend ignores is
    # harmless, so we set all four rather than detecting the build.
    #
    # Must be set before the first llama.cpp call; in practice that
    # means via ``LILBEE_GPU_DEVICES`` or ``config.toml`` (TUI edits
    # only take effect after a restart). ``None`` (default) hands off
    # to the autodetect in ``providers/llama_cpp/gpu_select.py``,
    # which parses ``vulkaninfo --summary`` and pins the discrete
    # adapter when one is present. The autodetect is silent on failure
    # (no vulkaninfo, single device, parse error), leaving the
    # Vulkan-loader's default ordering in place.
    gpu_devices: str | None = ConfigField(default=None, writable=True)
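    # Example (hypothetical shell session): pin inference to adapter 0
    # before the process starts, since the value must land before the
    # first llama.cpp call:
    #
    #     LILBEE_GPU_DEVICES=0 lilbee
    #
    # which exports GGML_VK_VISIBLE_DEVICES, CUDA_VISIBLE_DEVICES,
    # HIP_VISIBLE_DEVICES, and ROCR_VISIBLE_DEVICES as "0".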

    # Primary GPU index passed to ``Llama(main_gpu=...)``. Only matters
    # when multiple devices remain visible after ``gpu_devices``; with
    # a single visible device, llama.cpp ignores this. ``None``
    # (default) lets llama.cpp pick (index 0).
    main_gpu: int | None = ConfigField(default=None, writable=True)

    # True = Markdown widget for chat; False = plain Static (faster).
    markdown_rendering: bool = True

    # TUI theme name; persists the last Ctrl+T pick across sessions.
    theme: str = ConfigField(default="rose-pine", writable=True)

    # Per-model generation defaults set via apply_model_defaults().
    _model_defaults: Any = None

    # Wiki layer. LLM-maintained synthesis pages with citation provenance.
    # Off by default; flip to True (or set LILBEE_WIKI=1) to enable. When off,
    # the Wiki view tab and the chat ModelBar's scope picker are both hidden.
    wiki: bool = ConfigField(default=False, writable=True)
    wiki_dir: str = "wiki"
    wiki_prune_raw: bool = ConfigField(default=False, writable=True)

    # Minimum cosine similarity between a page body and the mean of its
    # source chunk vectors before a page is published (below → drafts).
    # Replaces the old LLM-based faithfulness score: mean-of-chunks is a
    # deterministic, zero-LLM-call signal that routes topic-drifted
    # pages to drafts without the 0.0 to 1.0 ambiguity of a model-emitted
    # number. Tuning knob: swap to per-chunk max or top-K-mean if the
    # default 0.5 produces false drafts.
    wiki_embedding_faithfulness_threshold: float = ConfigField(
        default=0.5, ge=0.0, le=1.0, writable=True
    )
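    # Gate sketch (assumed shapes and helper name; not the actual
    # implementation): cosine similarity between the page-body embedding
    # and the mean of its source chunk vectors, compared to the threshold:
    #
    #     import numpy as np
    #
    #     def publishable(page_vec: np.ndarray, chunk_vecs: np.ndarray, thr: float) -> bool:
    #         mean = chunk_vecs.mean(axis=0)
    #         cos = float(page_vec @ mean) / (
    #             float(np.linalg.norm(page_vec)) * float(np.linalg.norm(mean))
    #         )
    #         return cos >= thr  # below the threshold, the page goes to drafts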

    # Per-call output token cap for wiki generation. Without this a
    # reasoning model (Qwen3, DeepSeek-R1) can burn the full context
    # window emitting <think> tokens before the actual answer, taking
    # minutes per page. Default leaves headroom for a typical reasoning
    # budget plus a real response (~1000 output + ~1000 slack).
    wiki_summary_max_tokens: int = ConfigField(default=2048, ge=256, writable=True)

    # Wiki generation is a structured-output task: the model must emit the
    # block separators, the citation footnotes, and verbatim quotes. The
    # usual chat default (~0.8) is too creative for that. Lowering the
    # sampling temperature makes the model stick to the template and quote
    # more faithfully. 0.1 leaves just enough slack to avoid hard loops.
    wiki_temperature: float = ConfigField(default=0.1, ge=0.0, le=2.0, writable=True)

    # Fraction of citations that must be stale before a wiki page is flagged.
    wiki_stale_citation_threshold: float = Field(default=0.5, ge=0.0, le=1.0)

    # Fraction of content changed that triggers human-review drift guard.
    wiki_drift_threshold: float = Field(default=0.3, ge=0.0, le=1.0)

    # LLM prompt templates for wiki page generation. Writable so advanced
    # users can override them from /settings, config.toml, or
    # ``LILBEE_WIKI_*_PROMPT`` env vars. Templates must keep the expected
    # ``{placeholders}``. If you remove one, the generator will crash on
    # first use. The defaults below are the only reason the pipeline
    # works out of the box.
    wiki_summary_prompt: str = ConfigField(
        writable=True,
        default=(
            "You are a knowledge compiler. Given the source chunks below from a single "
            "document, write a concise wiki summary page in markdown.\n\n"
            "Rules:\n"
            "1. Every factual claim MUST have an inline citation [^src1], [^src2], etc.\n"
            "2. Cite the EXACT text from the source that supports each claim by quoting it.\n"
            "3. For interpretations or connections not directly stated in the source, "
            "mark with [*inference*].\n"
            "4. Use blockquotes (>) for directly cited facts.\n"
            "5. End with a citation block in this format:\n\n"
            "---\n"
            "<!-- citations (auto-generated from _citations table -- do not edit) -->\n"
            '[^src1]: {source_name}, excerpt: "exact quoted text"\n'
            '[^src2]: {source_name}, excerpt: "exact quoted text"\n\n'
            "Source document: {source_name}\n\n"
            "Chunks:\n{chunks_text}\n\n"
            "Write the wiki summary page now. Start with a heading."
        ),
    )
    wiki_synthesis_prompt: str = ConfigField(
        writable=True,
        default=(
            "You are a knowledge compiler. Given source chunks from MULTIPLE documents "
            "about related concepts, write a synthesis wiki page in markdown that connects "
            "ideas across sources.\n\n"
            "Rules:\n"
            "1. Every factual claim MUST have an inline citation [^src1], [^src2], etc.\n"
            "2. Cite the EXACT text from the source that supports each claim by quoting it.\n"
            "3. For connections, interpretations, or patterns you identify across sources, "
            "mark with [*inference*].\n"
            "4. Use blockquotes (>) for directly cited facts.\n"
            "5. Reference each source by its filename when drawing connections.\n"
            "6. End with a citation block in this format:\n\n"
            "---\n"
            "<!-- citations (auto-generated from _citations table -- do not edit) -->\n"
            '[^src1]: {{source_name}}, excerpt: "exact quoted text"\n'
            '[^src2]: {{source_name}}, excerpt: "exact quoted text"\n\n'
            "Topic: {topic}\n\n"
            "Sources:\n{source_list}\n\n"
            "Chunks:\n{chunks_text}\n\n"
            "Write the synthesis page now. Start with a heading."
        ),
    )

    # Wiki synthesis clusterer backend. CONCEPTS requires the [graph] extra
    # and falls back to EMBEDDING when unavailable.
    wiki_clusterer: ClustererBackend = ConfigField(
        default=ClustererBackend.EMBEDDING, writable=True
    )

    # Neighborhood size for the mutual-kNN graph. 0 = auto-scale from corpus size.
    wiki_clusterer_k: int = ConfigField(default=0, ge=0, writable=True)

    # LazyGraphRAG-style concept graph. Requires the [graph] extra.
    concept_graph: bool = ConfigField(default=True, writable=True)

    # Weight of concept overlap boost relative to vector similarity.
    concept_boost_weight: float = ConfigField(default=0.3, ge=0.0, le=1.0, writable=True)

    # Floor on post-boost distance to stop weak boosts from promoting marginal hits.
    concept_boost_floor: float = ConfigField(default=0.05, ge=0.0, writable=True)

    # Max noun-phrase concepts extracted per chunk.
    concept_max_per_chunk: int = ConfigField(default=5, ge=1, writable=True)

    # spaCy NER labels kept by the wiki entity extractor. Anything not
    # in this set (QUANTITY, CARDINAL, DATE, TIME, MONEY, PERCENT,
    # ORDINAL, ...) is dropped before aggregation. Override via
    # LILBEE_CONCEPT_ALLOWED_ENT_TYPES as a comma-separated list.
    concept_allowed_ent_types: frozenset[str] = Field(default=DEFAULT_ALLOWED_NER_LABELS)

    # Strategy used to extract entities for the concept/entity wiki.
    # NER_ENTITIES (default) pulls typed NER entities with spaCy; concept
    # pages are proposed by the LLM inside the per-source batched call,
    # not by the extractor. NER_CONCEPTS_PLUS_LLM_TYPES layers an
    # LLM-proposed domain schema on top. LLM_TAGGED asks the LLM to tag
    # every chunk (most expensive). Unimplemented modes fall back to
    # NER_ENTITIES.
    wiki_entity_mode: WikiEntityMode = ConfigField(
        default=WikiEntityMode.NER_ENTITIES, writable=True
    )

    # Minimum distinct chunk mentions before an entity or concept earns
    # its own wiki page. Filters one-off noise.
    wiki_entity_min_mentions: int = ConfigField(default=3, ge=1, writable=True)

    # Maximum chunks passed into each concept or entity page generation
    # call. Caps context size so one page does not blow the context
    # window on a prolific topic.
    wiki_concept_max_chunks_per_page: int = ConfigField(default=25, ge=1, writable=True)

    # Maximum number of related concepts the model is asked to list in
    # the `## Related` section of each page.
    wiki_related_max: int = ConfigField(default=8, ge=0, writable=True)

    # Auto-update cap: if a single sync touches more than this many
    # concept or entity pages, skip the per-slug regeneration and tell
    # the user to run `lilbee wiki update` explicitly. Keeps a surprise
    # bulk import from firing hundreds of LLM calls.
    wiki_ingest_update_cap: int = ConfigField(default=20, ge=1, writable=True)

    # Whether the per-source batched call asks the LLM to curate
    # concept pages alongside the pre-extracted entity list. False →
    # entity sections only, no concept curation (the incremental ingest
    # path uses this to avoid churning concept slugs per source-touch).
    wiki_extract_concepts: bool = ConfigField(default=True, writable=True)

    # Minimum chunk count a source must contribute before it is eligible
    # for concept curation. Sources below the floor still get a batched
    # call when they have entities (the prompt writes entity-only
    # sections); sources below the floor with zero entities are skipped
    # entirely. Prevents boilerplate / TOC / appendix documents from
    # burning an LLM call to invent "concepts".
    wiki_batch_min_chunks: int = ConfigField(default=3, ge=1, writable=True)

    # Prompt template for the per-source batched call. Placeholders:
    # {source}, {entity_list}, {chunks_text}, {concept_instruction}.
    # {concept_instruction} is filled with a concept-curation paragraph
    # when concepts are requested, or the empty string otherwise.
    wiki_entity_batch_prompt: str = ConfigField(
        writable=True,
        default=(
            "You are writing wiki sections based on these chunks from {source}.\n\n"
            "{concept_instruction}"
            "Write a wiki section for each of these NER ENTITIES: {entity_list}\n\n"
            "Format each section exactly as:\n"
            "## Name\n"
            "{{content with [^src1]-style citations}}\n\n"
            "Rules:\n"
            "1. Every factual claim MUST have an inline citation [^src1], [^src2], etc.\n"
            "2. Cite the EXACT text from the source that supports each claim by quoting it.\n"
            "3. For interpretations or connections not directly stated, mark with [*inference*].\n"
            "4. Use blockquotes (>) for directly cited facts.\n"
            "5. End the response with a citation block in this format:\n\n"
            "---\n"
            "<!-- citations (auto-generated from _citations table -- do not edit) -->\n"
            '[^src1]: {{source_name}}, excerpt: "exact quoted text"\n'
            '[^src2]: {{source_name}}, excerpt: "exact quoted text"\n\n'
            "Source chunks:\n{chunks_text}\n"
        ),
    )
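    # Formatting sketch (hypothetical values): single-braced placeholders
    # are filled by str.format, while double-braced spans such as
    # {{source_name}} survive as literal {source_name} text in the
    # rendered prompt:
    #
    #     prompt = cfg.wiki_entity_batch_prompt.format(
    #         source="notes.md",
    #         entity_list="Ada Lovelace, Analytical Engine",
    #         chunks_text="[chunk 1] ...",
    #         concept_instruction="",
    #     )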

    # Class variable: not a settings field
    _toml_cache: ClassVar[dict[str, Any]] = {}

    @field_validator(
        "temperature",
        "top_p",
        "repeat_penalty",
        "top_k_sampling",
        "num_ctx",
        "seed",
        mode="before",
    )
    @classmethod
    def _empty_string_to_none(cls, v: Any) -> Any:
        if isinstance(v, str) and v.strip() == "":
            return None
        return v
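    # e.g. a settings form that clears the field and persists
    # LILBEE_TEMPERATURE="" now yields temperature=None instead of a
    # ValidationError (hypothetical scenario, shown for illustration).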

    @field_validator("chat_mode", mode="before")
    @classmethod
    def _normalize_chat_mode(cls, v: Any) -> str:
        """Coerce chat_mode to a ChatMode value; default ChatMode.SEARCH."""
        if v is None or v == "":
            return ChatMode.SEARCH.value
        candidate = str(v).strip().lower()
        try:
            return ChatMode(candidate).value
        except ValueError as exc:
            valid = ", ".join(repr(m.value) for m in ChatMode)
            raise ValueError(f"chat_mode must be one of {{{valid}}}, got {v!r}") from exc

    @field_validator("enable_ocr", mode="before")
    @classmethod
    def _parse_enable_ocr(cls, v: Any) -> bool | None:
        """Parse enable_ocr from env var string or direct value.

        Accepts: true/false/1/0/yes/no (case-insensitive), empty string
        or None for auto-detect.
        """
        if v is None:
            return None
        if isinstance(v, bool):
            return v
        if isinstance(v, str):
            if v.strip().lower() in ("", "auto", "none"):
                return None
            try:
                return parse_bool(v)
            except ValueError:
                pass  # fall through to bool() coercion below for unrecognised strings
        return bool(v)

    @field_validator("flash_attention", mode="before")
    @classmethod
    def _parse_flash_attention(cls, v: Any) -> bool | None:
        """Auto/on/off tri-state: empty/auto/none -> None, else parse bool."""
        if v is None:
            return None
        if isinstance(v, bool):
            return v
        if isinstance(v, str):
            if v.strip().lower() in ("", "auto", "none"):
                return None
            try:
                return parse_bool(v)
            except ValueError:
                return None
        return bool(v)

    @field_validator("n_gpu_layers", mode="before")
    @classmethod
    def _parse_n_gpu_layers(cls, v: Any) -> int | None:
        """Auto -> None, ``cpu`` alias -> 0, integers parsed verbatim."""
        if v is None:
            return None
        if isinstance(v, str):
            label = v.strip().lower()
            if label in ("", "auto", "none"):
                return None
            if label == "cpu":
                return 0
            try:
                return int(label)
            except ValueError:
                log.warning("Invalid LILBEE_N_GPU_LAYERS=%r, using auto", v)
                return None
        return int(v)

    @field_validator("main_gpu", mode="before")
    @classmethod
    def _parse_main_gpu(cls, v: Any) -> int | None:
        """Empty/auto strings -> None, integers parsed verbatim."""
        if v is None:
            return None
        if isinstance(v, str):
            label = v.strip().lower()
            if label in ("", "auto", "none"):
                return None
            try:
                return int(label)
            except ValueError:
                log.warning("Invalid LILBEE_MAIN_GPU=%r, using auto", v)
                return None
        return int(v)

    @field_validator("gpu_devices", mode="before")
    @classmethod
    def _parse_gpu_devices(cls, v: Any) -> str | None:
        """Normalize device list: strip whitespace, drop empties, keep order."""
        if v is None:
            return None
        if isinstance(v, str):
            label = v.strip().lower()
            if label in ("", "auto", "all", "none"):
                return None
            parts = [p.strip() for p in v.split(",") if p.strip()]
            if not parts:
                return None
            for part in parts:
                if not part.lstrip("-").isdigit():
                    log.warning("Invalid LILBEE_GPU_DEVICES=%r, ignoring", v)
                    return None
            return ",".join(parts)
        return str(v)

    @field_validator("semantic_chunking", mode="before")
    @classmethod
    def _parse_semantic_chunking(cls, v: Any) -> bool:
        """Parse from env string; invalid values warn and fall back to False."""
        if isinstance(v, bool):
            return v
        if isinstance(v, str):
            try:
                return parse_bool(v)
            except ValueError:
                log.warning("Invalid LILBEE_SEMANTIC_CHUNKING=%r, using default False", v)
                return False
        return bool(v)

    @field_validator(
        "chat_model", "embedding_model", "vision_model", "reranker_model", mode="after"
    )
    @classmethod
    def _normalize_model_tag(cls, v: str, info: ValidationInfo) -> str:
        """Validate and canonicalize a model ref; blank clears optional roles."""
        if not v or not v.strip():
            if info.field_name in {"chat_model", "embedding_model"}:
                raise ValueError(f"{info.field_name} must not be blank")
            return ""
        from lilbee.providers.model_ref import parse_model_ref

        return parse_model_ref(v).for_openai_prefix()

    @field_validator("cors_origins", mode="before")
    @classmethod
    def _split_cors_origins(cls, v: Any) -> Any:
        if isinstance(v, str):
            return [o.strip() for o in v.split(",") if o.strip()]
        return v

    @field_validator("crawl_exclude_patterns", mode="before")
    @classmethod
    def _split_crawl_exclude_patterns(cls, v: Any) -> Any:
        """Accept newline-separated strings from env vars / plain-text config.

        Regex commonly uses commas (e.g. `{2,4}`) and pipes (alternation), so
        newline is the only separator safe to use for this field. TOML lists
        and JSON arrays pass through unchanged.
        """
        if isinstance(v, str):
            return [p.strip() for p in v.splitlines() if p.strip()]
        return v
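    # e.g. a hypothetical env value -- newline-separated because patterns
    # such as r"/page/\d{2,4}" legitimately contain commas:
    #
    #     export LILBEE_CRAWL_EXCLUDE_PATTERNS=$'/wp-admin/\n[?&]utm_'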

    @field_validator("crawl_exclude_patterns", mode="after")
    @classmethod
    def _validate_crawl_exclude_patterns(cls, v: list[str]) -> list[str]:
        """Reject any entry that isn't a valid Python regex.

        These patterns are compiled at crawl time. An invalid pattern there
        surfaces as an opaque mid-crawl error; catching it at PATCH time gives
        the user a 400 with a pointer to the bad entry.
        """
        import re

        bad: list[str] = []
        for i, pattern in enumerate(v):
            try:
                re.compile(pattern)
            except re.error as exc:
                bad.append(f"[{i}] {pattern!r}: {exc}")
        if bad:
            raise ValueError("invalid regex in crawl_exclude_patterns:\n " + "\n ".join(bad))
        return v

    @field_validator("ignore_dirs", mode="before")
    @classmethod
    def _merge_ignore_dirs(cls, v: Any) -> frozenset[str]:
        if isinstance(v, str):
            extra = frozenset(name.strip() for name in v.split(",") if name.strip())
            return DEFAULT_IGNORE_DIRS | extra
        if isinstance(v, (set, frozenset, list)):
            return DEFAULT_IGNORE_DIRS | frozenset(v)
        return DEFAULT_IGNORE_DIRS
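    # e.g. LILBEE_IGNORE_DIRS="build,dist" (hypothetical value) yields
    # DEFAULT_IGNORE_DIRS | {"build", "dist"}: union semantics, unlike the
    # replace semantics of concept_allowed_ent_types below.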

    @field_validator("concept_allowed_ent_types", mode="before")
    @classmethod
    def _parse_ent_types(cls, v: Any) -> frozenset[str]:
        """Replace-semantics override: a narrowed set is used as-is,
        not unioned with defaults. A user asking for ``PERSON,ORG``
        wants exactly those kinds. Accepts comma-separated strings
        from env and list / set / frozenset from code. Empty input
        falls back to :data:`DEFAULT_ALLOWED_NER_LABELS` so an empty
        env var does not silently disable the gate.
        """
        if isinstance(v, str):
            parts = frozenset(name.strip().upper() for name in v.split(",") if name.strip())
            return parts or DEFAULT_ALLOWED_NER_LABELS
        if isinstance(v, (set, frozenset, list)):
            parts = frozenset(str(x).upper() for x in v)
            return parts or DEFAULT_ALLOWED_NER_LABELS
        return DEFAULT_ALLOWED_NER_LABELS

    @model_validator(mode="before")
    @classmethod
    def _resolve_defaults(cls, data: Any) -> Any:
        from lilbee.core.system import canonical_models_dir, default_data_dir, find_local_root

        if not isinstance(data, dict):  # pragma: no cover
            return data

        if data.get("data_root") in (None, _UNSET_PATH):
            data_env = os.environ.get("LILBEE_DATA", "").strip()
            if data_env:
                data["data_root"] = Path(data_env)
            else:
                local = find_local_root()
                data["data_root"] = local if local is not None else default_data_dir()
        root = data["data_root"]
        if data.get("documents_dir") in (None, _UNSET_PATH):
            data["documents_dir"] = root / "documents"
        if data.get("data_dir") in (None, _UNSET_PATH):
            data["data_dir"] = root / "data"
        if data.get("lancedb_dir") in (None, _UNSET_PATH):
            data["lancedb_dir"] = root / "data" / "lancedb"
        if data.get("models_dir") in (None, _UNSET_PATH):
            data["models_dir"] = canonical_models_dir()

        return data
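    # Resolution sketch (hypothetical root): with LILBEE_DATA=/srv/lilbee
    # and no per-field overrides, this validator yields
    # data_root=/srv/lilbee, documents_dir=/srv/lilbee/documents,
    # data_dir=/srv/lilbee/data, lancedb_dir=/srv/lilbee/data/lancedb,
    # and models_dir from canonical_models_dir().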

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: Any,
        env_settings: Any,
        dotenv_settings: Any,
        file_secret_settings: Any,
    ) -> tuple[Any, ...]:
        from lilbee.core.system import default_data_dir, find_local_root

        data_env = os.environ.get("LILBEE_DATA", "")
        if data_env:
            toml_dir = Path(data_env)
        else:
            local = find_local_root()
            toml_dir = local if local else default_data_dir()
        toml_path = toml_dir / "config.toml"

        plain_env = _PlainEnvSource(settings_cls, env_prefix="LILBEE_", env_ignore_empty=True)
        sources: list[Any] = [init_settings, plain_env]
        if toml_path.exists() and os.environ.get("LILBEE_SKIP_TOML_CONFIG") != "1":
            sources.append(_TomlSource(settings_cls, toml_path))
        return tuple(sources)
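    # Resulting precedence, highest first: init kwargs, then LILBEE_* env
    # vars, then config.toml (pydantic-settings consults sources in order).
    # E.g. Config(top_k=4) beats LILBEE_TOP_K=12, which beats top_k = 8 in
    # config.toml (hypothetical values).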

    @property
    def model_defaults(self) -> Any:
        """Per-model generation defaults (read-only). Set via apply_model_defaults()."""
        return self._model_defaults

    def apply_model_defaults(self, defaults: Any) -> None:
        """Store per-model generation defaults for 3-layer merge."""
        object.__setattr__(self, "_model_defaults", defaults)

    def clear_model_defaults(self) -> None:
        """Reset per-model defaults to None."""
        object.__setattr__(self, "_model_defaults", None)

    def generation_options(self, **overrides: Any) -> dict[str, Any]:
        """Merge model defaults, user config, and per-call overrides, dropping None."""
        result = _model_defaults_dict(self._model_defaults)
        user_fields: dict[str, Any] = {
            "temperature": self.temperature,
            "top_p": self.top_p,
            "top_k": self.top_k_sampling,
            "repeat_penalty": self.repeat_penalty,
            "num_ctx": self.num_ctx,
            "seed": self.seed,
            "max_tokens": self.max_tokens,
        }
        for k, v in user_fields.items():
            if v is not None:
                result[k] = v
        for k, v in overrides.items():
            if v is not None:
                result[k] = v
        return result
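    # Merge sketch (hypothetical values): with per-model defaults of
    # temperature=0.7 and a user config temperature of 0.1:
    #
    #     cfg.generation_options()                  # {"temperature": 0.1, ...}
    #     cfg.generation_options(temperature=0.9)   # per-call override wins
    #     cfg.generation_options(temperature=None)  # None never clobbers: 0.1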


def _model_defaults_dict(defaults: Any) -> dict[str, Any]:
    """Non-None fields of a ModelDefaults instance as a dict."""
    if defaults is None:
        return {}
    from dataclasses import fields as dc_fields

    return {
        f.name: getattr(defaults, f.name)
        for f in dc_fields(defaults)
        if getattr(defaults, f.name) is not None
    }


class _PlainEnvSource:
    """Reads LILBEE_* env vars as plain strings so field validators handle parsing."""

    def __init__(
        self,
        settings_cls: type[BaseSettings],
        env_prefix: str,
        env_ignore_empty: bool = True,
    ) -> None:
        self._prefix = env_prefix
        self._ignore_empty = env_ignore_empty
        self._fields = set(settings_cls.model_fields)

    def __call__(self) -> dict[str, Any]:
        result: dict[str, Any] = {}
        for field_name in self._fields:
            env_key = f"{self._prefix}{field_name.upper()}"
            raw = os.environ.get(env_key)
            if raw is None:
                continue
            if self._ignore_empty and raw == "":
                continue
            result[field_name] = raw
        return result
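    # e.g. LILBEE_TOP_K=12 (hypothetical) yields {"top_k": "12"}; the raw
    # string is handed to pydantic, so the field's ge=1 bound and type
    # coercion still apply.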


class _TomlSource:
    """Custom pydantic-settings source that reads config.toml."""

    def __init__(self, settings_cls: type[BaseSettings], path: Path) -> None:
        self._path = path

    def __call__(self) -> dict[str, Any]:
        import tomllib

        try:
            with self._path.open("rb") as f:
                data = tomllib.load(f)
        except (ValueError, OSError):
            log.warning("Failed to read %s, ignoring", self._path)
            return {}
        # Empty strings represent "no persisted value" for nullable scalar
        # fields (legacy from set_setting writing "" for None). Pydantic
        # can't coerce "" to int|None, so dropping them here lets the field
        # default apply rather than crashing the whole Config load. Values
        # are passed through unconverted so TOML lists and JSON arrays reach
        # the field validators intact.
        return {k: v for k, v in data.items() if v != ""}


def _build_cfg() -> tuple[Config, Exception | None]:
    """Build cfg; on stale-config validation failure, fall back to defaults.

    A persisted ``config.toml`` from before a breaking schema change can
    contain values the new validators reject. Crashing at module import
    means every command (``lilbee --help`` included) emits a Python
    traceback. Falling back to env+defaults lets the package load; the
    CLI / TUI surfaces the original error before doing real work.
    """
    try:
        return Config(), None
    except Exception as exc:
        os.environ["LILBEE_SKIP_TOML_CONFIG"] = "1"
        try:
            return Config(), exc
        finally:
            os.environ.pop("LILBEE_SKIP_TOML_CONFIG", None)


cfg, config_load_error = _build_cfg()

# Canonicalize LILBEE_DATA at the cfg.data_root resolution boundary so
# spawn-context worker subprocesses inherit the same data root.
# ``setdefault`` preserves a user-set value.
os.environ.setdefault("LILBEE_DATA", str(cfg.data_root))
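
# Consumer sketch (hypothetical call site): a CLI entry point can surface
# the swallowed stale-config error before doing real work, as the
# _build_cfg docstring describes:
#
#     from lilbee.core.config import cfg, config_load_error
#
#     if config_load_error is not None:
#         log.warning("config.toml ignored (stale schema?): %s", config_load_error)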