# src/lilbee/core/config/model.py -- coverage: 100% of 392 statements
"""The :class:`Config` settings model and the ``cfg`` singleton.

The settings sources, TOML parser, and the resilient builder that falls
back to defaults on stale-config validation failures live here too. Every
``from lilbee.core.config import cfg`` resolves through ``lilbee.core.config.__init__``
to the same instance defined at the bottom of this module.
"""

import logging
import os
from pathlib import Path
from typing import Any, ClassVar

from pydantic import Field, ValidationInfo, field_validator, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

from .defaults import (
    DEFAULT_ALLOWED_NER_LABELS,
    DEFAULT_CORS_ORIGIN_REGEX,
    DEFAULT_CRAWL_EXCLUDE_PATTERNS,
    DEFAULT_GENERAL_SYSTEM_PROMPT,
    DEFAULT_IGNORE_DIRS,
    DEFAULT_RAG_SYSTEM_PROMPT,
)
from .enums import ChatMode, ClustererBackend, KvCacheType, WikiEntityMode
from .parsing import parse_bool
from .validators import ConfigField

log = logging.getLogger(__name__)

# Sentinel for unset Path-typed fields. ``Field(default=Path())`` produces an
# instance equal to this, so the model_validator can distinguish "user passed
# the default" from "user explicitly set a value".
_UNSET_PATH = Path()
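# Illustrative check (not part of the original module): an argument-less
# ``Path()`` normalizes to ``Path(".")``, so a field left at its default
# compares equal to the sentinel while any explicitly set path does not:
#
#     Path() == _UNSET_PATH               # True
#     Path("/srv/lilbee") == _UNSET_PATH  # False (hypothetical user path)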


class Config(BaseSettings):
    """Runtime configuration: one singleton instance, mutated by CLI overrides."""

    model_config = SettingsConfigDict(
        env_prefix="LILBEE_",
        validate_assignment=True,
        arbitrary_types_allowed=True,
        extra="ignore",
    )

    # Paths: resolved from env/defaults in model_validator(mode='before')
    data_root: Path = Field(default=Path())
    # Writable so plugin-managed servers can pivot storage to a vault path on
    # first boot; rebuild the index after migrating.
    documents_dir: Path = ConfigField(default=Path(), writable=True)
    data_dir: Path = Field(default=Path())
    lancedb_dir: Path = Field(default=Path())
    models_dir: Path = Field(default=Path())
    # Markdown vault root; when set, search results carry a vault-relative
    # ``vault_path`` so a host UI can deep-link into the vault.
    vault_base: Path | None = ConfigField(default=None, writable=True)

    chat_model: str = Field(default="Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf", min_length=1)
    embedding_model: str = Field(
        default="nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_K_M.gguf",
        min_length=1,
    )
    # Vision OCR model for scanned PDFs and image-only pages. Empty = disabled;
    # there is no cross-role fallback onto the chat model even if multimodal.
    vision_model: str = ConfigField(default="", public=True)
    embedding_dim: int = Field(default=768, ge=1)
    chunk_size: int = ConfigField(default=512, ge=64, writable=True, reindex=True)
    chunk_overlap: int = ConfigField(default=100, ge=0, writable=True, reindex=True)
    max_embed_chars: int = Field(default=2000, ge=1)
    top_k: int = ConfigField(default=8, ge=1, writable=True)
    max_distance: float = ConfigField(default=0.65, ge=0.0, writable=True)
    # Floor for hybrid-search relevance scores (0.0 = no filtering). lilbee
    # surfaces LanceDB's raw RRF sum, not a normalized score: with K=60 a
    # chunk ranked first in both the vector and FTS lists tops out near
    # 1/61 + 1/61 ~= 0.033, so any positive floor above that silently drops
    # every result. Keep this at 0.0 unless the RRF scores are normalized first.
    min_relevance_score: float = ConfigField(default=0.0, ge=0.0, writable=True)
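    # Worked gate example (illustrative): with the default K=60, a chunk
    # ranked 1st in the vector list and 3rd in the FTS list scores
    #
    #     1 / (60 + 1) + 1 / (60 + 3)  # ~0.0323
    #
    # so even a floor as low as 0.05 silently drops it.
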
    adaptive_threshold: bool = Field(default=False)
    rag_system_prompt: str = ConfigField(
        default=DEFAULT_RAG_SYSTEM_PROMPT, min_length=1, writable=True
    )
    general_system_prompt: str = ConfigField(
        default=DEFAULT_GENERAL_SYSTEM_PROMPT, min_length=1, writable=True
    )
    chat_mode: str = ConfigField(default=ChatMode.SEARCH.value, writable=True)
    ignore_dirs: frozenset[str] = Field(default=DEFAULT_IGNORE_DIRS)
    # OCR for scanned PDFs via vision-capable chat model.
    # None = auto-detect (use OCR if chat model is vision-capable).
    # True = force OCR regardless of detection.
    # False = disable OCR entirely.
    enable_ocr: bool | None = ConfigField(default=None, writable=True)
    # Per-page timeout in seconds for vision OCR (0 = no limit).
    ocr_timeout: float = ConfigField(default=120.0, ge=0.0, writable=True)
    # Outer wall-clock budget for the streamed pool drain: load grace plus
    # per_page * pages. Tune up for slow hardware (M1 Pro vision is
    # ~5min/page) or down for fast hardware. ocr_timeout still governs the
    # per-page expectation that drives the total budget.
    vision_load_budget_s: float = ConfigField(default=300.0, ge=0.0, writable=True)
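    # Sketch of the budget arithmetic above (illustrative; the real drain
    # logic lives in the worker pool):
    #
    #     total_budget_s = vision_load_budget_s + ocr_timeout * page_count
    #     # defaults, 10-page scan: 300.0 + 120.0 * 10 = 1500 s wall clock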

    # Tesseract fallback wall-clock timeout per file, seconds. 0 = no cap.
    tesseract_timeout: float = ConfigField(default=60.0, ge=0.0, writable=True)
    semantic_chunking: bool = ConfigField(default=False, writable=True)
    topic_threshold: float = ConfigField(default=0.75, ge=0.0, le=1.0, writable=True)
    server_host: str = "127.0.0.1"
    server_port: int = Field(default=0, ge=0, le=65535)
    cors_origins: list[str] = Field(default_factory=list)
    cors_origin_regex: str = Field(default=DEFAULT_CORS_ORIGIN_REGEX)
    # Seconds between SSE heartbeat events when the producer queue is idle.
    # Must stay well below the plugin's STREAM_IDLE_TIMEOUT_MS (120s) so a
    # single long-running vision OCR page can't starve the client into aborting.
    sse_heartbeat_interval: float = ConfigField(default=30.0, ge=0.0, writable=True)
    json_mode: bool = False
    temperature: float | None = ConfigField(default=0.1, ge=0.0, writable=True)
    top_p: float | None = ConfigField(default=0.9, ge=0.0, le=1.0, writable=True)
    top_k_sampling: int | None = ConfigField(default=40, ge=1, writable=True)
    # 1.1 is llama.cpp's default. Leaving this at None caused n-gram loops
    # ("tire tire tire...") on some open-weights models.
    repeat_penalty: float | None = ConfigField(default=1.1, ge=0.0, writable=True)
    num_ctx: int | None = ConfigField(default=None, ge=1, writable=True)
    max_tokens: int | None = ConfigField(default=4096, ge=1, writable=True)
    seed: int | None = ConfigField(default=None, writable=True)
    llm_provider: str = ConfigField(default="auto", writable=True)
    remote_base_url: str = ConfigField(default="http://localhost:11434", writable=True)
    llm_api_key: str = ConfigField(default="", writable=True, write_only=True)
    openrouter_api_key: str = ConfigField(default="", writable=True, write_only=True)
    gemini_api_key: str = ConfigField(default="", writable=True, write_only=True)
    anthropic_api_key: str = ConfigField(default="", writable=True, write_only=True)
    openai_api_key: str = ConfigField(default="", writable=True, write_only=True)
    mistral_api_key: str = ConfigField(default="", writable=True, write_only=True)
    deepseek_api_key: str = ConfigField(default="", writable=True, write_only=True)

    # Retrieval quality knobs.

    # Max chunks per source in top-k; prevents one large file from
    # monopolizing results.
    diversity_max_per_source: int = ConfigField(default=3, ge=1, writable=True)

    # MMR relevance/diversity tradeoff; 0 = max diversity, 1 = pure relevance
    # (Carbonell & Goldstein 1998).
    mmr_lambda: float = ConfigField(default=0.5, ge=0.0, le=1.0, writable=True)
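    # That lambda feeds the standard MMR selection rule (sketch, not the
    # actual implementation): each remaining candidate d scores
    #
    #     mmr_lambda * sim(d, query)
    #         - (1 - mmr_lambda) * max(sim(d, s) for s in selected)
    #
    # so 1.0 ranks purely by relevance and 0.0 purely by novelty.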

    # Extra candidates retrieved for MMR reranking (multiplies top_k).
    candidate_multiplier: int = ConfigField(default=3, ge=1, writable=True)

    # LLM-generated alternative queries for expansion. 0 disables.
    query_expansion_count: int = ConfigField(default=3, ge=0, writable=True)

    # Skip LLM expansion when tokenized query length ≤ this. The LLM round-trip
    # dominates latency on small local models; short queries already have strong
    # BM25/vector signal. Concept-graph expansion still runs. 0 disables the skip.
    expansion_short_query_tokens: int = ConfigField(default=2, ge=0, writable=True)

    # Cosine-distance step when adaptive-widening retry kicks in.
    adaptive_threshold_step: float = ConfigField(default=0.2, gt=0.0, writable=True)

    # Reject expansion variants below expansion_similarity_threshold.
    expansion_guardrails: bool = ConfigField(default=True, writable=True)

    # Min cosine similarity between question and variant embeddings.
    expansion_similarity_threshold: float = ConfigField(default=0.5, ge=0.0, le=1.0, writable=True)

    # Sigmoid-normalized BM25 score above which query expansion is skipped.
    expansion_skip_threshold: float = Field(default=0.8, ge=0.0, le=1.0)

    # Min BM25 top-1 vs top-2 gap to skip expansion.
    expansion_skip_gap: float = Field(default=0.15, ge=0.0, le=1.0)
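    # One plausible combination of the two skip signals above (illustrative;
    # the real gate may differ), with top1/top2 the sigmoid-normalized BM25
    # scores of the best two hits:
    #
    #     skip_expansion = (
    #         top1 >= expansion_skip_threshold
    #         or top1 - top2 >= expansion_skip_gap
    #     )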

    # Chunks included in LLM context after adaptive selection.
    max_context_sources: int = ConfigField(default=6, ge=1, writable=True)

    # HyDE (Gao et al. 2022): hypothetical-answer embedding search. +~500ms.
    hyde: bool = ConfigField(default=False, writable=True)

    # HyDE result weight relative to real-doc search (0.0-1.0).
    hyde_weight: float = ConfigField(default=0.7, ge=0.0, le=1.0, writable=True)

    # HyDE prompt template. Must contain {question} placeholder.
    hyde_prompt: str = (
        "Write a 50-100 word passage that directly answers this question as if "
        "it were an excerpt from a real document. Do not include any preamble, "
        "just write the passage.\n\nQuestion: {question}"
    )
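    # Illustrative fill of the template (hypothetical call site):
    #
    #     prompt = cfg.hyde_prompt.format(question="What is RRF?")
    #
    # Note that str.format silently drops the question if the {question}
    # placeholder is edited out, rather than raising.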

    # Reranker model ref. Empty disables reranking. Native GGUFs use
    # llama-cpp rank pooling; hosted refs (cohere/voyage/jina/together/hf-tei)
    # need the backend extra.
    reranker_model: str = ConfigField(default="", public=True)

    # Candidate count sent to the reranker.
    rerank_candidates: int = ConfigField(default=60, ge=1, writable=True, public=True)

    # Date-range filter; only fires when a temporal keyword is detected.
    temporal_filtering: bool = ConfigField(default=True, writable=True)

    # If True, emit <think>…</think> content as separate SSE reasoning events;
    # if False, strip it silently.
    show_reasoning: bool = ConfigField(default=False, writable=True)

    # Maximum reasoning characters before lilbee forces the model to answer.
    # Per-model overrides apply on top of this default. Approx N/4 tokens.
    # 0 disables the cap (unlimited reasoning; accept the runaway-loop risk).
    max_reasoning_chars: int = ConfigField(default=64_000, ge=0, writable=True)

    # Web crawling.

    # Optional global ceilings. None = no ceiling.
    crawl_max_depth: int | None = ConfigField(default=None, ge=0, writable=True)
    crawl_max_pages: int | None = ConfigField(default=None, ge=1, writable=True)

    # Per-URL fetch timeout, seconds.
    crawl_timeout: int = ConfigField(default=30, ge=1, writable=True)

    # 0 = unlimited, default = CPU count.
    crawl_max_concurrent: int = Field(default=0, ge=0)

    # Seconds between periodic syncs during crawl. 0 = sync only at end.
    crawl_sync_interval: int = ConfigField(default=30, ge=0, writable=True)

    # Per-request delay + jitter (defaults chosen to be gentler than crawl4ai's).
    crawl_mean_delay: float = ConfigField(default=0.5, ge=0.0, writable=True)
    crawl_max_delay_range: float = ConfigField(default=0.5, ge=0.0, writable=True)

    # In-flight requests per crawl.
    crawl_concurrent_requests: int = ConfigField(default=3, ge=1, writable=True)

    # Per-domain rate-limiter that backs off on HTTP 429/503 and retries.
    crawl_retry_on_rate_limit: bool = ConfigField(default=True, writable=True)
    crawl_retry_base_delay_min: float = ConfigField(default=1.0, ge=0.0, writable=True)
    crawl_retry_base_delay_max: float = ConfigField(default=3.0, ge=0.0, writable=True)
    crawl_retry_max_backoff: float = ConfigField(default=30.0, ge=0.0, writable=True)
    crawl_retry_max_attempts: int = ConfigField(default=3, ge=0, writable=True)

    # Regex patterns dropped at link-discovery time. Defaults block CMS
    # scaffolding (WordPress admin, archives, tracking params, etc.).
    crawl_exclude_patterns: list[str] = ConfigField(
        default_factory=lambda: list(DEFAULT_CRAWL_EXCLUDE_PATTERNS),
        writable=True,
    )

    # Fraction of GPU/unified memory reserved for loaded models.
    gpu_memory_fraction: float = ConfigField(default=0.75, ge=0.1, le=1.0, writable=True)

    # Seconds a model stays loaded after last use. 0 = unload immediately.
    model_keep_alive: int = ConfigField(default=300, ge=0, writable=True)

    # Per-call deadline for one pool round-trip (send + recv). Embed batches
    # that take longer than this on slow machines surface as TimeoutError;
    # raise it for heavy ingest jobs.
    worker_pool_call_timeout_s: float = ConfigField(default=300.0, gt=0.0, writable=True)

    # Spawn every configured role at startup instead of on first use. Trades
    # a slower TUI mount (~1-3s per worker, cold-started in parallel) for a
    # responsive first interaction. Roles whose model is unset are skipped,
    # so a setup with only chat + embed never spawns rerank or vision.
    # Set to false for headless / scripted use where the first call doesn't
    # need to be fast.
    worker_pool_eager_start: bool = ConfigField(default=True, writable=True)

    # Idle worker reap. A worker that has been quiet for this many seconds
    # is shut down to free RAM/VRAM; the next request respawns it.
    # ``0`` disables reaping (workers stay up until TUI exit).
    worker_pool_max_idle_s: float = ConfigField(default=300.0, ge=0.0, writable=True)

    # Upper bound for the dynamic n_ctx picker. The picker chooses the
    # largest 256-multiple ctx that fits in available memory and the
    # model's training window; this caps it at a sane ceiling.
    num_ctx_max: int = ConfigField(default=16384, ge=512, writable=True)
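    # Sketch of the picker this caps (illustrative; the actual logic lives
    # in the provider layer):
    #
    #     ctx = min(num_ctx_max, model_train_ctx, largest_ctx_that_fits)
    #     ctx -= ctx % 256  # snap down to a 256 multiple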

    # Flash attention. None (default) = on with TypeError fallback for
    # older llama-cpp-python builds, True = force on, False = off.
    # Resolves the 'padding V cache to 1024' warning on models with
    # uneven per-layer V dims (e.g. Gemma3) and saves ~25% KV memory.
    flash_attention: bool | None = ConfigField(default=None, writable=True)

    # KV cache element type. q8_0 / q4_0 halve or quarter cache memory
    # but require flash attention to be enabled.
    kv_cache_type: KvCacheType = ConfigField(default=KvCacheType.F16, writable=True)

    # Number of model layers to offload to GPU. None (default) = all
    # layers, 0 = CPU only, positive int = partial offload. Useful when a
    # discrete GPU has less VRAM than the model needs.
    n_gpu_layers: int | None = ConfigField(default=None, writable=True)

    # GPU device picker for dual-GPU machines (typical laptop case:
    # discrete NVIDIA + integrated Intel/AMD). The Vulkan backend
    # enumerates every adapter the system exposes and may pick the
    # integrated one first, producing stalls or OOMs that look like
    # llama.cpp bugs. Setting ``gpu_devices`` constrains visibility
    # before llama_cpp loads, pinning inference to the chosen device(s).
    #
    # Accepts a comma-separated list of device indexes ("0", "1",
    # "0,1") and applies it to every backend simultaneously:
    # ``GGML_VK_VISIBLE_DEVICES`` for Vulkan, ``CUDA_VISIBLE_DEVICES``
    # for CUDA, ``HIP_VISIBLE_DEVICES`` / ``ROCR_VISIBLE_DEVICES`` for
    # ROCm. Setting one variable that the active backend ignores is
    # harmless, so we set all four rather than detecting the build.
    #
    # Must be set before the first llama.cpp call; in practice that
    # means via ``LILBEE_GPU_DEVICES`` or ``config.toml`` (TUI edits
    # only take effect after a restart). ``None`` (default) hands off
    # to the autodetect in ``providers/llama_cpp/gpu_select.py``,
    # which parses ``vulkaninfo --summary`` and pins the discrete
    # adapter when one is present. The autodetect is silent on failure
    # (no vulkaninfo, single device, parse error), leaving the
    # Vulkan-loader's default ordering in place.
    gpu_devices: str | None = ConfigField(default=None, writable=True)
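    # Sketch of the fan-out described above (illustrative): a validated
    # value like "0" would land in every backend's visibility variable
    # before llama_cpp is imported, e.g.
    #
    #     for var in ("GGML_VK_VISIBLE_DEVICES", "CUDA_VISIBLE_DEVICES",
    #                 "HIP_VISIBLE_DEVICES", "ROCR_VISIBLE_DEVICES"):
    #         os.environ[var] = cfg.gpu_devices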

    # Primary GPU index passed to ``Llama(main_gpu=...)``. Only matters
    # when multiple devices remain visible after ``gpu_devices``; with
    # a single visible device, llama.cpp ignores this. ``None``
    # (default) lets llama.cpp pick (index 0).
    main_gpu: int | None = ConfigField(default=None, writable=True)

    # True = Markdown widget for chat; False = plain Static (faster).
    markdown_rendering: bool = True

    # TUI theme name; persists the last Ctrl+T pick across sessions.
    theme: str = ConfigField(default="rose-pine", writable=True)

    # Per-model generation defaults set via apply_model_defaults().
    _model_defaults: Any = None

    # Wiki layer. LLM-maintained synthesis pages with citation provenance.
    # Off by default; flip to True (or set LILBEE_WIKI=1) to enable. When off,
    # the Wiki view tab and the chat ModelBar's scope picker are both hidden.
    wiki: bool = ConfigField(default=False, writable=True)
    wiki_dir: str = "wiki"
    wiki_prune_raw: bool = ConfigField(default=False, writable=True)

    # Minimum cosine similarity between a page body and the mean of its
    # source chunk vectors before a page is published (below → drafts).
    # Replaces the old LLM-based faithfulness score: mean-of-chunks is a
    # deterministic, zero-LLM-call signal that routes topic-drifted
    # pages to drafts without the 0.0 to 1.0 ambiguity of a model-emitted
    # number. Tuning knob: swap to per-chunk max or top-K-mean if the
    # default 0.5 produces false drafts.
    wiki_embedding_faithfulness_threshold: float = ConfigField(
        default=0.5, ge=0.0, le=1.0, writable=True
    )
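    # Sketch of the publish/draft gate (illustrative; assumes numpy-style
    # vectors and a cosine-similarity helper):
    #
    #     centroid = np.mean(chunk_vectors, axis=0)
    #     published = cosine(page_vector, centroid) >= threshold  # else draft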

    # Per-call output token cap for wiki generation. Without this a
    # reasoning model (Qwen3, DeepSeek-R1) can burn the full context
    # window emitting <think> tokens before the actual answer, taking
    # minutes per page. Default leaves headroom for a typical reasoning
    # budget plus a real response (~1000 output + ~1000 slack).
    wiki_summary_max_tokens: int = ConfigField(default=2048, ge=256, writable=True)

    # Wiki generation is a structured-output task: the model must emit the
    # block separators, the citation footnotes, and verbatim quotes. The
    # usual chat default (~0.8) is too creative for that. Lowering the
    # sampling temperature makes the model stick to the template and quote
    # more faithfully. 0.1 leaves just enough slack to avoid hard loops.
    wiki_temperature: float = ConfigField(default=0.1, ge=0.0, le=2.0, writable=True)

    # Fraction of citations that must be stale before a wiki page is flagged.
    wiki_stale_citation_threshold: float = Field(default=0.5, ge=0.0, le=1.0)

    # Fraction of content changed that triggers the human-review drift guard.
    wiki_drift_threshold: float = Field(default=0.3, ge=0.0, le=1.0)

    # LLM prompt templates for wiki page generation. Writable so advanced
    # users can override them from /settings, config.toml, or
    # ``LILBEE_WIKI_*_PROMPT`` env vars. Templates must keep the expected
    # ``{placeholders}``: if you remove one, the generator will crash on
    # first use. The defaults below are the only reason the pipeline
    # works out of the box.
    wiki_summary_prompt: str = ConfigField(
        writable=True,
        default=(
            "You are a knowledge compiler. Given the source chunks below from a single "
            "document, write a concise wiki summary page in markdown.\n\n"
            "Rules:\n"
            "1. Every factual claim MUST have an inline citation [^src1], [^src2], etc.\n"
            "2. Cite the EXACT text from the source that supports each claim by quoting it.\n"
            "3. For interpretations or connections not directly stated in the source, "
            "mark with [*inference*].\n"
            "4. Use blockquotes (>) for directly cited facts.\n"
            "5. End with a citation block in this format:\n\n"
            "---\n"
            "<!-- citations (auto-generated from _citations table -- do not edit) -->\n"
            '[^src1]: {source_name}, excerpt: "exact quoted text"\n'
            '[^src2]: {source_name}, excerpt: "exact quoted text"\n\n'
            "Source document: {source_name}\n\n"
            "Chunks:\n{chunks_text}\n\n"
            "Write the wiki summary page now. Start with a heading."
        ),
    )
    wiki_synthesis_prompt: str = ConfigField(
        writable=True,
        default=(
            "You are a knowledge compiler. Given source chunks from MULTIPLE documents "
            "about related concepts, write a synthesis wiki page in markdown that connects "
            "ideas across sources.\n\n"
            "Rules:\n"
            "1. Every factual claim MUST have an inline citation [^src1], [^src2], etc.\n"
            "2. Cite the EXACT text from the source that supports each claim by quoting it.\n"
            "3. For connections, interpretations, or patterns you identify across sources, "
            "mark with [*inference*].\n"
            "4. Use blockquotes (>) for directly cited facts.\n"
            "5. Reference each source by its filename when drawing connections.\n"
            "6. End with a citation block in this format:\n\n"
            "---\n"
            "<!-- citations (auto-generated from _citations table -- do not edit) -->\n"
            '[^src1]: {{source_name}}, excerpt: "exact quoted text"\n'
            '[^src2]: {{source_name}}, excerpt: "exact quoted text"\n\n'
            "Topic: {topic}\n\n"
            "Sources:\n{source_list}\n\n"
            "Chunks:\n{chunks_text}\n\n"
            "Write the synthesis page now. Start with a heading."
        ),
    )

    # Wiki synthesis clusterer backend. CONCEPTS requires the [graph] extra
    # and falls back to EMBEDDING when unavailable.
    wiki_clusterer: ClustererBackend = ConfigField(
        default=ClustererBackend.EMBEDDING, writable=True
    )

    # Neighborhood size for the mutual-kNN graph. 0 = auto-scale from corpus size.
    wiki_clusterer_k: int = ConfigField(default=0, ge=0, writable=True)

    # LazyGraphRAG-style concept graph. Requires the [graph] extra.
    concept_graph: bool = ConfigField(default=True, writable=True)

    # Weight of concept overlap boost relative to vector similarity.
    concept_boost_weight: float = ConfigField(default=0.3, ge=0.0, le=1.0, writable=True)

    # Floor on post-boost distance to stop weak boosts from promoting marginal hits.
    concept_boost_floor: float = ConfigField(default=0.05, ge=0.0, writable=True)

    # Max noun-phrase concepts extracted per chunk.
    concept_max_per_chunk: int = ConfigField(default=5, ge=1, writable=True)

    # spaCy NER labels kept by the wiki entity extractor. Anything not
    # in this set (QUANTITY, CARDINAL, DATE, TIME, MONEY, PERCENT,
    # ORDINAL, ...) is dropped before aggregation. Override via
    # LILBEE_CONCEPT_ALLOWED_ENT_TYPES as a comma-separated list.
    concept_allowed_ent_types: frozenset[str] = Field(default=DEFAULT_ALLOWED_NER_LABELS)

    # Strategy used to extract entities for the concept/entity wiki.
    # NER_ENTITIES (default) pulls typed NER entities with spaCy; concept
    # pages are proposed by the LLM inside the per-source batched call,
    # not by the extractor. NER_CONCEPTS_PLUS_LLM_TYPES layers an
    # LLM-proposed domain schema on top. LLM_TAGGED asks the LLM to tag
    # every chunk (most expensive). Unimplemented modes fall back to
    # NER_ENTITIES.
    wiki_entity_mode: WikiEntityMode = ConfigField(
        default=WikiEntityMode.NER_ENTITIES, writable=True
    )

    # Minimum distinct chunk mentions before an entity or concept earns
    # its own wiki page. Filters one-off noise.
    wiki_entity_min_mentions: int = ConfigField(default=3, ge=1, writable=True)

    # Maximum chunks passed into each concept or entity page generation
    # call. Caps context size so one page does not blow the context
    # window on a prolific topic.
    wiki_concept_max_chunks_per_page: int = ConfigField(default=25, ge=1, writable=True)

    # Maximum number of related concepts the model is asked to list in
    # the `## Related` section of each page.
    wiki_related_max: int = ConfigField(default=8, ge=0, writable=True)

    # Auto-update cap: if a single sync touches more than this many
    # concept or entity pages, skip the per-slug regeneration and tell
    # the user to run `lilbee wiki update` explicitly. Keeps a surprise
    # bulk import from firing hundreds of LLM calls.
    wiki_ingest_update_cap: int = ConfigField(default=20, ge=1, writable=True)

    # Whether the per-source batched call asks the LLM to curate
    # concept pages alongside the pre-extracted entity list. False →
    # entity sections only, no concept curation (the incremental ingest
    # path uses this to avoid churning concept slugs per source-touch).
    wiki_extract_concepts: bool = ConfigField(default=True, writable=True)

    # Minimum chunk count a source must contribute before it is eligible
    # for concept curation. Sources below the floor still get a batched
    # call when they have entities (the prompt writes entity-only
    # sections); sources below the floor with zero entities are skipped
    # entirely. Prevents boilerplate / TOC / appendix documents from
    # burning an LLM call to invent "concepts".
    wiki_batch_min_chunks: int = ConfigField(default=3, ge=1, writable=True)

    # Prompt template for the per-source batched call. Placeholders:
    # {source}, {entity_list}, {chunks_text}, {concept_instruction}.
    # {concept_instruction} is filled with a concept-curation paragraph
    # when concepts are requested, or the empty string otherwise.
    wiki_entity_batch_prompt: str = ConfigField(
        writable=True,
        default=(
            "You are writing wiki sections based on these chunks from {source}.\n\n"
            "{concept_instruction}"
            "Write a wiki section for each of these NER ENTITIES: {entity_list}\n\n"
            "Format each section exactly as:\n"
            "## Name\n"
            "{{content with [^src1]-style citations}}\n\n"
            "Rules:\n"
            "1. Every factual claim MUST have an inline citation [^src1], [^src2], etc.\n"
            "2. Cite the EXACT text from the source that supports each claim by quoting it.\n"
            "3. For interpretations or connections not directly stated, mark with [*inference*].\n"
            "4. Use blockquotes (>) for directly cited facts.\n"
            "5. End the response with a citation block in this format:\n\n"
            "---\n"
            "<!-- citations (auto-generated from _citations table -- do not edit) -->\n"
            '[^src1]: {{source_name}}, excerpt: "exact quoted text"\n'
            '[^src2]: {{source_name}}, excerpt: "exact quoted text"\n\n'
            "Source chunks:\n{chunks_text}\n"
        ),
    )

    # Class variable: not a settings field.
    _toml_cache: ClassVar[dict[str, Any]] = {}

    @field_validator(
        "temperature",
        "top_p",
        "repeat_penalty",
        "top_k_sampling",
        "num_ctx",
        "seed",
        mode="before",
    )
    @classmethod
    def _empty_string_to_none(cls, v: Any) -> Any:
        if isinstance(v, str) and v.strip() == "":
            return None
        return v

    @field_validator("chat_mode", mode="before")
    @classmethod
    def _normalize_chat_mode(cls, v: Any) -> str:
        """Coerce chat_mode to a ChatMode value; default ChatMode.SEARCH."""
        if v is None or v == "":
            return ChatMode.SEARCH.value
        candidate = str(v).strip().lower()
        try:
            return ChatMode(candidate).value
        except ValueError as exc:
            valid = ", ".join(repr(m.value) for m in ChatMode)
            raise ValueError(f"chat_mode must be one of {{{valid}}}, got {v!r}") from exc

    @field_validator("enable_ocr", mode="before")
    @classmethod
    def _parse_enable_ocr(cls, v: Any) -> bool | None:
        """Parse enable_ocr from env var string or direct value.

        Accepts: true/false/1/0/yes/no (case-insensitive), empty string
        or None for auto-detect.
        """
        if v is None:
            return None
        if isinstance(v, bool):
            return v
        if isinstance(v, str):
            if v.strip().lower() in ("", "auto", "none"):
                return None
            try:
                return parse_bool(v)
            except ValueError:
                pass  # fall through to bool() coercion below for unrecognised strings
        return bool(v)

    @field_validator("flash_attention", mode="before")
    @classmethod
    def _parse_flash_attention(cls, v: Any) -> bool | None:
        """Auto/on/off tri-state: empty/auto/none -> None, else parse bool."""
        if v is None:
            return None
        if isinstance(v, bool):
            return v
        if isinstance(v, str):
            if v.strip().lower() in ("", "auto", "none"):
                return None
            try:
                return parse_bool(v)
            except ValueError:
                return None
        return bool(v)

    @field_validator("n_gpu_layers", mode="before")
    @classmethod
    def _parse_n_gpu_layers(cls, v: Any) -> int | None:
        """Auto -> None, ``cpu`` alias -> 0, integers parsed verbatim."""
        if v is None:
            return None
        if isinstance(v, str):
            label = v.strip().lower()
            if label in ("", "auto", "none"):
                return None
            if label == "cpu":
                return 0
            try:
                return int(label)
            except ValueError:
                log.warning("Invalid LILBEE_N_GPU_LAYERS=%r, using auto", v)
                return None
        return int(v)

    @field_validator("main_gpu", mode="before")
    @classmethod
    def _parse_main_gpu(cls, v: Any) -> int | None:
        """Empty/auto strings -> None, integers parsed verbatim."""
        if v is None:
            return None
        if isinstance(v, str):
            label = v.strip().lower()
            if label in ("", "auto", "none"):
                return None
            try:
                return int(label)
            except ValueError:
                log.warning("Invalid LILBEE_MAIN_GPU=%r, using auto", v)
                return None
        return int(v)

    @field_validator("gpu_devices", mode="before")
    @classmethod
    def _parse_gpu_devices(cls, v: Any) -> str | None:
        """Normalize device list: strip whitespace, drop empties, keep order."""
        if v is None:
            return None
        if isinstance(v, str):
            label = v.strip().lower()
            if label in ("", "auto", "all", "none"):
                return None
            parts = [p.strip() for p in v.split(",") if p.strip()]
            if not parts:
                return None
            for part in parts:
                if not part.lstrip("-").isdigit():
                    log.warning("Invalid LILBEE_GPU_DEVICES=%r, ignoring", v)
                    return None
            return ",".join(parts)
        return str(v)

    @field_validator("semantic_chunking", mode="before")
    @classmethod
    def _parse_semantic_chunking(cls, v: Any) -> bool:
        """Parse from env string; invalid values warn and fall back to False."""
        if isinstance(v, bool):
            return v
        if isinstance(v, str):
            try:
                return parse_bool(v)
            except ValueError:
                log.warning("Invalid LILBEE_SEMANTIC_CHUNKING=%r, using default False", v)
                return False
        return bool(v)

    @field_validator(
        "chat_model", "embedding_model", "vision_model", "reranker_model", mode="after"
    )
    @classmethod
    def _normalize_model_tag(cls, v: str, info: ValidationInfo) -> str:
        """Validate and canonicalize a model ref; blank clears optional roles."""
        if not v or not v.strip():
            if info.field_name in {"chat_model", "embedding_model"}:
                raise ValueError(f"{info.field_name} must not be blank")
            return ""
        from lilbee.providers.model_ref import parse_model_ref

        return parse_model_ref(v).for_openai_prefix()

    @field_validator("cors_origins", mode="before")
    @classmethod
    def _split_cors_origins(cls, v: Any) -> Any:
        if isinstance(v, str):
            return [o.strip() for o in v.split(",") if o.strip()]
        return v

    @field_validator("crawl_exclude_patterns", mode="before")
    @classmethod
    def _split_crawl_exclude_patterns(cls, v: Any) -> Any:
        """Accept newline-separated strings from env vars / plain-text config.

        Regex commonly uses commas (e.g. `{2,4}`) and pipes (alternation), so
        newline is the only separator safe to use for this field. TOML lists
        and JSON arrays pass through unchanged.
        """
        if isinstance(v, str):
            return [p.strip() for p in v.splitlines() if p.strip()]
        return v

    @field_validator("crawl_exclude_patterns", mode="after")
    @classmethod
    def _validate_crawl_exclude_patterns(cls, v: list[str]) -> list[str]:
        """Reject any entry that isn't a valid Python regex.

        These patterns are compiled at crawl time. An invalid pattern there
        surfaces as an opaque mid-crawl error; catching it at PATCH time gives
        the user a 400 with a pointer to the bad entry.
        """
        import re

        bad: list[str] = []
        for i, pattern in enumerate(v):
            try:
                re.compile(pattern)
            except re.error as exc:
                bad.append(f"[{i}] {pattern!r}: {exc}")
        if bad:
            raise ValueError("invalid regex in crawl_exclude_patterns:\n " + "\n ".join(bad))
        return v

    @field_validator("ignore_dirs", mode="before")
    @classmethod
    def _merge_ignore_dirs(cls, v: Any) -> frozenset[str]:
        if isinstance(v, str):
            extra = frozenset(name.strip() for name in v.split(",") if name.strip())
            return DEFAULT_IGNORE_DIRS | extra
        if isinstance(v, (set, frozenset, list)):
            return DEFAULT_IGNORE_DIRS | frozenset(v)
        return DEFAULT_IGNORE_DIRS

    @field_validator("concept_allowed_ent_types", mode="before")
    @classmethod
    def _parse_ent_types(cls, v: Any) -> frozenset[str]:
        """Replace-semantics override: a narrowed set is used as-is,
        not unioned with defaults. A user asking for ``PERSON,ORG``
        wants exactly those kinds. Accepts comma-separated strings
        from env and list / set / frozenset from code. Empty input
        falls back to :data:`DEFAULT_ALLOWED_NER_LABELS` so an empty
        env var does not silently disable the gate.
        """
        if isinstance(v, str):
            parts = frozenset(name.strip().upper() for name in v.split(",") if name.strip())
            return parts or DEFAULT_ALLOWED_NER_LABELS
        if isinstance(v, (set, frozenset, list)):
            parts = frozenset(str(x).upper() for x in v)
            return parts or DEFAULT_ALLOWED_NER_LABELS
        return DEFAULT_ALLOWED_NER_LABELS

    @model_validator(mode="before")
    @classmethod
    def _resolve_defaults(cls, data: Any) -> Any:
        from lilbee.core.system import canonical_models_dir, default_data_dir, find_local_root

        if not isinstance(data, dict):  # pragma: no cover
            return data

        if data.get("data_root") in (None, _UNSET_PATH):
            data_env = os.environ.get("LILBEE_DATA", "").strip()
            if data_env:
                data["data_root"] = Path(data_env)
            else:
                local = find_local_root()
                data["data_root"] = local if local is not None else default_data_dir()
        root = data["data_root"]
        if data.get("documents_dir") in (None, _UNSET_PATH):
            data["documents_dir"] = root / "documents"
        if data.get("data_dir") in (None, _UNSET_PATH):
            data["data_dir"] = root / "data"
        if data.get("lancedb_dir") in (None, _UNSET_PATH):
            data["lancedb_dir"] = root / "data" / "lancedb"
        if data.get("models_dir") in (None, _UNSET_PATH):
            data["models_dir"] = canonical_models_dir()

        return data

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: Any,
        env_settings: Any,
        dotenv_settings: Any,
        file_secret_settings: Any,
    ) -> tuple[Any, ...]:
        from lilbee.core.system import default_data_dir, find_local_root

        data_env = os.environ.get("LILBEE_DATA", "")
        if data_env:
            toml_dir = Path(data_env)
        else:
            local = find_local_root()
            toml_dir = local if local else default_data_dir()
        toml_path = toml_dir / "config.toml"

        plain_env = _PlainEnvSource(settings_cls, env_prefix="LILBEE_", env_ignore_empty=True)
        sources: list[Any] = [init_settings, plain_env]
        if toml_path.exists() and os.environ.get("LILBEE_SKIP_TOML_CONFIG") != "1":
            sources.append(_TomlSource(settings_cls, toml_path))
        return tuple(sources)

    @property
    def model_defaults(self) -> Any:
        """Per-model generation defaults (read-only). Set via apply_model_defaults()."""
        return self._model_defaults

    def apply_model_defaults(self, defaults: Any) -> None:
        """Store per-model generation defaults for the 3-layer merge."""
        object.__setattr__(self, "_model_defaults", defaults)

    def clear_model_defaults(self) -> None:
        """Reset per-model defaults to None."""
        object.__setattr__(self, "_model_defaults", None)

    def generation_options(self, **overrides: Any) -> dict[str, Any]:
        """Merge model defaults, user config, and per-call overrides, dropping None."""
        result = _model_defaults_dict(self._model_defaults)
        user_fields: dict[str, Any] = {
            "temperature": self.temperature,
            "top_p": self.top_p,
            "top_k": self.top_k_sampling,
            "repeat_penalty": self.repeat_penalty,
            "num_ctx": self.num_ctx,
            "seed": self.seed,
            "max_tokens": self.max_tokens,
        }
        for k, v in user_fields.items():
            if v is not None:
                result[k] = v
        for k, v in overrides.items():
            if v is not None:
                result[k] = v
        return result
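# Illustrative three-layer merge (hypothetical values), mirroring the
# precedence in generation_options: model defaults, then non-None user
# config, then non-None per-call overrides; the last writer wins.
#
#     defaults = {"temperature": 0.7, "num_ctx": 8192}  # from _model_defaults
#     user = {"temperature": 0.1, "top_p": 0.9}         # non-None cfg fields
#     call = {"max_tokens": 256}                        # explicit override
#     merged = {**defaults, **user, **call}
#     # {'temperature': 0.1, 'num_ctx': 8192, 'top_p': 0.9, 'max_tokens': 256}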


def _model_defaults_dict(defaults: Any) -> dict[str, Any]:
    """Non-None fields of a ModelDefaults instance as a dict."""
    if defaults is None:
        return {}
    from dataclasses import fields as dc_fields

    return {
        f.name: getattr(defaults, f.name)
        for f in dc_fields(defaults)
        if getattr(defaults, f.name) is not None
    }


class _PlainEnvSource:
    """Reads LILBEE_* env vars as plain strings so field validators handle parsing."""

    def __init__(
        self,
        settings_cls: type[BaseSettings],
        env_prefix: str,
        env_ignore_empty: bool = True,
    ) -> None:
        self._prefix = env_prefix
        self._ignore_empty = env_ignore_empty
        self._fields = set(settings_cls.model_fields)

    def __call__(self) -> dict[str, Any]:
        result: dict[str, Any] = {}
        for field_name in self._fields:
            env_key = f"{self._prefix}{field_name.upper()}"
            raw = os.environ.get(env_key)
            if raw is None:
                continue
            if self._ignore_empty and raw == "":
                continue
            result[field_name] = raw
        return result
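# Illustrative use (hypothetical values): with ``LILBEE_TOP_K=12`` exported,
#
#     os.environ["LILBEE_TOP_K"] = "12"
#     _PlainEnvSource(Config, env_prefix="LILBEE_")()  # {"top_k": "12"}
#
# The raw string comes back untouched; the ``top_k`` field's own validation
# coerces it to an int.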


class _TomlSource:
    """Custom pydantic-settings source that reads config.toml."""

    def __init__(self, settings_cls: type[BaseSettings], path: Path) -> None:
        self._path = path

    def __call__(self) -> dict[str, Any]:
        import tomllib

        try:
            with self._path.open("rb") as f:
                data = tomllib.load(f)
        except (ValueError, OSError):
            log.warning("Failed to read %s, ignoring", self._path)
            return {}
        # Empty strings represent "no persisted value" for nullable scalar
        # fields (legacy from set_setting writing "" for None). Pydantic
        # can't coerce "" to int|None, so dropping them here lets the field
        # default apply rather than crashing the whole Config load. Other
        # values pass through unchanged: stringifying them here would break
        # list-typed fields such as crawl_exclude_patterns, which the field
        # validators expect to receive as real TOML lists.
        return {k: v for k, v in data.items() if v != ""}


def _build_cfg() -> tuple[Config, Exception | None]:
    """Build cfg; on stale-config validation failure, fall back to defaults.

    A persisted ``config.toml`` from before a breaking schema change can
    contain values the new validators reject. Crashing at module import
    means every command (``lilbee --help`` included) emits a Python
    traceback. Falling back to env+defaults lets the package load; the
    CLI / TUI surfaces the original error before doing real work.
    """
    try:
        return Config(), None
    except Exception as exc:
        os.environ["LILBEE_SKIP_TOML_CONFIG"] = "1"
        try:
            return Config(), exc
        finally:
            os.environ.pop("LILBEE_SKIP_TOML_CONFIG", None)


cfg, config_load_error = _build_cfg()

# Canonicalize LILBEE_DATA at the cfg.data_root resolution boundary so
# spawn-context worker subprocesses inherit the same data root.
# ``setdefault`` preserves a user-set value.
os.environ.setdefault("LILBEE_DATA", str(cfg.data_root))