Coverage for src / lilbee / core / config / model.py: 100%
420 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
1"""The :class:`Config` dataclass and the ``cfg`` singleton.
3The settings sources, TOML parser, and the resilient builder that falls
4back to defaults on stale-config validation failures live here too. Every
5``from lilbee.core.config import cfg`` resolves through ``lilbee.core.config.__init__``
6to the same instance defined at module bottom.
7"""
9import logging
10import os
11from pathlib import Path
12from typing import Any, ClassVar
14from pydantic import Field, ValidationInfo, field_validator, model_validator
15from pydantic_settings import BaseSettings, SettingsConfigDict
17from lilbee.core.system import scaled_chat_ctx_target_default
19from .defaults import (
20 DEFAULT_ALLOWED_NER_LABELS,
21 DEFAULT_CORS_ORIGIN_REGEX,
22 DEFAULT_CRAWL_EXCLUDE_PATTERNS,
23 DEFAULT_GENERAL_SYSTEM_PROMPT,
24 DEFAULT_IGNORE_DIRS,
25 DEFAULT_RAG_SYSTEM_PROMPT,
26)
27from .enums import ChatMode, ClustererBackend, CrawlRenderMode, KvCacheType, WikiEntityMode
28from .parsing import parse_bool
29from .validators import ConfigField
31log = logging.getLogger(__name__)
33# Sentinel for unset Path-typed fields. ``Field(default=Path())`` produces an
34# instance equal to this, so the model_validator can distinguish "user passed
35# the default" from "user explicitly set a value".
36_UNSET_PATH = Path()
39class Config(BaseSettings):
40 """Runtime configuration: one singleton instance, mutated by CLI overrides."""
42 model_config = SettingsConfigDict(
43 env_prefix="LILBEE_",
44 validate_assignment=True,
45 arbitrary_types_allowed=True,
46 extra="ignore",
47 )
49 # Paths: resolved from env/defaults in model_validator(mode='before')
50 data_root: Path = Field(default=Path())
51 # Writable so plugin-managed servers can pivot storage to a vault path on
52 # first boot; rebuild the index after migrating.
53 documents_dir: Path = ConfigField(default=Path(), writable=True)
54 data_dir: Path = Field(default=Path())
55 lancedb_dir: Path = Field(default=Path())
56 models_dir: Path = Field(default=Path())
57 # Markdown vault root; when set, search results carry a vault-relative
58 # ``vault_path`` so a host UI can deep-link into the vault.
59 vault_base: Path | None = ConfigField(default=None, writable=True)
61 # Human-readable label for the active lilbee. Empty falls back to
62 # "global" for the platform default dir, otherwise the project path
63 # (~-substituted and left-truncated to a hard cap).
64 lilbee_name: str = ConfigField(default="", writable=True)
65 # If True, the status bar pill shows the full absolute path: expands
66 # "global" to the on-disk platform-default path and skips the
67 # ~-substitution / left-truncation for project paths. Toggled by F4.
68 show_lilbee_path: bool = ConfigField(default=False, writable=True)
70 chat_model: str = Field(default="Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf", min_length=1)
71 embedding_model: str = Field(
72 default="nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q4_K_M.gguf",
73 min_length=1,
74 )
75 # Vision OCR model for scanned PDFs and image-only pages. Empty = disabled;
76 # there is no cross-role fallback onto the chat model even if multimodal.
77 vision_model: str = ConfigField(default="", public=True)
78 embedding_dim: int = Field(default=768, ge=1)
79 chunk_size: int = ConfigField(default=512, ge=64, writable=True, reindex=True)
80 chunk_overlap: int = ConfigField(default=100, ge=0, writable=True, reindex=True)
81 max_embed_chars: int = Field(default=2000, ge=1)
82 top_k: int = ConfigField(default=12, ge=1, writable=True)
83 max_distance: float = ConfigField(default=0.75, ge=0.0, writable=True)
84 # Floor for hybrid-search relevance scores (0.0 = no filtering). lilbee
85 # surfaces LanceDB's raw RRF sum, not a normalized score: with K=60 a
86 # chunk ranked first in both the vector and FTS lists tops out near
87 # 1/61 + 1/61 ~= 0.033, so any positive floor above that silently drops
88 # every result. Keep this at 0.0 unless the RRF scores are normalized first.
89 min_relevance_score: float = ConfigField(default=0.0, ge=0.0, writable=True)
90 adaptive_threshold: bool = Field(default=False)
91 rag_system_prompt: str = ConfigField(
92 default=DEFAULT_RAG_SYSTEM_PROMPT, min_length=1, writable=True
93 )
94 general_system_prompt: str = ConfigField(
95 default=DEFAULT_GENERAL_SYSTEM_PROMPT, min_length=1, writable=True
96 )
97 chat_mode: str = ConfigField(default=ChatMode.SEARCH.value, writable=True)
98 ignore_dirs: frozenset[str] = Field(default=DEFAULT_IGNORE_DIRS)
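99 # OCR for scanned PDFs via vision-capable chat model. NOTE(review): this and the auto-detect line below mention the chat model, but the vision_model comment says there is no fallback onto the chat model; confirm which is current.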
100 # None = auto-detect (use OCR if chat model is vision-capable).
101 # True = force OCR regardless of detection.
102 # False = disable OCR entirely.
103 enable_ocr: bool | None = ConfigField(default=None, writable=True)
104 # Per-page timeout in seconds for vision OCR (0 = no limit).
105 ocr_timeout: float = ConfigField(default=120.0, ge=0.0, writable=True)
106 # Outer wall-clock budget for the streamed pool drain: load grace plus
107 # per_page * pages. Tune up for slow hardware (M1 Pro vision is
108 # ~5min/page) or down for fast hardware. ocr_timeout still governs the
109 # per-page expectation that drives the total budget.
110 vision_load_budget_s: float = ConfigField(default=300.0, ge=0.0, writable=True)
112 # Tesseract fallback wall-clock timeout per file, seconds. 0 = no cap.
113 tesseract_timeout: float = ConfigField(default=60.0, ge=0.0, writable=True)
114 semantic_chunking: bool = ConfigField(default=False, writable=True)
115 topic_threshold: float = ConfigField(default=0.75, ge=0.0, le=1.0, writable=True)
116 server_host: str = "127.0.0.1"
117 server_port: int = Field(default=0, ge=0, le=65535)
118 cors_origins: list[str] = Field(default_factory=list)
119 cors_origin_regex: str = Field(default=DEFAULT_CORS_ORIGIN_REGEX)
120 # Seconds between SSE heartbeat events when the producer queue is idle.
121 # Must stay well below the plugin's STREAM_IDLE_TIMEOUT_MS (120s) so a
122 # single long-running vision OCR page can't starve the client into aborting.
123 sse_heartbeat_interval: float = ConfigField(default=30.0, ge=0.0, writable=True)
124 json_mode: bool = False
125 temperature: float | None = ConfigField(default=0.1, ge=0.0, writable=True)
126 top_p: float | None = ConfigField(default=0.9, ge=0.0, le=1.0, writable=True)
127 top_k_sampling: int | None = ConfigField(default=40, ge=1, writable=True)
128 # 1.1 is llama.cpp's default. Leaving this at None caused n-gram loops
129 # ("tire tire tire...") on some open-weights models.
130 repeat_penalty: float | None = ConfigField(default=1.1, ge=0.0, writable=True)
131 num_ctx: int | None = ConfigField(default=None, ge=1, writable=True)
132 max_tokens: int | None = ConfigField(default=4096, ge=1, writable=True)
133 seed: int | None = ConfigField(default=None, writable=True)
134 llm_provider: str = ConfigField(default="auto", writable=True)
135 # Per-server local model-manager URLs. Blank means "use the server's spec
136 # default" (resolved in providers.local_servers.config_urls); the default
137 # URL literal lives only in the spec, which core must not import.
138 ollama_base_url: str = ConfigField(default="", writable=True)
139 lm_studio_base_url: str = ConfigField(default="", writable=True)
140 llm_api_key: str = ConfigField(default="", writable=True, write_only=True)
141 openrouter_api_key: str = ConfigField(default="", writable=True, write_only=True)
142 gemini_api_key: str = ConfigField(default="", writable=True, write_only=True)
143 anthropic_api_key: str = ConfigField(default="", writable=True, write_only=True)
144 openai_api_key: str = ConfigField(default="", writable=True, write_only=True)
145 mistral_api_key: str = ConfigField(default="", writable=True, write_only=True)
146 deepseek_api_key: str = ConfigField(default="", writable=True, write_only=True)
147 hf_token: str = ConfigField(default="", writable=True, write_only=True)
149 # Retrieval quality knobs.
151 # Max chunks per source in top-k; prevents one large file monopolizing results.
152 diversity_max_per_source: int = ConfigField(default=5, ge=1, writable=True)
154 # MMR relevance/diversity tradeoff; 0 = max diversity, 1 = pure relevance
155 # (Carbonell & Goldstein 1998).
156 mmr_lambda: float = ConfigField(default=0.5, ge=0.0, le=1.0, writable=True)
158 # Extra candidates retrieved for MMR reranking (multiplies top_k).
159 candidate_multiplier: int = ConfigField(default=3, ge=1, writable=True)
161 # Chunk count at/above which sync builds an approximate (ANN) vector index
162 # so search stays fast at millions of vectors. Below this, search uses exact
163 # flat scan (faster and exact for small vaults). 0 disables the ANN index.
164 ann_index_threshold: int = ConfigField(default=50_000, ge=0, writable=True)
166 # LLM-generated alternative queries for expansion. 0 disables.
167 query_expansion_count: int = ConfigField(default=3, ge=0, writable=True)
169 # Skip LLM expansion when tokenized query length ≤ this. The LLM round-trip
170 # dominates latency on small local models; short queries already have strong
171 # BM25/vector signal. Concept-graph expansion still runs. 0 disables the skip.
172 expansion_short_query_tokens: int = ConfigField(default=2, ge=0, writable=True)
174 # Cosine-distance step when adaptive-widening retry kicks in.
175 adaptive_threshold_step: float = ConfigField(default=0.2, gt=0.0, writable=True)
177 # Reject expansion variants below expansion_similarity_threshold.
178 expansion_guardrails: bool = ConfigField(default=True, writable=True)
180 # Min cosine similarity between question and variant embeddings.
181 expansion_similarity_threshold: float = ConfigField(default=0.5, ge=0.0, le=1.0, writable=True)
183 # Sigmoid-normalized BM25 score above which query expansion is skipped.
184 expansion_skip_threshold: float = Field(default=0.8, ge=0.0, le=1.0)
186 # Min BM25 top-1 vs top-2 gap to skip expansion.
187 expansion_skip_gap: float = Field(default=0.15, ge=0.0, le=1.0)
189 # Chunks included in LLM context after adaptive selection.
190 max_context_sources: int = ConfigField(default=8, ge=1, writable=True)
192 # HyDE (Gao et al. 2022): hypothetical-answer embedding search. +~500ms.
193 hyde: bool = ConfigField(default=False, writable=True)
195 # HyDE result weight relative to real-doc search (0.0-1.0).
196 hyde_weight: float = ConfigField(default=0.7, ge=0.0, le=1.0, writable=True)
198 # HyDE prompt template. Must contain {question} placeholder.
199 hyde_prompt: str = (
200 "Write a 50-100 word passage that directly answers this question as if "
201 "it were an excerpt from a real document. Do not include any preamble, "
202 "just write the passage.\n\nQuestion: {question}"
203 )
205 # Reranker model ref. Empty disables reranking. Native GGUFs use
206 # llama-cpp rank pooling; hosted refs (cohere/voyage/jina/together/hf-tei)
207 # need the backend extra.
208 reranker_model: str = ConfigField(default="", public=True)
210 # Long-term chat memory. Off by default (opt-in): when disabled the whole
211 # subsystem is dormant and the write surfaces respond with an enable hint.
212 memory_enabled: bool = ConfigField(default=False, writable=True)
214 # Facts recalled by similarity per turn (preferences are always injected).
215 memory_top_k: int = ConfigField(default=5, ge=0, writable=True)
217 # Cosine-distance ceiling for fact recall; stricter than the document default
218 # because a tiny memory corpus floods at the wider document threshold.
219 memory_max_distance: float = ConfigField(default=0.6, ge=0.0, le=1.0, writable=True)
221 # Char/4 token budget for the injected memory block.
222 memory_token_budget: int = ConfigField(default=512, ge=0, writable=True)
224 # Per-owner soft cap; oldest memories evicted past it (runaway-write guard).
225 memory_max_per_owner: int = ConfigField(default=200, ge=1, writable=True)
227 # Cosine distance below which a new memory is treated as a duplicate of an
228 # existing same-owner memory and updates it in place instead of inserting.
229 memory_dedup_distance: float = ConfigField(default=0.05, ge=0.0, le=1.0, writable=True)
231 # LLM pass that extracts memories from the chat loop. Off by default; extracted
232 # memories are saved directly and recalled like any other memory.
233 memory_auto_extract: bool = ConfigField(default=False, writable=True)
235 # Candidate count sent to the reranker.
236 rerank_candidates: int = ConfigField(default=60, ge=1, writable=True, public=True)
238 # Date-range filter; only fires when a temporal keyword is detected.
239 temporal_filtering: bool = ConfigField(default=True, writable=True)
241 # If True, emit <think>…</think> content as separate SSE reasoning events;
242 # if False, strip it silently.
243 show_reasoning: bool = ConfigField(default=False, writable=True)
245 # Maximum reasoning characters before lilbee forces the model to answer.
246 # Per-model overrides apply on top of this default. Approx N/4 tokens.
247 # 0 disables the cap (unlimited reasoning; accept the runaway-loop risk).
248 max_reasoning_chars: int = ConfigField(default=64_000, ge=0, writable=True)
250 # Web crawling.
252 # How crawls fetch pages. ``http`` (default) uses a plain HTTP client with
253 # no browser, the lightweight path for static / server-rendered sites.
254 # ``browser`` launches a tuned Chromium with JavaScript enabled for sites
255 # that render content client-side, at a much higher memory cost.
256 crawl_render_mode: CrawlRenderMode = ConfigField(default=CrawlRenderMode.HTTP, writable=True)
258 # Browser-mode memory levers (only used when crawl_render_mode is browser).
259 # Recycle the Chromium process every N fetched pages to cap RSS growth on a
260 # long recursive crawl; 0 disables recycling. Raise on a roomy machine for
261 # fewer restarts, lower it if memory is tight.
262 crawl_browser_recycle_pages: int = ConfigField(default=50, ge=0, writable=True)
264 # Extra Chromium launch flags for browser-mode crawls. Defaults trim shared
265 # memory and GPU use; override to pass site- or environment-specific flags.
266 crawl_browser_extra_args: list[str] = ConfigField(
267 default_factory=lambda: ["--disable-dev-shm-usage", "--disable-gpu"],
268 writable=True,
269 )
271 # Optional global ceilings. None = no ceiling.
272 crawl_max_depth: int | None = ConfigField(default=None, ge=0, writable=True)
273 crawl_max_pages: int | None = ConfigField(default=None, ge=1, writable=True)
275 # Default page bound for an unbounded crawl (no explicit max_pages /
276 # crawl_max_pages), so a hostile site can't exhaust the disk by default.
277 # An explicit limit overrides it; raise this to crawl larger sites unbounded.
278 crawl_safety_max_pages: int = ConfigField(default=5_000, ge=1, writable=True)
280 # Per-URL fetch timeout, seconds.
281 crawl_timeout: int = ConfigField(default=30, ge=1, writable=True)
283 # 0 = unlimited, default = CPU count.
284 crawl_max_concurrent: int = Field(default=0, ge=0)
286 # Seconds between periodic syncs during crawl. 0 = sync only at end.
287 crawl_sync_interval: int = ConfigField(default=30, ge=0, writable=True)
289 # Per-request delay + jitter (defaults chosen to be gentler than crawl4ai's).
290 crawl_mean_delay: float = ConfigField(default=0.5, ge=0.0, writable=True)
291 crawl_max_delay_range: float = ConfigField(default=0.5, ge=0.0, writable=True)
293 # In-flight requests per crawl.
294 crawl_concurrent_requests: int = ConfigField(default=3, ge=1, writable=True)
296 # Per-domain rate-limiter that backs off on HTTP 429/503 and retries.
297 crawl_retry_on_rate_limit: bool = ConfigField(default=True, writable=True)
298 crawl_retry_base_delay_min: float = ConfigField(default=1.0, ge=0.0, writable=True)
299 crawl_retry_base_delay_max: float = ConfigField(default=3.0, ge=0.0, writable=True)
300 crawl_retry_max_backoff: float = ConfigField(default=30.0, ge=0.0, writable=True)
301 crawl_retry_max_attempts: int = ConfigField(default=3, ge=0, writable=True)
303 # Regex patterns dropped at link-discovery time. Defaults block CMS
304 # scaffolding (WordPress admin, archives, tracking params, etc.).
305 crawl_exclude_patterns: list[str] = ConfigField(
306 default_factory=lambda: list(DEFAULT_CRAWL_EXCLUDE_PATTERNS),
307 writable=True,
308 )
310 # Fraction of GPU/unified memory reserved for loaded models.
311 gpu_memory_fraction: float = ConfigField(default=0.75, ge=0.1, le=1.0, writable=True)
313 # Seconds a model stays loaded after last use. 0 = unload immediately.
314 model_keep_alive: int = ConfigField(default=300, ge=0, writable=True)
316 # Per-call deadline for one pool round-trip (send + recv). Embed batches
317 # larger than this on slow machines surface as TimeoutError; raise for
318 # heavy ingest jobs.
319 worker_pool_call_timeout_s: float = ConfigField(default=300.0, gt=0.0, writable=True)
321 # Spawn every configured role at startup instead of on first use. Trades
322 # a slower TUI mount (~1-3s per worker, cold-started in parallel) for a
323 # responsive first interaction. Roles whose model is unset are skipped,
324 # so a setup with only chat + embed never spawns rerank or vision.
325 # Set to false for headless / scripted use where the first call doesn't
326 # need to be fast.
327 worker_pool_eager_start: bool = ConfigField(default=True, writable=True)
329 # Idle worker reap. A worker that has been quiet for this many seconds
330 # is shut down to free RAM/VRAM; the next request respawns it.
331 # ``0`` disables reaping (workers stay up until TUI exit).
332 worker_pool_max_idle_s: float = ConfigField(default=300.0, ge=0.0, writable=True)
334 # Working n_ctx the dynamic picker aims for. Default scales with
335 # total host RAM (see core.system.chat_ctx_target_for_total_bytes):
336 # <16 GiB -> 8192, 16-32 -> 12288, 32-64 -> 16384, >=64 -> 24576.
337 # 8192 is the floor; the picker still clamps to training_ctx and
338 # host headroom.
339 chat_n_ctx_target: int = ConfigField(
340 default_factory=scaled_chat_ctx_target_default,
341 ge=512,
342 writable=True,
343 )
345 # Explicit ceiling for the dynamic n_ctx picker. ``None`` (default)
346 # lets the model's training_ctx from GGUF metadata be the ceiling,
347 # so a 128K-context model can reach for it on a host with the RAM
348 # to back it. Set explicitly to cap below the model's training_ctx.
349 num_ctx_max: int | None = ConfigField(default=None, ge=512, writable=True)
351 # Flash attention. None (default) = on with TypeError fallback for
352 # older llama-cpp-python builds, True = force on, False = off.
353 # Resolves the 'padding V cache to 1024' warning on models with
354 # uneven per-layer V dims (e.g. Gemma3) and saves ~25% KV memory.
355 flash_attention: bool | None = ConfigField(default=None, writable=True)
357 # KV cache element type. q8_0 (default) halves cache memory vs f16
358 # with no measurable quality loss for chat; q4_0 quarters it with a
359 # small quality cost. Both require flash attention to be enabled.
360 kv_cache_type: KvCacheType = ConfigField(default=KvCacheType.Q8_0, writable=True)
362 # Number of model layers to offload to GPU. None (default) = all
363 # layers, 0 = CPU only, positive int = partial offload. Useful when a
364 # discrete GPU has less VRAM than the model needs.
365 n_gpu_layers: int | None = ConfigField(default=None, writable=True)
367 # GPU device picker for dual-GPU machines (typical laptop case:
368 # discrete NVIDIA + integrated Intel/AMD). The Vulkan backend
369 # enumerates every adapter the system exposes and may pick the
370 # integrated one first, producing stalls or OOMs that look like
371 # llama.cpp bugs. Setting ``gpu_devices`` constrains visibility
372 # before llama_cpp loads, pinning inference to the chosen device(s).
373 #
374 # Accepts a comma-separated list of device indexes ("0", "1",
375 # "0,1") and applies it to every backend simultaneously:
376 # ``GGML_VK_VISIBLE_DEVICES`` for Vulkan, ``CUDA_VISIBLE_DEVICES``
377 # for CUDA, ``HIP_VISIBLE_DEVICES`` / ``ROCR_VISIBLE_DEVICES`` for
378 # ROCm. Setting one variable that the active backend ignores is
379 # harmless, so we set all four rather than detecting the build.
380 #
381 # Must be set before the first llama.cpp call; in practice that
382 # means via ``LILBEE_GPU_DEVICES`` or ``config.toml`` (TUI edits
383 # only take effect after a restart). ``None`` (default) hands off
384 # to the autodetect in ``providers/llama_cpp/gpu_select.py``,
385 # which parses ``vulkaninfo --summary`` and pins the discrete
386 # adapter when one is present. The autodetect is silent on failure
387 # (no vulkaninfo, single device, parse error), leaving the
388 # Vulkan-loader's default ordering in place.
389 gpu_devices: str | None = ConfigField(default=None, writable=True)
391 # Primary GPU index passed to ``Llama(main_gpu=...)``. Only matters
392 # when multiple devices remain visible after ``gpu_devices``; with
393 # a single visible device, llama.cpp ignores this. ``None``
394 # (default) lets llama.cpp pick (index 0).
395 main_gpu: int | None = ConfigField(default=None, writable=True)
397 # True = Markdown widget for chat; False = plain Static (faster).
398 markdown_rendering: bool = True
400 # TUI theme name; persists the last Ctrl+T pick across sessions.
401 theme: str = ConfigField(default="rose-pine", writable=True)
403 # Per-model generation defaults set via apply_model_defaults().
404 _model_defaults: Any = None
406 # Wiki layer. LLM-maintained synthesis pages with citation provenance.
407 # Off by default; flip to True (or set LILBEE_WIKI=1) to enable. When off,
408 # the Wiki view tab and the chat ModelBar's scope picker are both hidden.
409 wiki: bool = ConfigField(default=False, writable=True)
410 # Read-only: changing the directory at runtime strands prior wiki pages
411 # under the old path. Users who want a different location set it via
412 # LILBEE_WIKI_DIR / config.toml before the first wiki_build.
413 wiki_dir: str = "wiki"
414 wiki_prune_raw: bool = ConfigField(default=False, writable=True)
416 # Minimum cosine similarity between a page body and the mean of its
417 # source chunk vectors before a page is published (below → drafts).
418 # Replaces the old LLM-based faithfulness score: mean-of-chunks is a
419 # deterministic, zero-LLM-call signal that routes topic-drifted
420 # pages to drafts without the 0.0 to 1.0 ambiguity of a model-emitted
421 # number. Tuning knob: swap to per-chunk max or top-K-mean if the
422 # default 0.5 produces false drafts.
423 wiki_embedding_faithfulness_threshold: float = ConfigField(
424 default=0.5, ge=0.0, le=1.0, writable=True
425 )
427 # Per-call output token cap for wiki generation. Without this a
428 # reasoning model (Qwen3, DeepSeek-R1) can burn the full context
429 # window emitting <think> tokens before the actual answer, taking
430 # minutes per page. Default leaves headroom for a typical reasoning
431 # budget plus a real response (~1000 output + ~1000 slack).
432 wiki_summary_max_tokens: int = ConfigField(default=2048, ge=256, writable=True)
434 # Wiki generation is a structured-output task: the model must emit the
435 # block separators, the citation footnotes, and verbatim quotes. The
436 # usual chat default (~0.8) is too creative for that. Lowering the
437 # sampling temperature makes the model stick to the template and quote
438 # more faithfully. 0.1 leaves just enough slack to avoid hard loops.
439 wiki_temperature: float = ConfigField(default=0.1, ge=0.0, le=2.0, writable=True)
441 # Fraction of citations that must be stale before a wiki page is flagged.
442 wiki_stale_citation_threshold: float = ConfigField(default=0.5, ge=0.0, le=1.0, writable=True)
444 # Fraction of content changed that triggers human-review drift guard.
445 wiki_drift_threshold: float = ConfigField(default=0.3, ge=0.0, le=1.0, writable=True)
447 # LLM prompt templates for wiki page generation. Writable so advanced
448 # users can override them from /settings, config.toml, or
449 # ``LILBEE_WIKI_*_PROMPT`` env vars. Templates must keep the expected
450 # ``{placeholders}``. If you remove one the generator will crash on
451 # first use. The defaults below are the only reason the pipeline
452 # works out of the box.
453 wiki_summary_prompt: str = ConfigField(
454 writable=True,
455 default=(
456 "You are a knowledge compiler. Given the source chunks below from a single "
457 "document, write a concise wiki summary page in markdown.\n\n"
458 "Rules:\n"
459 "1. Every factual claim MUST have an inline citation [^src1], [^src2], etc.\n"
460 "2. Cite the EXACT text from the source that supports each claim by quoting it.\n"
461 "3. For interpretations or connections not directly stated in the source, "
462 "mark with [*inference*].\n"
463 "4. Use blockquotes (>) for directly cited facts.\n"
464 "5. End with a citation block in this format:\n\n"
465 "---\n"
466 "<!-- citations (auto-generated from _citations table -- do not edit) -->\n"
467 '[^src1]: {source_name}, excerpt: "exact quoted text"\n'
468 '[^src2]: {source_name}, excerpt: "exact quoted text"\n\n'
469 "Source document: {source_name}\n\n"
470 "Chunks:\n{chunks_text}\n\n"
471 "Write the wiki summary page now. Start with a heading."
472 ),
473 )
474 wiki_synthesis_prompt: str = ConfigField(
475 writable=True,
476 default=(
477 "You are a knowledge compiler. Given source chunks from MULTIPLE documents "
478 "about related concepts, write a synthesis wiki page in markdown that connects "
479 "ideas across sources.\n\n"
480 "Rules:\n"
481 "1. Every factual claim MUST have an inline citation [^src1], [^src2], etc.\n"
482 "2. Cite the EXACT text from the source that supports each claim by quoting it.\n"
483 "3. For connections, interpretations, or patterns you identify across sources, "
484 "mark with [*inference*].\n"
485 "4. Use blockquotes (>) for directly cited facts.\n"
486 "5. Reference each source by its filename when drawing connections.\n"
487 "6. End with a citation block in this format:\n\n"
488 "---\n"
489 "<!-- citations (auto-generated from _citations table -- do not edit) -->\n"
490 '[^src1]: {{source_name}}, excerpt: "exact quoted text"\n'
491 '[^src2]: {{source_name}}, excerpt: "exact quoted text"\n\n'
492 "Topic: {topic}\n\n"
493 "Sources:\n{source_list}\n\n"
494 "Chunks:\n{chunks_text}\n\n"
495 "Write the synthesis page now. Start with a heading."
496 ),
497 )
499 # Wiki synthesis clusterer backend. CONCEPTS requires the [graph] extra
500 # and falls back to EMBEDDING when unavailable.
501 wiki_clusterer: ClustererBackend = ConfigField(
502 default=ClustererBackend.EMBEDDING, writable=True
503 )
505 # Neighborhood size for the mutual-kNN graph. 0 = auto-scale from corpus size.
506 wiki_clusterer_k: int = ConfigField(default=0, ge=0, writable=True)
508 # LazyGraphRAG-style concept graph. Requires the [graph] extra.
509 concept_graph: bool = ConfigField(default=True, writable=True)
511 # Weight of concept overlap boost relative to vector similarity.
512 concept_boost_weight: float = ConfigField(default=0.3, ge=0.0, le=1.0, writable=True)
514 # Floor on post-boost distance to stop weak boosts from promoting marginal hits.
515 concept_boost_floor: float = ConfigField(default=0.05, ge=0.0, writable=True)
517 # Max noun-phrase concepts extracted per chunk.
518 concept_max_per_chunk: int = ConfigField(default=5, ge=1, writable=True)
520 # spaCy NER labels kept by the wiki entity extractor. Anything not
521 # in this set (QUANTITY, CARDINAL, DATE, TIME, MONEY, PERCENT,
522 # ORDINAL, ...) is dropped before aggregation. Override via
523 # LILBEE_CONCEPT_ALLOWED_ENT_TYPES as a comma-separated list.
524 concept_allowed_ent_types: frozenset[str] = Field(default=DEFAULT_ALLOWED_NER_LABELS)
526 # Strategy used to extract entities for the concept/entity wiki.
527 # NER_ENTITIES (default) pulls typed NER entities with spaCy; concept
528 # pages are proposed by the LLM inside the per-source batched call,
529 # not by the extractor. NER_CONCEPTS_PLUS_LLM_TYPES layers an
530 # LLM-proposed domain schema on top. LLM_TAGGED asks the LLM to tag
531 # every chunk (most expensive). Unimplemented modes fall back to
532 # NER_ENTITIES.
533 wiki_entity_mode: WikiEntityMode = ConfigField(
534 default=WikiEntityMode.NER_ENTITIES, writable=True
535 )
537 # Minimum distinct chunk mentions before an entity or concept earns
538 # its own wiki page. Filters one-off noise.
539 wiki_entity_min_mentions: int = ConfigField(default=3, ge=1, writable=True)
541 # Maximum chunks passed into each concept or entity page generation
542 # call. Caps context size so one page does not blow the context
543 # window on a prolific topic.
544 wiki_concept_max_chunks_per_page: int = ConfigField(default=25, ge=1, writable=True)
546 # Maximum number of related concepts the model is asked to list in
547 # the `## Related` section of each page.
548 wiki_related_max: int = ConfigField(default=8, ge=0, writable=True)
550 # Auto-update cap: if a single sync touches more than this many
551 # concept or entity pages, skip the per-slug regeneration and tell
552 # the user to run `lilbee wiki update` explicitly. Keeps a surprise
553 # bulk import from firing hundreds of LLM calls.
554 wiki_ingest_update_cap: int = ConfigField(default=20, ge=1, writable=True)
556 # Whether the per-source batched call asks the LLM to curate
557 # concept pages alongside the pre-extracted entity list. False →
558 # entity sections only, no concept curation (incremental ingest
559 # path uses this to avoid churning concept slugs per source-touch).
560 wiki_extract_concepts: bool = ConfigField(default=True, writable=True)
562 # Minimum chunk count a source must contribute before it is eligible
563 # for concept curation. Sources below the floor still get a batched
564 # call when they have entities (the prompt writes entity-only
565 # sections); sources below the floor with zero entities are skipped
566 # entirely. Prevents boilerplate / TOC / appendix documents from
567 # burning an LLM call to invent "concepts".
568 wiki_batch_min_chunks: int = ConfigField(default=3, ge=1, writable=True)
570 # Prompt template for the per-source batched call. Placeholders:
571 # {source}, {entity_list}, {chunks_text}, {concept_instruction}.
572 # {concept_instruction} is filled with a concept-curation paragraph
573 # when concepts are requested, or the empty string otherwise.
574 wiki_entity_batch_prompt: str = ConfigField(
575 writable=True,
576 default=(
577 "You are writing wiki sections based on these chunks from {source}.\n\n"
578 "{concept_instruction}"
579 "Write a wiki section for each of these NER ENTITIES: {entity_list}\n\n"
580 "Format each section exactly as:\n"
581 "## Name\n"
582 "{{content with [^src1]-style citations}}\n\n"
583 "Rules:\n"
584 "1. Every factual claim MUST have an inline citation [^src1], [^src2], etc.\n"
585 "2. Cite the EXACT text from the source that supports each claim by quoting it.\n"
586 "3. For interpretations or connections not directly stated, mark with [*inference*].\n"
587 "4. Use blockquotes (>) for directly cited facts.\n"
588 "5. End the response with a citation block in this format:\n\n"
589 "---\n"
590 "<!-- citations (auto-generated from _citations table -- do not edit) -->\n"
591 '[^src1]: {{source_name}}, excerpt: "exact quoted text"\n'
592 '[^src2]: {{source_name}}, excerpt: "exact quoted text"\n\n'
593 "Source chunks:\n{chunks_text}\n"
594 ),
595 )
597 # Class variable: not a settings field
598 _toml_cache: ClassVar[dict[str, Any]] = {}
@field_validator("lilbee_name", mode="after")
@classmethod
def _strip_lilbee_name(cls, value: str) -> str:
    """Return ``lilbee_name`` with surrounding whitespace removed.

    An empty result is meaningful rather than an error: it signals
    'use the path-derived label'.
    """
    trimmed = value.strip()
    return trimmed
@field_validator(
    "temperature",
    "top_p",
    "repeat_penalty",
    "top_k_sampling",
    "num_ctx",
    "seed",
    mode="before",
)
@classmethod
def _empty_string_to_none(cls, v: Any) -> Any:
    """Turn blank string input into ``None`` for the listed optional fields.

    Runs in ``before`` mode, so it sees the raw input. A string that is
    empty or whitespace-only becomes ``None``; every other value is handed
    back unchanged for the normal field validation to process.
    """
    is_blank_text = isinstance(v, str) and not v.strip()
    return None if is_blank_text else v
@field_validator("chat_mode", mode="before")
@classmethod
def _normalize_chat_mode(cls, v: Any) -> str:
    """Return the canonical ChatMode value for ``v``.

    ``None`` and the empty string select ``ChatMode.SEARCH``. Anything else
    is stringified, stripped and lower-cased before the enum lookup.

    Raises:
        ValueError: if the normalized text is not a ChatMode value; the
            message lists the accepted values.
    """
    missing = v is None or v == ""
    if missing:
        return ChatMode.SEARCH.value
    normalized = str(v).strip().lower()
    try:
        mode = ChatMode(normalized)
    except ValueError as exc:
        choices = ", ".join(repr(member.value) for member in ChatMode)
        raise ValueError(f"chat_mode must be one of {{{choices}}}, got {v!r}") from exc
    return mode.value
@field_validator("enable_ocr", mode="before")
@classmethod
def _parse_enable_ocr(cls, v: Any) -> bool | None:
    """Interpret enable_ocr given as an env-style string or a direct value.

    ``None`` and real booleans are returned unchanged. Strings that are
    empty, ``auto`` or ``none`` (case-insensitive, surrounding whitespace
    ignored) yield ``None``; other strings go through ``parse_bool``.
    Strings ``parse_bool`` rejects, and all remaining value types, are
    coerced with ``bool()``.
    """
    if v is None or isinstance(v, bool):
        return v
    if isinstance(v, str):
        if v.strip().lower() in ("", "auto", "none"):
            return None
        try:
            parsed = parse_bool(v)
        except ValueError:
            # Unrecognised text is not an error here: coerce by truthiness.
            return bool(v)
        return parsed
    return bool(v)
@field_validator("flash_attention", mode="before")
@classmethod
def _parse_flash_attention(cls, v: Any) -> bool | None:
    """Parse the auto/on/off tri-state for flash_attention.

    ``None`` and real booleans are returned unchanged. Strings that are
    empty, ``auto`` or ``none`` (case-insensitive, surrounding whitespace
    ignored) yield ``None``; other strings go through ``parse_bool``.
    A string ``parse_bool`` rejects falls back to ``None`` and is reported
    with a warning, matching the other hardware-option parsers in this
    class. Any remaining value type is coerced with ``bool()``.
    """
    if v is None or isinstance(v, bool):
        return v
    if isinstance(v, str):
        if v.strip().lower() in ("", "auto", "none"):
            return None
        try:
            return parse_bool(v)
        except ValueError:
            # Keep the lenient fallback, but make the ignored value visible.
            log.warning("Invalid LILBEE_FLASH_ATTENTION=%r, using auto", v)
            return None
    return bool(v)
@field_validator("n_gpu_layers", mode="before")
@classmethod
def _parse_n_gpu_layers(cls, v: Any) -> int | None:
    """Parse n_gpu_layers from a string or a direct value.

    ``None`` stays ``None``. For strings (stripped, lower-cased): empty,
    ``auto`` and ``none`` yield ``None``; ``cpu`` yields ``0``; anything
    else is parsed with ``int``, and a parse failure logs a warning and
    yields ``None``. Non-string values are converted with ``int``.
    """
    if v is None:
        return None
    if not isinstance(v, str):
        return int(v)
    token = v.strip().lower()
    if token in ("", "auto", "none"):
        return None
    if token == "cpu":
        return 0
    try:
        layers = int(token)
    except ValueError:
        log.warning("Invalid LILBEE_N_GPU_LAYERS=%r, using auto", v)
        return None
    return layers
@field_validator("main_gpu", mode="before")
@classmethod
def _parse_main_gpu(cls, v: Any) -> int | None:
    """Parse main_gpu from a string or a direct value.

    ``None`` stays ``None``. For strings (stripped, lower-cased): empty,
    ``auto`` and ``none`` yield ``None``; anything else is parsed with
    ``int``, and a parse failure logs a warning and yields ``None``.
    Non-string values are converted with ``int``.
    """
    if v is None:
        return None
    if not isinstance(v, str):
        return int(v)
    token = v.strip().lower()
    if token in ("", "auto", "none"):
        return None
    try:
        index = int(token)
    except ValueError:
        log.warning("Invalid LILBEE_MAIN_GPU=%r, using auto", v)
        return None
    return index
@field_validator("gpu_devices", mode="before")
@classmethod
def _parse_gpu_devices(cls, v: Any) -> str | None:
    """Normalize a device list: strip whitespace, drop empties, keep order.

    ``None`` stays ``None``. A string that is empty, ``auto``, ``all`` or
    ``none`` (case-insensitive, surrounding whitespace ignored) yields
    ``None``. Otherwise the string is split on commas and every non-empty
    entry must be an ASCII integer with at most one leading minus sign;
    the entries are re-joined with ``,``. If any entry is malformed the
    whole value is ignored with a warning and ``None`` is returned.
    Non-string values are returned as ``str(v)``.
    """
    import re

    if v is None:
        return None
    if isinstance(v, str):
        label = v.strip().lower()
        if label in ("", "auto", "all", "none"):
            return None
        parts = [p.strip() for p in v.split(",") if p.strip()]
        if not parts:
            return None
        # Anchored match: the previous lstrip("-").isdigit() check let
        # tokens such as "--1" and non-ASCII digits through.
        if any(re.fullmatch(r"-?[0-9]+", part) is None for part in parts):
            log.warning("Invalid LILBEE_GPU_DEVICES=%r, ignoring", v)
            return None
        return ",".join(parts)
    return str(v)
@field_validator("semantic_chunking", mode="before")
@classmethod
def _parse_semantic_chunking(cls, v: Any) -> bool:
    """Parse semantic_chunking from a string or a direct value.

    Strings go through ``parse_bool``; a string it rejects logs a warning
    and falls back to ``False``. Booleans are returned as-is and any other
    value is coerced with ``bool()``.
    """
    if isinstance(v, str):
        try:
            flag = parse_bool(v)
        except ValueError:
            log.warning("Invalid LILBEE_SEMANTIC_CHUNKING=%r, using default False", v)
            return False
        return flag
    return v if isinstance(v, bool) else bool(v)
@field_validator(
    "chat_model", "embedding_model", "vision_model", "reranker_model", mode="after"
)
@classmethod
def _normalize_model_tag(cls, v: str, info: ValidationInfo) -> str:
    """Validate and canonicalize a model reference.

    A non-blank value is parsed with ``parse_model_ref`` and returned in
    its ``for_openai_prefix()`` form. A blank value is rejected for
    ``chat_model`` and ``embedding_model`` and becomes ``""`` for the
    other fields.

    Raises:
        ValueError: if ``chat_model`` or ``embedding_model`` is blank.
    """
    has_text = bool(v) and bool(v.strip())
    if has_text:
        # Imported lazily, and only on the path that needs it.
        from lilbee.providers.model_ref import parse_model_ref

        return parse_model_ref(v).for_openai_prefix()
    required_roles = {"chat_model", "embedding_model"}
    if info.field_name in required_roles:
        raise ValueError(f"{info.field_name} must not be blank")
    return ""
@field_validator("ollama_base_url", "lm_studio_base_url", mode="after")
@classmethod
def _strip_trailing_slash(cls, v: str) -> str:
    """Remove every trailing ``/`` from a local-server base URL."""
    without_slash = v.rstrip("/")
    return without_slash
@field_validator("cors_origins", mode="before")
@classmethod
def _split_cors_origins(cls, v: Any) -> Any:
    """Split a comma-separated string into a list of non-empty origins.

    Entries are stripped of surrounding whitespace. Non-string input is
    returned unchanged.
    """
    if not isinstance(v, str):
        return v
    pieces = (chunk.strip() for chunk in v.split(","))
    return [piece for piece in pieces if piece]
@field_validator("crawl_exclude_patterns", mode="before")
@classmethod
def _split_crawl_exclude_patterns(cls, v: Any) -> Any:
    """Split a newline-separated string into a list of patterns.

    Newline is the separator because regexes routinely contain commas
    (e.g. `{2,4}`) and pipes (alternation). Each line is stripped and
    blank lines are dropped. Non-string input is returned unchanged.
    """
    if not isinstance(v, str):
        return v
    stripped_lines = (line.strip() for line in v.splitlines())
    return [line for line in stripped_lines if line]
@field_validator("crawl_exclude_patterns", mode="after")
@classmethod
def _validate_crawl_exclude_patterns(cls, v: list[str]) -> list[str]:
    """Ensure every entry compiles as a Python regex.

    All entries are checked, so a single error reports every bad pattern
    together with its index and the compiler's message.

    Raises:
        ValueError: if at least one entry fails ``re.compile``.
    """
    import re

    problems: list[str] = []
    for index, candidate in enumerate(v):
        try:
            re.compile(candidate)
        except re.error as exc:
            problems.append(f"[{index}] {candidate!r}: {exc}")
    if not problems:
        return v
    raise ValueError("invalid regex in crawl_exclude_patterns:\n " + "\n ".join(problems))
@field_validator("ignore_dirs", mode="before")
@classmethod
def _merge_ignore_dirs(cls, v: Any) -> frozenset[str]:
    """Union user-supplied directory names with ``DEFAULT_IGNORE_DIRS``.

    A string is split on commas (entries stripped, empties dropped); a
    set, frozenset or list is used as given. Any other input yields just
    the defaults.
    """
    if isinstance(v, str):
        additions = frozenset(filter(None, (entry.strip() for entry in v.split(","))))
    elif isinstance(v, (set, frozenset, list)):
        additions = frozenset(v)
    else:
        return DEFAULT_IGNORE_DIRS
    return DEFAULT_IGNORE_DIRS | additions
@field_validator("concept_allowed_ent_types", mode="before")
@classmethod
def _parse_ent_types(cls, v: Any) -> frozenset[str]:
    """Build the allowed entity-label set with replace semantics.

    The supplied labels are used as-is (upper-cased), not unioned with
    the defaults: asking for ``PERSON,ORG`` yields exactly those two.
    Strings are split on commas with entries stripped and empties
    dropped; set / frozenset / list items are stringified. An empty
    result, or any other input type, falls back to
    :data:`DEFAULT_ALLOWED_NER_LABELS`.
    """
    if isinstance(v, str):
        labels = frozenset(tok.strip().upper() for tok in v.split(",") if tok.strip())
    elif isinstance(v, (set, frozenset, list)):
        labels = frozenset(str(item).upper() for item in v)
    else:
        labels = frozenset()
    return labels or DEFAULT_ALLOWED_NER_LABELS
@model_validator(mode="before")
@classmethod
def _resolve_defaults(cls, data: Any) -> Any:
    """Fill in unset path fields before field validation runs.

    Non-dict input is returned untouched. When ``data_root`` is missing
    or equal to the unset sentinel it is resolved from, in order: the
    stripped ``LILBEE_DATA`` environment variable, ``find_local_root()``,
    then ``default_data_dir()``. ``documents_dir``, ``data_dir`` and
    ``lancedb_dir`` default to locations under that root, and
    ``models_dir`` defaults to ``canonical_models_dir()``.

    Returns:
        The same ``data`` dict, mutated in place.
    """
    from lilbee.core.system import canonical_models_dir, default_data_dir, find_local_root

    if not isinstance(data, dict):
        return data

    if data.get("data_root") in (None, _UNSET_PATH):
        data_env = os.environ.get("LILBEE_DATA", "").strip()
        if data_env:
            data["data_root"] = Path(data_env)
        else:
            local = find_local_root()
            data["data_root"] = local if local is not None else default_data_dir()

    root = data["data_root"]
    if isinstance(root, str):
        # This validator runs before field coercion, so a root supplied
        # as text (env var, config.toml) is still a str here and would
        # make the "/" joins below raise TypeError.
        root = Path(root)

    if data.get("documents_dir") in (None, _UNSET_PATH):
        data["documents_dir"] = root / "documents"
    if data.get("data_dir") in (None, _UNSET_PATH):
        data["data_dir"] = root / "data"
    if data.get("lancedb_dir") in (None, _UNSET_PATH):
        data["lancedb_dir"] = root / "data" / "lancedb"
    if data.get("models_dir") in (None, _UNSET_PATH):
        data["models_dir"] = canonical_models_dir()

    return data
@classmethod
def settings_customise_sources(
    cls,
    settings_cls: type[BaseSettings],
    init_settings: Any,
    env_settings: Any,
    dotenv_settings: Any,
    file_secret_settings: Any,
) -> tuple[Any, ...]:
    """Select the settings sources: init kwargs, plain env vars, then TOML.

    ``config.toml`` is looked up in the directory named by ``LILBEE_DATA``
    or, when that is unset or blank, in ``find_local_root()`` and finally
    ``default_data_dir()``. The TOML source is added only if the file
    exists and ``LILBEE_SKIP_TOML_CONFIG`` is not ``"1"``. The stock env,
    dotenv and file-secret sources passed in are not used.

    Returns:
        The chosen sources as a tuple, in the order listed above.
    """
    from lilbee.core.system import default_data_dir, find_local_root

    # Strip like _resolve_defaults does, so a padded or whitespace-only
    # LILBEE_DATA resolves config.toml against the same directory that
    # data_root ends up using.
    data_env = os.environ.get("LILBEE_DATA", "").strip()
    if data_env:
        toml_dir = Path(data_env)
    else:
        local = find_local_root()
        toml_dir = local if local is not None else default_data_dir()
    toml_path = toml_dir / "config.toml"

    plain_env = _PlainEnvSource(settings_cls, env_prefix="LILBEE_", env_ignore_empty=True)
    sources: list[Any] = [init_settings, plain_env]
    if toml_path.exists() and os.environ.get("LILBEE_SKIP_TOML_CONFIG") != "1":
        sources.append(_TomlSource(settings_cls, toml_path))
    return tuple(sources)
@property
def model_defaults(self) -> Any:
    """Currently stored per-model generation defaults (read-only view).

    Change them through apply_model_defaults() / clear_model_defaults().
    """
    current = self._model_defaults
    return current
def apply_model_defaults(self, defaults: Any) -> None:
    """Remember ``defaults`` as the per-model layer of the 3-layer merge."""
    # Written through object.__setattr__ rather than normal attribute
    # assignment, which skips this class's own __setattr__ handling.
    assign = object.__setattr__
    assign(self, "_model_defaults", defaults)
def clear_model_defaults(self) -> None:
    """Drop any stored per-model defaults by setting them back to None."""
    # Written through object.__setattr__ rather than normal attribute
    # assignment, which skips this class's own __setattr__ handling.
    assign = object.__setattr__
    assign(self, "_model_defaults", None)
def generation_options(self, **overrides: Any) -> dict[str, Any]:
    """Build the effective generation options from three layers.

    Later layers win: stored model defaults, then the user's configured
    values, then the per-call ``overrides``. A ``None`` in the user or
    override layer is skipped, so it never masks a lower layer.
    """
    merged = _model_defaults_dict(self._model_defaults)
    configured: dict[str, Any] = {
        "temperature": self.temperature,
        "top_p": self.top_p,
        "top_k": self.top_k_sampling,
        "repeat_penalty": self.repeat_penalty,
        "num_ctx": self.num_ctx,
        "seed": self.seed,
        "max_tokens": self.max_tokens,
    }
    for layer in (configured, overrides):
        merged.update({key: val for key, val in layer.items() if val is not None})
    return merged
def _model_defaults_dict(defaults: Any) -> dict[str, Any]:
    """Return the dataclass fields of ``defaults`` whose value is not None.

    ``None`` input yields an empty dict.
    """
    if defaults is None:
        return {}
    from dataclasses import fields as dc_fields

    populated: dict[str, Any] = {}
    for spec in dc_fields(defaults):
        current = getattr(defaults, spec.name)
        if current is not None:
            populated[spec.name] = current
    return populated
class _PlainEnvSource:
    """Settings source that returns prefixed env vars as raw strings.

    No parsing happens here; the field validators on the settings class
    are responsible for interpreting the text.
    """

    def __init__(
        self,
        settings_cls: type[BaseSettings],
        env_prefix: str,
        env_ignore_empty: bool = True,
    ) -> None:
        """Record the prefix, the empty-value policy and the field names."""
        self._prefix = env_prefix
        self._ignore_empty = env_ignore_empty
        self._fields = set(settings_cls.model_fields)

    def __call__(self) -> dict[str, Any]:
        """Map each field to its ``<prefix><FIELD_NAME>`` env value, if set.

        Unset variables are omitted; empty ones are omitted too when
        ``env_ignore_empty`` is true.
        """
        collected: dict[str, Any] = {}
        for name in self._fields:
            value = os.environ.get(f"{self._prefix}{name.upper()}")
            if value is None or (self._ignore_empty and value == ""):
                continue
            collected[name] = value
        return collected
956class _TomlSource:
957 """Custom pydantic-settings source that reads config.toml."""
959 def __init__(self, settings_cls: type[BaseSettings], path: Path) -> None:
960 self._path = path
962 def __call__(self) -> dict[str, Any]:
963 import tomllib
965 try:
966 with self._path.open("rb") as f:
967 data = tomllib.load(f)
968 except (ValueError, OSError):
969 log.warning("Failed to read %s, ignoring", self._path)
970 return {}
971 # Empty strings represent "no persisted value" for nullable scalar
972 # fields (legacy from set_setting writing "" for None). Pydantic
973 # can't coerce "" to int|None, so dropping them here lets the field
974 # default apply rather than crashing the whole Config load.
975 return {k: str(v) for k, v in data.items() if str(v) != ""}
def _build_cfg() -> tuple[Config, Exception | None]:
    """Build cfg; on stale-config validation failure, fall back to defaults.

    A persisted ``config.toml`` from before a breaking schema change can
    contain values the new validators reject. Crashing at module import
    means every command (``lilbee --help`` included) emits a Python
    traceback. Falling back to env+defaults lets the package load; the
    CLI / TUI surfaces the original error before doing real work.

    Returns:
        ``(config, None)`` when the first build succeeds, otherwise
        ``(fallback_config, original_exception)``. An exception raised by
        the fallback build itself propagates.
    """
    try:
        return Config(), None
    except Exception as exc:  # deliberate catch-all at the import boundary
        # Retry with the TOML source disabled. Remember any value the user
        # had already exported so the retry does not erase it.
        previous = os.environ.get("LILBEE_SKIP_TOML_CONFIG")
        os.environ["LILBEE_SKIP_TOML_CONFIG"] = "1"
        try:
            return Config(), exc
        finally:
            if previous is None:
                os.environ.pop("LILBEE_SKIP_TOML_CONFIG", None)
            else:
                os.environ["LILBEE_SKIP_TOML_CONFIG"] = previous
cfg, config_load_error = _build_cfg()

# Export the resolved data root as LILBEE_DATA so spawn-context worker
# subprocesses inherit the same data root. A value that is already
# present in the environment is left alone.
if "LILBEE_DATA" not in os.environ:
    os.environ["LILBEE_DATA"] = str(cfg.data_root)