Coverage for src / lilbee / cli / settings_map.py: 100%
30 statements
« prev ^ index » next — coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Shared settings map for interactive configuration."""
3from __future__ import annotations
5from dataclasses import dataclass, field
6from enum import StrEnum
8from pydantic_core import PydanticUndefined
10from lilbee.cli.tui.app import DARK_THEMES
11from lilbee.core.config import cfg
12from lilbee.core.config.enums import ChatMode, ClustererBackend, KvCacheType, WikiEntityMode
15class RenderStyle(StrEnum):
16 """How a setting is displayed in /settings."""
18 COMPACT = "compact"
19 FULL = "full"
20 LIST_COLLAPSED = "list_collapsed"
21 MULTILINE = "multiline"
@dataclass(frozen=True)
class SettingDef:
    """Metadata describing one interactive setting.

    ``hidden`` removes the setting from the TUI settings screen but keeps it
    reachable through ``lilbee set`` and the ``LILBEE_*`` env var — intended
    for transport/server knobs that a typical TUI session doesn't need.
    """

    # NOTE(review): ``type``/``nullable`` presumably drive value parsing and
    # whether a blank value is accepted — confirm against the settings UI.
    type: type
    nullable: bool
    writable: bool = True
    # Plain default instead of field(default=...): enum members are immutable,
    # so no default_factory indirection is needed.
    render: RenderStyle = RenderStyle.COMPACT
    group: str = "General"
    help_text: str = ""
    choices: tuple[str, ...] | None = None
    hidden: bool = False
def get_default(key: str) -> object:
    """Return the cfg default for a setting key.

    Looks the field up on the ``cfg`` pydantic model; a factory default is
    invoked, a plain default is returned as-is, and an undefined default
    (``PydanticUndefined``) maps to ``None``.
    """
    info = type(cfg).model_fields[key]
    factory = info.default_factory
    if factory is not None:
        return factory()  # type: ignore[call-arg]
    default = info.default
    return None if default is PydanticUndefined else default
# Registry of every setting exposed to interactive configuration.
# Keys are field names on the ``cfg`` pydantic model: ``get_default`` resolves
# defaults through ``type(cfg).model_fields[key]``, so each key here must
# exist on cfg. Dict insertion order is preserved.
SETTINGS_MAP: dict[str, SettingDef] = {
    # NOTE(review): writable=False entries appear read-only here; presumably
    # the model slots are switched through a dedicated picker — confirm.
    "chat_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group="Models",
        help_text="LLM used for chat generation (vision and reranking are separate slots)",
    ),
    "vision_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group="Models",
        help_text="Vision model for scanned PDF OCR (empty = disabled; Tesseract only)",
    ),
    "enable_ocr": SettingDef(
        bool,
        nullable=True,
        group="Ingest",
        help_text="Vision OCR for scanned PDFs (empty = auto-detect from vision_model)",
    ),
    "ocr_timeout": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text="Per-page timeout in seconds for vision OCR (0 = no limit)",
    ),
    "vision_load_budget_s": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text=(
            "Wall-clock seconds reserved for the vision worker to load the"
            " model. Total PDF-OCR budget = load_budget + ocr_timeout * pages."
        ),
    ),
    "semantic_chunking": SettingDef(
        bool,
        nullable=False,
        group="Ingest",
        help_text="Opt-in topic-aware chunker (default off; may fragment numbered procedures)",
    ),
    "topic_threshold": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text="Topic-boundary similarity threshold, 0.0-1.0, used when semantic chunking is on",
    ),
    "embedding_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group="Models",
        help_text="Model used to embed document chunks",
    ),
    "reranker_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group="Models",
        help_text="Cross-encoder model for result reranking",
    ),
    "temperature": SettingDef(
        float,
        nullable=True,
        group="Generation",
        help_text="Sampling temperature (higher = more creative)",
    ),
    "top_p": SettingDef(
        float,
        nullable=True,
        group="Generation",
        help_text="Nucleus sampling cutoff probability",
    ),
    "top_k_sampling": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text="Top-K sampling: number of tokens to consider",
    ),
    "repeat_penalty": SettingDef(
        float,
        nullable=True,
        group="Generation",
        help_text="Penalty for repeating tokens",
    ),
    "num_ctx": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text=(
            "Context window size in tokens. Leave empty to size automatically "
            "to the host's available memory (capped at num_ctx_max)."
        ),
    ),
    "num_ctx_max": SettingDef(
        int,
        nullable=False,
        group="Generation",
        help_text=(
            "Upper bound for the dynamic context picker when num_ctx is unset. "
            "Higher allows more retrieval context on hosts with spare memory."
        ),
    ),
    "flash_attention": SettingDef(
        bool,
        nullable=True,
        group="Generation",
        help_text=(
            "Flash attention. Empty (auto) tries it on with a fallback for older "
            "llama-cpp-python builds; resolves the V-cache padding warning on "
            "models with uneven per-layer V dims."
        ),
    ),
    "kv_cache_type": SettingDef(
        str,
        nullable=False,
        group="Generation",
        help_text=(
            "KV cache element type. q8_0 / q4_0 halve or quarter cache memory "
            "but require flash attention to be enabled."
        ),
        # Valid values come straight from the KvCacheType enum.
        choices=tuple(t.value for t in KvCacheType),
    ),
    "n_gpu_layers": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text=(
            "Layers to offload to GPU. Empty = all (recommended), 0 = CPU only, "
            "positive int = partial offload for tight VRAM."
        ),
    ),
    "gpu_devices": SettingDef(
        str,
        nullable=True,
        group="Generation",
        help_text=(
            "Restrict llama.cpp to specific GPU indexes on dual-GPU machines "
            "(e.g. NVIDIA dGPU + integrated). Comma-separated, like '0' or '0,1'. "
            "Applies to Vulkan, CUDA, and ROCm. Requires a restart to take effect."
        ),
    ),
    "main_gpu": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text=(
            "Primary GPU index for llama.cpp when multiple devices are visible. "
            "Empty = let llama.cpp pick (index 0). Set this together with "
            "gpu_devices to pin inference to a specific card."
        ),
    ),
    "seed": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text="Random seed for reproducible output",
    ),
    "rag_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group="Generation",
        help_text="System prompt sent when answering with retrieved context",
    ),
    "general_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group="Generation",
        help_text="System prompt sent when there are no documents to ground the answer",
    ),
    "chat_mode": SettingDef(
        str,
        nullable=False,
        group="Generation",
        choices=tuple(m.value for m in ChatMode),
        help_text="search runs every chat turn through document retrieval; chat skips it",
    ),
    "top_k": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Number of chunks returned by search",
    ),
    "rerank_candidates": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Candidate pool size for reranking",
    ),
    "show_reasoning": SettingDef(
        bool,
        nullable=False,
        group="Display",
        help_text="Show model reasoning/thinking tokens in output",
    ),
    "theme": SettingDef(
        str,
        nullable=False,
        group="Display",
        help_text="TUI color theme. Cycle with Ctrl+T; the active theme persists across sessions.",
        choices=tuple(DARK_THEMES),
    ),
    "wiki": SettingDef(
        bool,
        nullable=False,
        group="Wiki",
        help_text="Enable the wiki layer (synthesis pages with citations)",
    ),
    "wiki_dir": SettingDef(
        str,
        nullable=False,
        group="Wiki",
        help_text="Directory under data_root where wiki pages are stored",
    ),
    "wiki_prune_raw": SettingDef(
        bool,
        nullable=False,
        group="Wiki",
        help_text="Delete raw chunks after summarizing into the wiki",
    ),
    "wiki_embedding_faithfulness_threshold": SettingDef(
        float,
        nullable=False,
        group="Wiki",
        help_text=(
            "Minimum cosine similarity (0-1) between a generated page and "
            "the mean of its source chunk vectors before publishing. "
            "Pages below the threshold route to drafts/."
        ),
    ),
    "wiki_stale_citation_threshold": SettingDef(
        float,
        nullable=False,
        group="Wiki",
        help_text="Fraction of stale citations that triggers page regeneration",
    ),
    "wiki_drift_threshold": SettingDef(
        float,
        nullable=False,
        group="Wiki",
        help_text="Max fraction of changed lines before regeneration requires review",
    ),
    "wiki_clusterer": SettingDef(
        str,
        nullable=False,
        group="Wiki",
        help_text="Synthesis clusterer backend (embedding or concepts)",
        choices=tuple(b.value for b in ClustererBackend),
    ),
    "wiki_entity_mode": SettingDef(
        str,
        nullable=False,
        group="Wiki",
        help_text=(
            "Entity extraction strategy "
            "(ner_entities = default, typed NER entities; "
            "plus_llm_types = NER + LLM-proposed schema; "
            "llm_tagged = LLM tags every chunk)"
        ),
        choices=tuple(m.value for m in WikiEntityMode),
    ),
    "wiki_entity_min_mentions": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Minimum chunk mentions before an entity or concept gets its own page",
    ),
    "wiki_concept_max_chunks_per_page": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Maximum chunks passed into each concept or entity page generation call",
    ),
    "wiki_related_max": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Maximum related concepts listed in the `## Related` section of each page",
    ),
    "wiki_ingest_update_cap": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text=(
            "Touched-page cap for auto-update after sync. "
            "Beyond this count, run `lilbee wiki update` manually."
        ),
    ),
    "wiki_summary_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group="Wiki",
        help_text=(
            "Prompt for per-source summary pages. "
            "Must keep the {source_name} and {chunks_text} placeholders."
        ),
    ),
    "wiki_synthesis_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group="Wiki",
        help_text=(
            "Prompt for cross-source synthesis pages. "
            "Must keep {topic}, {source_list}, and {chunks_text}."
        ),
    ),
    "wiki_entity_batch_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group="Wiki",
        help_text=(
            "Prompt for the per-source batched call. "
            "Must keep {source}, {entity_list}, {chunks_text}, and {concept_instruction}."
        ),
    ),
    "wiki_extract_concepts": SettingDef(
        bool,
        nullable=False,
        group="Wiki",
        help_text=(
            "Whether the per-source batched call asks the LLM to curate concept pages "
            "alongside the pre-extracted entity list."
        ),
    ),
    "wiki_batch_min_chunks": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text=(
            "Minimum chunks a source must contribute before its batched call includes "
            "concept curation. Sources below the floor skip the concept-curation "
            "instruction; sources with zero entities AND below the floor are skipped entirely."
        ),
    ),
    "wiki_clusterer_k": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Mutual-kNN neighborhood size for the clusterer (0 = auto)",
    ),
    "crawl_max_depth": SettingDef(
        int,
        nullable=True,
        group="Crawling",
        help_text="Optional recursion-depth cap (blank = no cap; per-crawl values win)",
    ),
    "crawl_max_pages": SettingDef(
        int,
        nullable=True,
        group="Crawling",
        help_text="Optional global cap on total pages per crawl (blank = no cap).",
    ),
    "crawl_timeout": SettingDef(
        int,
        nullable=False,
        group="Crawling",
        help_text="Per-page fetch timeout in seconds",
    ),
    "crawl_sync_interval": SettingDef(
        int,
        nullable=False,
        group="Crawling",
        help_text="Seconds between periodic re-syncs during a crawl (0 = sync only at end)",
    ),
    "crawl_mean_delay": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Seconds between in-flight requests within a single crawl",
    ),
    "crawl_max_delay_range": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Random jitter (seconds) added on top of mean delay",
    ),
    "crawl_concurrent_requests": SettingDef(
        int,
        nullable=False,
        group="Crawling",
        help_text="Concurrent in-flight URLs within one crawl",
    ),
    "crawl_retry_on_rate_limit": SettingDef(
        bool,
        nullable=False,
        group="Crawling",
        help_text="Enable per-domain backoff and retries on HTTP 429/503",
    ),
    "crawl_retry_base_delay_min": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Minimum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_base_delay_max": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Maximum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_max_backoff": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Upper bound on any single backoff wait (seconds)",
    ),
    "crawl_retry_max_attempts": SettingDef(
        int,
        nullable=False,
        group="Crawling",
        help_text="Retry count per URL when a rate-limit code comes back",
    ),
    "crawl_exclude_patterns": SettingDef(
        list,
        nullable=False,
        group="Crawling",
        render=RenderStyle.LIST_COLLAPSED,
        help_text=(
            "Regex patterns that skip URLs at link-discovery time during "
            "recursive crawls. One per line."
        ),
    ),
    # Provider API keys — each enables that provider's models in the chat picker.
    "openrouter_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="OpenRouter API key (enables frontier models in chat picker)",
    ),
    "gemini_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="Google Gemini API key (enables frontier models in chat picker)",
    ),
    "anthropic_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="Anthropic API key (enables frontier models in chat picker)",
    ),
    "openai_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="OpenAI API key (enables frontier models in chat picker)",
    ),
    "mistral_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="Mistral API key (enables frontier models in chat picker)",
    ),
    "deepseek_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="DeepSeek API key (enables frontier models in chat picker)",
    ),
    "chunk_size": SettingDef(
        int,
        nullable=False,
        group="Ingest",
        help_text="Document chunk size in tokens (changes invalidate the index)",
    ),
    "chunk_overlap": SettingDef(
        int,
        nullable=False,
        group="Ingest",
        help_text="Tokens of overlap between adjacent chunks (preserves context across boundaries)",
    ),
    "tesseract_timeout": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text="Per-page Tesseract timeout in seconds (used when no vision model is set)",
    ),
    "worker_pool_call_timeout_s": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text=(
            "Per-call deadline for one worker-pool round-trip in seconds. "
            "Raise this for very large embed batches on slow machines"
        ),
    ),
    "worker_pool_eager_start": SettingDef(
        bool,
        nullable=False,
        group="Ingest",
        help_text=(
            "Spawn every registered worker at TUI startup instead of on first use. "
            "Trades 1-3 seconds of cold-start per role for first-call latency"
        ),
    ),
    "worker_pool_max_idle_s": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text=(
            "Shut a worker down after this many seconds idle to free RAM/VRAM. "
            "0 disables idle reaping"
        ),
    ),
    "max_tokens": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text="Hard cap on generated tokens per response (blank = no cap)",
    ),
    "max_reasoning_chars": SettingDef(
        int,
        nullable=False,
        group="Generation",
        help_text=(
            "Maximum reasoning characters before lilbee forces the model to answer "
            "(0 = unlimited; per-model overrides apply on top)"
        ),
    ),
    "model_keep_alive": SettingDef(
        int,
        nullable=False,
        group="Generation",
        help_text="Seconds the loaded model stays warm between calls (0 = unload immediately)",
    ),
    "gpu_memory_fraction": SettingDef(
        float,
        nullable=False,
        group="Generation",
        help_text="Fraction of GPU memory the model is allowed to claim (0.1-1.0)",
    ),
    "candidate_multiplier": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Candidate-pool multiplier over top_k before reranking",
    ),
    "max_distance": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Maximum vector distance for retrieval matches (lower = stricter)",
    ),
    "min_relevance_score": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Minimum RRF relevance score for hybrid search results (0.0 = no filter)",
    ),
    "max_context_sources": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Maximum unique sources contributing chunks to a single answer",
    ),
    "diversity_max_per_source": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Maximum chunks accepted from any one source (caps source dominance)",
    ),
    "mmr_lambda": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text=(
            "MMR lambda balancing relevance vs diversity (0 = max diversity, 1 = max relevance)"
        ),
    ),
    "temporal_filtering": SettingDef(
        bool,
        nullable=False,
        group="Retrieval",
        help_text="Detect temporal queries and bias retrieval toward recent chunks",
    ),
    "hyde": SettingDef(
        bool,
        nullable=False,
        group="Retrieval",
        help_text="Use HyDE (hypothetical answer expansion) to broaden retrieval",
    ),
    "hyde_weight": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Weight on the HyDE-generated query vector when blending with the original",
    ),
    "query_expansion_count": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Number of paraphrase expansions per query (0 disables expansion)",
    ),
    "expansion_similarity_threshold": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Minimum cosine similarity an expansion must keep with the original query",
    ),
    "expansion_short_query_tokens": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Queries at or below this token count skip expansion (saves a model call)",
    ),
    "expansion_guardrails": SettingDef(
        bool,
        nullable=False,
        group="Retrieval",
        help_text="Drop expansions that diverge from the original intent",
    ),
    "adaptive_threshold_step": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Step size for adaptive relevance-score relaxation when initial recall is empty",
    ),
    "concept_graph": SettingDef(
        bool,
        nullable=False,
        group="Retrieval",
        help_text="Boost retrieval scores for chunks that share concepts with the query",
    ),
    "concept_boost_weight": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Maximum boost (0-1) the concept graph can add to a chunk's relevance",
    ),
    "concept_boost_floor": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Minimum cosine similarity needed before the concept graph boosts a chunk",
    ),
    "concept_max_per_chunk": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Maximum concept tags stored per chunk (caps graph density)",
    ),
    "documents_dir": SettingDef(
        str,
        nullable=False,
        group="System",
        help_text="Local documents root that lilbee sync ingests (blank = data_root/documents)",
    ),
    "vault_base": SettingDef(
        str,
        nullable=True,
        group="System",
        help_text="Markdown vault root; results carry a vault-relative path (blank = none)",
    ),
    "sse_heartbeat_interval": SettingDef(
        float,
        nullable=False,
        group="System",
        help_text="Seconds between SSE keep-alive frames sent to idle HTTP stream clients",
        # Kept out of the TUI screen; still reachable via `lilbee set` and
        # the LILBEE_* env var (see SettingDef.hidden).
        hidden=True,
    ),
    "llm_provider": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        choices=("auto", "llama-cpp", "remote"),
        help_text=(
            "Provider routing: auto picks the first key present; force a specific one when set"
        ),
    ),
    "remote_base_url": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="OpenAI-compatible base URL (Ollama default: http://localhost:11434)",
    ),
    "wiki_summary_max_tokens": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Maximum tokens generated per wiki page",
    ),
    "wiki_temperature": SettingDef(
        float,
        nullable=False,
        group="Wiki",
        help_text="Temperature used for wiki page synthesis (low = stay close to sources)",
    ),
}