# Coverage report for src/lilbee/app/settings_map.py: 100% of 43 statements
# (coverage.py v7.13.4, created at 2026-06-28 01:01 +0000)
"""Shared settings map for interactive configuration."""

from __future__ import annotations

from dataclasses import dataclass, field
from enum import StrEnum

from pydantic_core import PydanticUndefined

from lilbee.app.themes import DARK_THEMES
from lilbee.core.config import cfg
from lilbee.core.config.enums import (
    ChatMode,
    ClustererBackend,
    CrawlRenderMode,
    KvCacheType,
    WikiEntityMode,
)

21class RenderStyle(StrEnum):
22 """How a setting is displayed in /settings."""
24 COMPACT = "compact"
25 FULL = "full"
26 LIST_COLLAPSED = "list_collapsed"
27 MULTILINE = "multiline"
30class SettingGroup(StrEnum):
31 """Logical bucket names rendered by ``/settings`` and ``settings_list``."""
33 MODELS = "Models"
34 GENERATION = "Generation"
35 RETRIEVAL = "Retrieval"
36 INGEST = "Ingest"
37 WIKI = "Wiki"
38 MEMORY = "Memory"
39 CRAWLING = "Crawling"
40 LOCAL_SERVERS = "Local-Servers"
41 API_KEYS = "API-Keys"
42 SYSTEM = "System"
43 DISPLAY = "Display"
44 GENERAL = "General"

@dataclass(frozen=True)
class SettingDef:
    """Metadata for an interactive setting.

    ``writable`` is a TUI rendering hint: fields marked ``writable=False``
    (the model role slots) get a dedicated picker rather than an inline
    editor, and the ``/set`` slash command refuses them. The actual
    write contract for HTTP / MCP / programmatic surfaces lives in
    ``config_meta.WRITABLE_CONFIG_FIELDS`` + ``MODEL_ROLE_FIELDS`` and
    is enforced by ``app.settings.apply_settings_update``.

    ``hidden`` keeps the setting out of the TUI settings screen while
    leaving it reachable via ``lilbee set`` and the ``LILBEE_*`` env
    var: use it for transport/server knobs that aren't relevant to a
    typical TUI session.

    Attributes:
        type: Python type of the setting's value (first positional argument).
        nullable: Whether the setting may be left empty/unset
            (presumably ``None`` on the config model; confirm against cfg).
        writable: TUI rendering hint described above; defaults to ``True``.
        render: Display style for the setting; defaults to
            ``RenderStyle.COMPACT``.
        group: Bucket the setting is listed under; defaults to
            ``SettingGroup.GENERAL``.
        help_text: Human-readable description of the setting.
        choices: Optional tuple of allowed string values, or ``None``
            when the value is not restricted to a fixed set.
        hidden: Visibility flag described above; defaults to ``False``.
    """

    type: type
    nullable: bool
    writable: bool = True
    render: RenderStyle = field(default=RenderStyle.COMPACT)
    group: SettingGroup = SettingGroup.GENERAL
    help_text: str = ""
    choices: tuple[str, ...] | None = None
    hidden: bool = False


def get_default(key: str) -> object:
    """Return the cfg default for a setting key.

    Resolution order:

    1. If the field declares a ``default_factory``, call it and return
       the freshly built value.
    2. If the field has no default at all (``PydanticUndefined``),
       return ``None``.
    3. Otherwise return the declared default as-is.

    Args:
        key: Name of a field on the ``cfg`` model class.

    Returns:
        The default value for ``key``, or ``None`` when none is declared.

    Raises:
        KeyError: If ``key`` is not present in the model's ``model_fields``.
    """
    field_info = type(cfg).model_fields[key]
    factory = field_info.default_factory
    if factory is not None:
        # The factory is invoked with no arguments, hence the ignore.
        return factory()  # type: ignore[call-arg]
    default = field_info.default
    return None if default is PydanticUndefined else default


# Registry of interactive settings: setting key -> SettingDef metadata.
# Keys are presumably the same names ``get_default`` looks up on the cfg
# model (verify against the config class). Entries are not sorted by
# group; the ``group`` argument on each entry carries the bucket.
SETTINGS_MAP: dict[str, SettingDef] = {
    "chat_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="LLM used for chat generation (vision and reranking are separate slots)",
    ),
    "vision_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="Vision model for scanned PDF OCR (empty = disabled; Tesseract only)",
    ),
    "enable_ocr": SettingDef(
        bool,
        nullable=True,
        group=SettingGroup.INGEST,
        help_text="Vision OCR for scanned PDFs (empty = auto-detect from vision_model)",
    ),
    "ocr_timeout": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Per-page timeout in seconds for vision OCR (0 = no limit)",
    ),
    "vision_load_budget_s": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Wall-clock seconds reserved for the vision worker to load the"
            " model. Total PDF-OCR budget = load_budget + ocr_timeout * pages."
        ),
    ),
    "semantic_chunking": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Opt-in topic-aware chunker (default off; may fragment numbered procedures)",
    ),
    "topic_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Topic-boundary similarity threshold, 0.0-1.0, used when semantic chunking is on",
    ),
    "embedding_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="Model used to embed document chunks",
    ),
    "reranker_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="Cross-encoder model for result reranking",
    ),
    "temperature": SettingDef(
        float,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Sampling temperature (higher = more creative)",
    ),
    "top_p": SettingDef(
        float,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Nucleus sampling cutoff probability",
    ),
    "top_k_sampling": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Top-K sampling: number of tokens to consider",
    ),
    "repeat_penalty": SettingDef(
        float,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Penalty for repeating tokens",
    ),
    "num_ctx": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Context window size in tokens. Leave empty to size automatically "
            "(aims for chat_n_ctx_target, ceiling at num_ctx_max or training_ctx)."
        ),
    ),
    "num_ctx_max": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Explicit ceiling for the dynamic context picker. Leave empty to "
            "use the model's training_ctx from GGUF metadata as the only "
            "ceiling. Set to cap below training_ctx (saves KV memory)."
        ),
    ),
    "chat_n_ctx_target": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text=(
            "Working context the dynamic picker aims for. Fits a RAG turn "
            "with reasoning headroom; raise for long-document chat."
        ),
    ),
    "flash_attention": SettingDef(
        bool,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Flash attention. Empty (auto) tries it on with a fallback for older "
            "llama-cpp-python builds; resolves the V-cache padding warning on "
            "models with uneven per-layer V dims."
        ),
    ),
    "kv_cache_type": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text=(
            "KV cache element type. q8_0 / q4_0 halve or quarter cache memory "
            "but require flash attention to be enabled."
        ),
        choices=tuple(t.value for t in KvCacheType),
    ),
    "n_gpu_layers": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Layers to offload to GPU. Empty = all (recommended), 0 = CPU only, "
            "positive int = partial offload for tight VRAM."
        ),
    ),
    "gpu_devices": SettingDef(
        str,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Restrict llama.cpp to specific GPU indexes on dual-GPU machines "
            "(e.g. NVIDIA dGPU + integrated). Comma-separated, like '0' or '0,1'. "
            "Applies to Vulkan, CUDA, and ROCm. Requires a restart to take effect."
        ),
    ),
    "main_gpu": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Primary GPU index for llama.cpp when multiple devices are visible. "
            "Empty = let llama.cpp pick (index 0). Set this together with "
            "gpu_devices to pin inference to a specific card."
        ),
    ),
    "seed": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Random seed for reproducible output",
    ),
    "rag_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group=SettingGroup.GENERATION,
        help_text="System prompt sent when answering with retrieved context",
    ),
    "general_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group=SettingGroup.GENERATION,
        help_text="System prompt sent when there are no documents to ground the answer",
    ),
    "chat_mode": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.GENERATION,
        choices=tuple(m.value for m in ChatMode),
        help_text="search runs every chat turn through document retrieval; chat skips it",
    ),
    "top_k": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Number of chunks returned by search",
    ),
    "rerank_candidates": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Candidate pool size for reranking",
    ),
    "show_reasoning": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text="Show model reasoning/thinking tokens in output",
    ),
    "lilbee_name": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text=(
            "Human-readable label for this lilbee, shown in the status bar. "
            "Empty falls back to 'global' for the platform default dir or "
            "to the project path (~-substituted and left-truncated)."
        ),
    ),
    "show_lilbee_path": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text=(
            "Show the full absolute path in the status bar: expands 'global' "
            "to its on-disk path and skips ~ substitution / truncation."
        ),
    ),
    "theme": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text="TUI color theme. Cycle with Ctrl+T; the active theme persists across sessions.",
        choices=tuple(DARK_THEMES),
    ),
    "wiki": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Enable the wiki layer (synthesis pages with citations)",
    ),
    "wiki_dir": SettingDef(
        str,
        nullable=False,
        writable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Directory under data_root where wiki pages live (set via env / config.toml only)"
        ),
    ),
    "wiki_prune_raw": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Delete raw chunks after summarizing into the wiki",
    ),
    "wiki_embedding_faithfulness_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Minimum cosine similarity (0-1) between a generated page and "
            "the mean of its source chunk vectors before publishing. "
            "Pages below the threshold route to drafts/."
        ),
    ),
    "wiki_stale_citation_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Fraction of stale citations that triggers page regeneration",
    ),
    "wiki_drift_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Max fraction of changed lines before regeneration requires review",
    ),
    "wiki_clusterer": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Synthesis clusterer backend (embedding or concepts)",
        choices=tuple(b.value for b in ClustererBackend),
    ),
    "wiki_entity_mode": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Entity extraction strategy "
            "(ner_entities = default, typed NER entities; "
            "plus_llm_types = NER + LLM-proposed schema; "
            "llm_tagged = LLM tags every chunk)"
        ),
        choices=tuple(m.value for m in WikiEntityMode),
    ),
    "wiki_entity_min_mentions": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Minimum chunk mentions before an entity or concept gets its own page",
    ),
    "wiki_concept_max_chunks_per_page": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Maximum chunks passed into each concept or entity page generation call",
    ),
    "wiki_related_max": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Maximum related concepts listed in the `## Related` section of each page",
    ),
    "wiki_ingest_update_cap": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Touched-page cap for auto-update after sync. "
            "Beyond this count, run `lilbee wiki update` manually."
        ),
    ),
    "wiki_summary_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group=SettingGroup.WIKI,
        help_text=(
            "Prompt for per-source summary pages. "
            "Must keep the {source_name} and {chunks_text} placeholders."
        ),
    ),
    "wiki_synthesis_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group=SettingGroup.WIKI,
        help_text=(
            "Prompt for cross-source synthesis pages. "
            "Must keep {topic}, {source_list}, and {chunks_text}."
        ),
    ),
    "wiki_entity_batch_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group=SettingGroup.WIKI,
        help_text=(
            "Prompt for the per-source batched call. "
            "Must keep {source}, {entity_list}, {chunks_text}, and {concept_instruction}."
        ),
    ),
    "wiki_extract_concepts": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Whether the per-source batched call asks the LLM to curate concept pages "
            "alongside the pre-extracted entity list."
        ),
    ),
    "wiki_batch_min_chunks": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Minimum chunks a source must contribute before its batched call includes "
            "concept curation. Sources below the floor skip the concept-curation "
            "instruction; sources with zero entities AND below the floor are skipped entirely."
        ),
    ),
    "wiki_clusterer_k": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Mutual-kNN neighborhood size for the clusterer (0 = auto)",
    ),
    "memory_enabled": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Master switch for long-term chat memory (off by default)",
    ),
    "memory_auto_extract": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Auto-save durable facts and preferences from each TUI turn (needs memory on)",
    ),
    "memory_top_k": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Maximum facts recalled into context per turn",
    ),
    "memory_max_distance": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Recall cutoff distance, 0.0-1.0 (lower is stricter)",
    ),
    "memory_token_budget": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Token cap on the recalled-memory block added to the prompt",
    ),
    "memory_max_per_owner": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Soft cap before the oldest memories are evicted",
        hidden=True,
    ),
    "memory_dedup_distance": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Near-duplicate distance below which a new memory updates the old",
        hidden=True,
    ),
    "crawl_max_depth": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.CRAWLING,
        help_text="Optional recursion-depth cap (blank = no cap; per-crawl values win)",
    ),
    "crawl_render_mode": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text=(
            "How crawls fetch pages. http = lightweight, no browser (default, best "
            "for static and server-rendered sites). browser = Chromium with "
            "JavaScript enabled for client-rendered sites, at much higher memory cost."
        ),
        choices=tuple(m.value for m in CrawlRenderMode),
    ),
    "crawl_browser_recycle_pages": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text=(
            "Browser mode: recycle the Chromium process every N pages to cap memory "
            "growth on long crawls (0 = never recycle)."
        ),
    ),
    "crawl_browser_extra_args": SettingDef(
        list,
        nullable=False,
        group=SettingGroup.CRAWLING,
        render=RenderStyle.LIST_COLLAPSED,
        help_text=(
            "Browser mode: extra Chromium launch flags, one per line. "
            "Defaults trim shared-memory and GPU use."
        ),
    ),
    "crawl_max_pages": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.CRAWLING,
        help_text="Optional global cap on total pages per crawl (blank = no cap).",
    ),
    "crawl_safety_max_pages": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        # Parenthesised like the other multi-line help texts; value unchanged.
        help_text=(
            "Default page bound for an unbounded crawl, so a hostile site cannot "
            "exhaust the disk. An explicit max-pages overrides it; raise this to crawl "
            "larger sites unbounded."
        ),
    ),
    "crawl_timeout": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Per-page fetch timeout in seconds",
    ),
    "crawl_sync_interval": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Seconds between periodic re-syncs during a crawl (0 = sync only at end)",
    ),
    "crawl_mean_delay": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Seconds between in-flight requests within a single crawl",
    ),
    "crawl_max_delay_range": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Random jitter (seconds) added on top of mean delay",
    ),
    "crawl_concurrent_requests": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Concurrent in-flight URLs within one crawl",
    ),
    "crawl_retry_on_rate_limit": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Enable per-domain backoff and retries on HTTP 429/503",
    ),
    "crawl_retry_base_delay_min": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Minimum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_base_delay_max": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Maximum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_max_backoff": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Upper bound on any single backoff wait (seconds)",
    ),
    "crawl_retry_max_attempts": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Retry count per URL when a rate-limit code comes back",
    ),
    "crawl_exclude_patterns": SettingDef(
        list,
        nullable=False,
        group=SettingGroup.CRAWLING,
        render=RenderStyle.LIST_COLLAPSED,
        help_text=(
            "Regex patterns that skip URLs at link-discovery time during "
            "recursive crawls. One per line."
        ),
    ),
    "openrouter_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="OpenRouter API key (enables frontier models in chat picker)",
    ),
    "gemini_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="Google Gemini API key (enables frontier models in chat picker)",
    ),
    "anthropic_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="Anthropic API key (enables frontier models in chat picker)",
    ),
    "openai_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="OpenAI API key (enables frontier models in chat picker)",
    ),
    "mistral_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="Mistral API key (enables frontier models in chat picker)",
    ),
    "deepseek_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="DeepSeek API key (enables frontier models in chat picker)",
    ),
    "hf_token": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.SYSTEM,
        help_text=(
            "HuggingFace access token. Avoids the unauthenticated download "
            "rate limit and unlocks gated repos. Stored in plain text in "
            "config.toml. Env vars (LILBEE_HF_TOKEN, HF_TOKEN) override."
        ),
    ),
    "chunk_size": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Document chunk size in tokens (changes invalidate the index)",
    ),
    "chunk_overlap": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Tokens of overlap between adjacent chunks (preserves context across boundaries)",
    ),
    "tesseract_timeout": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Per-page Tesseract timeout in seconds (used when no vision model is set)",
    ),
    "worker_pool_call_timeout_s": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Per-call deadline for one worker-pool round-trip in seconds. "
            "Raise this for very large embed batches on slow machines"
        ),
    ),
    "worker_pool_eager_start": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Spawn every registered worker at TUI startup instead of on first use. "
            "Trades 1-3 seconds of cold-start per role for first-call latency"
        ),
    ),
    "worker_pool_max_idle_s": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Shut a worker down after this many seconds idle to free RAM/VRAM. "
            "0 disables idle reaping"
        ),
    ),
    "max_tokens": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Hard cap on generated tokens per response (blank = no cap)",
    ),
    "max_reasoning_chars": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text=(
            "Maximum reasoning characters before lilbee forces the model to answer "
            "(0 = unlimited; per-model overrides apply on top)"
        ),
    ),
    "model_keep_alive": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text="Seconds the loaded model stays warm between calls (0 = unload immediately)",
    ),
    "gpu_memory_fraction": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text="Fraction of GPU memory the model is allowed to claim (0.1-1.0)",
    ),
    "candidate_multiplier": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Candidate-pool multiplier over top_k before reranking",
    ),
    "ann_index_threshold": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Chunk count to start building an ANN vector index (0 = always flat search)",
    ),
    "max_distance": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum vector distance for retrieval matches (lower = stricter)",
    ),
    "min_relevance_score": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Minimum RRF relevance score for hybrid search results (0.0 = no filter)",
    ),
    "max_context_sources": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum unique sources contributing chunks to a single answer",
    ),
    "diversity_max_per_source": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum chunks accepted from any one source (caps source dominance)",
    ),
    "mmr_lambda": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text=(
            "MMR lambda balancing relevance vs diversity (0 = max diversity, 1 = max relevance)"
        ),
    ),
    "temporal_filtering": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Detect temporal queries and bias retrieval toward recent chunks",
    ),
    "hyde": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Use HyDE (hypothetical answer expansion) to broaden retrieval",
    ),
    "hyde_weight": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Weight on the HyDE-generated query vector when blending with the original",
    ),
    "query_expansion_count": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Number of paraphrase expansions per query (0 disables expansion)",
    ),
    "expansion_similarity_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Minimum cosine similarity an expansion must keep with the original query",
    ),
    "expansion_short_query_tokens": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Queries at or below this token count skip expansion (saves a model call)",
    ),
    "expansion_guardrails": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Drop expansions that diverge from the original intent",
    ),
    "adaptive_threshold_step": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Step size for adaptive relevance-score relaxation when initial recall is empty",
    ),
    "concept_graph": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Boost retrieval scores for chunks that share concepts with the query",
    ),
    "concept_boost_weight": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum boost (0-1) the concept graph can add to a chunk's relevance",
    ),
    "concept_boost_floor": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Minimum cosine similarity needed before the concept graph boosts a chunk",
    ),
    "concept_max_per_chunk": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum concept tags stored per chunk (caps graph density)",
    ),
    "documents_dir": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.SYSTEM,
        help_text="Local documents root that lilbee sync ingests (blank = data_root/documents)",
    ),
    "vault_base": SettingDef(
        str,
        nullable=True,
        group=SettingGroup.SYSTEM,
        help_text="Markdown vault root; results carry a vault-relative path (blank = none)",
    ),
    "sse_heartbeat_interval": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.SYSTEM,
        help_text="Seconds between SSE keep-alive frames sent to idle HTTP stream clients",
        hidden=True,
    ),
    "llm_provider": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        choices=("auto", "llama-cpp", "remote"),
        help_text=(
            "Provider routing: auto picks the first key present; force a specific one when set"
        ),
    ),
    "ollama_base_url": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.LOCAL_SERVERS,
        help_text="Ollama server URL (blank uses http://localhost:11434)",
    ),
    "lm_studio_base_url": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.LOCAL_SERVERS,
        help_text="LM Studio server URL (blank uses http://localhost:1234/v1)",
    ),
    "wiki_summary_max_tokens": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Maximum tokens generated per wiki page",
    ),
    "wiki_temperature": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Temperature used for wiki page synthesis (low = stay close to sources)",
    ),
}