Coverage for src/lilbee/app/settings_map.py

"""Shared settings map for interactive configuration."""

from __future__ import annotations

from dataclasses import dataclass, field
from enum import StrEnum

from pydantic_core import PydanticUndefined

from lilbee.app.themes import DARK_THEMES
from lilbee.core.config import cfg
from lilbee.core.config.enums import (
    ChatMode,
    ClustererBackend,
    CrawlRenderMode,
    KvCacheType,
    WikiEntityMode,
)

21class RenderStyle(StrEnum):

22 """How a setting is displayed in /settings."""

24 COMPACT = "compact"

25 FULL = "full"

26 LIST_COLLAPSED = "list_collapsed"

27 MULTILINE = "multiline"

30class SettingGroup(StrEnum):

31 """Logical bucket names rendered by ``/settings`` and ``settings_list``."""

33 MODELS = "Models"

34 GENERATION = "Generation"

35 RETRIEVAL = "Retrieval"

36 INGEST = "Ingest"

37 WIKI = "Wiki"

38 MEMORY = "Memory"

39 CRAWLING = "Crawling"

40 LOCAL_SERVERS = "Local-Servers"

41 API_KEYS = "API-Keys"

42 SYSTEM = "System"

43 DISPLAY = "Display"

44 GENERAL = "General"

@dataclass(frozen=True)
class SettingDef:
    """Metadata for an interactive setting.

    ``writable`` is a TUI rendering hint: fields marked ``writable=False``
    (the model role slots) get a dedicated picker rather than an inline
    editor, and the ``/set`` slash command refuses them. The actual
    write contract for HTTP / MCP / programmatic surfaces lives in
    ``config_meta.WRITABLE_CONFIG_FIELDS`` + ``MODEL_ROLE_FIELDS`` and
    is enforced by ``app.settings.apply_settings_update``.

    ``hidden`` keeps the setting out of the TUI settings screen while
    leaving it reachable via ``lilbee set`` and the ``LILBEE_*`` env
    var: use it for transport/server knobs that aren't relevant to a
    typical TUI session.
    """

    # Python type of the setting's value (e.g. str, int, float, bool, list).
    # The field name shadows the builtin inside this class body only; the
    # annotation is never evaluated because of the lazy-annotations import.
    type: type
    # Whether the setting may be left unset/empty.
    nullable: bool
    # TUI hint only; see the class docstring for the real write contract.
    writable: bool = True
    # How the value is displayed in /settings.
    render: RenderStyle = field(default=RenderStyle.COMPACT)
    # Bucket the setting is listed under.
    group: SettingGroup = SettingGroup.GENERAL
    # One-line (or short paragraph) description of the setting.
    help_text: str = ""
    # Allowed string values, or None when no fixed choice list is declared.
    choices: tuple[str, ...] | None = None
    # Keep the setting out of the TUI settings screen (see class docstring).
    hidden: bool = False

def get_default(key: str) -> object:
    """Return the cfg default for a setting key.

    The default is read from the pydantic field metadata on the class of
    ``cfg`` (``type(cfg).model_fields``), not from the live ``cfg`` values.

    Args:
        key: Name of a field declared on the ``cfg`` model class.

    Returns:
        The value produced by the field's ``default_factory`` when one is
        declared; otherwise the field's static default; or ``None`` when
        the field declares no default at all.

    Raises:
        KeyError: If ``key`` is not present in ``model_fields``.
    """
    field_info = type(cfg).model_fields[key]
    # A factory takes precedence: call it so each caller gets a fresh value.
    if field_info.default_factory is not None:
        return field_info.default_factory()  # type: ignore[call-arg]
    # PydanticUndefined is pydantic's "no default declared" sentinel; map it
    # to None so callers never see the sentinel object.
    if field_info.default is PydanticUndefined:
        return None
    return field_info.default

# Registry of interactive settings: maps each config key to the SettingDef
# that describes its value type, nullability, display style, group, and help.
SETTINGS_MAP: dict[str, SettingDef] = {
    "chat_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="LLM used for chat generation (vision and reranking are separate slots)",
    ),
    "vision_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="Vision model for scanned PDF OCR (empty = disabled; Tesseract only)",
    ),
    "enable_ocr": SettingDef(
        bool,
        nullable=True,
        group=SettingGroup.INGEST,
        help_text="Vision OCR for scanned PDFs (empty = auto-detect from vision_model)",
    ),
    "ocr_timeout": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Per-page timeout in seconds for vision OCR (0 = no limit)",
    ),
    "vision_load_budget_s": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Wall-clock seconds reserved for the vision worker to load the"
            " model. Total PDF-OCR budget = load_budget + ocr_timeout * pages."
        ),
    ),
    "semantic_chunking": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Opt-in topic-aware chunker (default off; may fragment numbered procedures)",
    ),
    "topic_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Topic-boundary similarity threshold, 0.0-1.0, used when semantic chunking is on",
    ),
    "embedding_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="Model used to embed document chunks",
    ),
    "reranker_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="Cross-encoder model for result reranking",
    ),
    "temperature": SettingDef(
        float,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Sampling temperature (higher = more creative)",
    ),
    "top_p": SettingDef(
        float,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Nucleus sampling cutoff probability",
    ),
    "top_k_sampling": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Top-K sampling: number of tokens to consider",
    ),
    "repeat_penalty": SettingDef(
        float,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Penalty for repeating tokens",
    ),
    "num_ctx": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Context window size in tokens. Leave empty to size automatically "
            "(aims for chat_n_ctx_target, ceiling at num_ctx_max or training_ctx)."
        ),
    ),
    "num_ctx_max": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Explicit ceiling for the dynamic context picker. Leave empty to "
            "use the model's training_ctx from GGUF metadata as the only "
            "ceiling. Set to cap below training_ctx (saves KV memory)."
        ),
    ),
    "chat_n_ctx_target": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text=(
            "Working context the dynamic picker aims for. Fits a RAG turn "
            "with reasoning headroom; raise for long-document chat."
        ),
    ),
    "flash_attention": SettingDef(
        bool,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Flash attention. Empty (auto) tries it on with a fallback for older "
            "llama-cpp-python builds; resolves the V-cache padding warning on "
            "models with uneven per-layer V dims."
        ),
    ),
    "kv_cache_type": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text=(
            "KV cache element type. q8_0 / q4_0 halve or quarter cache memory "
            "but require flash attention to be enabled."
        ),
        choices=tuple(t.value for t in KvCacheType),
    ),
    "n_gpu_layers": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Layers to offload to GPU. Empty = all (recommended), 0 = CPU only, "
            "positive int = partial offload for tight VRAM."
        ),
    ),
    "gpu_devices": SettingDef(
        str,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Restrict llama.cpp to specific GPU indexes on dual-GPU machines "
            "(e.g. NVIDIA dGPU + integrated). Comma-separated, like '0' or '0,1'. "
            "Applies to Vulkan, CUDA, and ROCm. Requires a restart to take effect."
        ),
    ),
    "main_gpu": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Primary GPU index for llama.cpp when multiple devices are visible. "
            "Empty = let llama.cpp pick (index 0). Set this together with "
            "gpu_devices to pin inference to a specific card."
        ),
    ),
    "seed": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Random seed for reproducible output",
    ),
    "rag_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group=SettingGroup.GENERATION,
        help_text="System prompt sent when answering with retrieved context",
    ),
    "general_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group=SettingGroup.GENERATION,
        help_text="System prompt sent when there are no documents to ground the answer",
    ),
    "chat_mode": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.GENERATION,
        choices=tuple(m.value for m in ChatMode),
        help_text="search runs every chat turn through document retrieval; chat skips it",
    ),
    "top_k": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Number of chunks returned by search",
    ),
    "rerank_candidates": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Candidate pool size for reranking",
    ),
    "show_reasoning": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text="Show model reasoning/thinking tokens in output",
    ),
    "lilbee_name": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text=(
            "Human-readable label for this lilbee, shown in the status bar. "
            "Empty falls back to 'global' for the platform default dir or "
            "to the project path (~-substituted and left-truncated)."
        ),
    ),
    "show_lilbee_path": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text=(
            "Show the full absolute path in the status bar: expands 'global' "
            "to its on-disk path and skips ~ substitution / truncation."
        ),
    ),
    "theme": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text="TUI color theme. Cycle with Ctrl+T; the active theme persists across sessions.",
        choices=tuple(DARK_THEMES),
    ),
    "wiki": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Enable the wiki layer (synthesis pages with citations)",
    ),
    "wiki_dir": SettingDef(
        str,
        nullable=False,
        writable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Directory under data_root where wiki pages live (set via env / config.toml only)"
        ),
    ),
    "wiki_prune_raw": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Delete raw chunks after summarizing into the wiki",
    ),
    "wiki_embedding_faithfulness_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Minimum cosine similarity (0-1) between a generated page and "
            "the mean of its source chunk vectors before publishing. "
            "Pages below the threshold route to drafts/."
        ),
    ),
    "wiki_stale_citation_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Fraction of stale citations that triggers page regeneration",
    ),
    "wiki_drift_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Max fraction of changed lines before regeneration requires review",
    ),
    "wiki_clusterer": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Synthesis clusterer backend (embedding or concepts)",
        choices=tuple(b.value for b in ClustererBackend),
    ),
    "wiki_entity_mode": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Entity extraction strategy "
            "(ner_entities = default, typed NER entities; "
            "plus_llm_types = NER + LLM-proposed schema; "
            "llm_tagged = LLM tags every chunk)"
        ),
        choices=tuple(m.value for m in WikiEntityMode),
    ),
    "wiki_entity_min_mentions": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Minimum chunk mentions before an entity or concept gets its own page",
    ),
    "wiki_concept_max_chunks_per_page": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Maximum chunks passed into each concept or entity page generation call",
    ),
    "wiki_related_max": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Maximum related concepts listed in the `## Related` section of each page",
    ),
    "wiki_ingest_update_cap": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Touched-page cap for auto-update after sync. "
            "Beyond this count, run `lilbee wiki update` manually."
        ),
    ),
    "wiki_summary_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group=SettingGroup.WIKI,
        help_text=(
            "Prompt for per-source summary pages. "
            "Must keep the {source_name} and {chunks_text} placeholders."
        ),
    ),
    "wiki_synthesis_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group=SettingGroup.WIKI,
        help_text=(
            "Prompt for cross-source synthesis pages. "
            "Must keep {topic}, {source_list}, and {chunks_text}."
        ),
    ),
    "wiki_entity_batch_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group=SettingGroup.WIKI,
        help_text=(
            "Prompt for the per-source batched call. "
            "Must keep {source}, {entity_list}, {chunks_text}, and {concept_instruction}."
        ),
    ),
    "wiki_extract_concepts": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Whether the per-source batched call asks the LLM to curate concept pages "
            "alongside the pre-extracted entity list."
        ),
    ),
    "wiki_batch_min_chunks": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Minimum chunks a source must contribute before its batched call includes "
            "concept curation. Sources below the floor skip the concept-curation "
            "instruction; sources with zero entities AND below the floor are skipped entirely."
        ),
    ),
    "wiki_clusterer_k": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Mutual-kNN neighborhood size for the clusterer (0 = auto)",
    ),
    "memory_enabled": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Master switch for long-term chat memory (off by default)",
    ),
    "memory_auto_extract": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Auto-save durable facts and preferences from each TUI turn (needs memory on)",
    ),
    "memory_top_k": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Maximum facts recalled into context per turn",
    ),
    "memory_max_distance": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Recall cutoff distance, 0.0-1.0 (lower is stricter)",
    ),
    "memory_token_budget": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Token cap on the recalled-memory block added to the prompt",
    ),
    "memory_max_per_owner": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Soft cap before the oldest memories are evicted",
        hidden=True,
    ),
    "memory_dedup_distance": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Near-duplicate distance below which a new memory updates the old",
        hidden=True,
    ),
    "crawl_max_depth": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.CRAWLING,
        help_text="Optional recursion-depth cap (blank = no cap; per-crawl values win)",
    ),
    "crawl_render_mode": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text=(
            "How crawls fetch pages. http = lightweight, no browser (default, best "
            "for static and server-rendered sites). browser = Chromium with "
            "JavaScript enabled for client-rendered sites, at much higher memory cost."
        ),
        choices=tuple(m.value for m in CrawlRenderMode),
    ),
    "crawl_browser_recycle_pages": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text=(
            "Browser mode: recycle the Chromium process every N pages to cap memory "
            "growth on long crawls (0 = never recycle)."
        ),
    ),
    "crawl_browser_extra_args": SettingDef(
        list,
        nullable=False,
        group=SettingGroup.CRAWLING,
        render=RenderStyle.LIST_COLLAPSED,
        help_text=(
            "Browser mode: extra Chromium launch flags, one per line. "
            "Defaults trim shared-memory and GPU use."
        ),
    ),
    "crawl_max_pages": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.CRAWLING,
        help_text="Optional global cap on total pages per crawl (blank = no cap).",
    ),
    "crawl_safety_max_pages": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        # Parenthesized like every other multi-line help_text in this map;
        # the concatenated string is unchanged.
        help_text=(
            "Default page bound for an unbounded crawl, so a hostile site cannot "
            "exhaust the disk. An explicit max-pages overrides it; raise this to crawl "
            "larger sites unbounded."
        ),
    ),
    "crawl_timeout": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Per-page fetch timeout in seconds",
    ),
    "crawl_sync_interval": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Seconds between periodic re-syncs during a crawl (0 = sync only at end)",
    ),
    "crawl_mean_delay": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Seconds between in-flight requests within a single crawl",
    ),
    "crawl_max_delay_range": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Random jitter (seconds) added on top of mean delay",
    ),
    "crawl_concurrent_requests": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Concurrent in-flight URLs within one crawl",
    ),
    "crawl_retry_on_rate_limit": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Enable per-domain backoff and retries on HTTP 429/503",
    ),
    "crawl_retry_base_delay_min": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Minimum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_base_delay_max": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Maximum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_max_backoff": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Upper bound on any single backoff wait (seconds)",
    ),
    "crawl_retry_max_attempts": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Retry count per URL when a rate-limit code comes back",
    ),
    "crawl_exclude_patterns": SettingDef(
        list,
        nullable=False,
        group=SettingGroup.CRAWLING,
        render=RenderStyle.LIST_COLLAPSED,
        help_text=(
            "Regex patterns that skip URLs at link-discovery time during "
            "recursive crawls. One per line."
        ),
    ),
    "openrouter_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="OpenRouter API key (enables frontier models in chat picker)",
    ),
    "gemini_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="Google Gemini API key (enables frontier models in chat picker)",
    ),
    "anthropic_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="Anthropic API key (enables frontier models in chat picker)",
    ),
    "openai_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="OpenAI API key (enables frontier models in chat picker)",
    ),
    "mistral_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="Mistral API key (enables frontier models in chat picker)",
    ),
    "deepseek_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="DeepSeek API key (enables frontier models in chat picker)",
    ),
    "hf_token": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.SYSTEM,
        help_text=(
            "HuggingFace access token. Avoids the unauthenticated download "
            "rate limit and unlocks gated repos. Stored in plain text in "
            "config.toml. Env vars (LILBEE_HF_TOKEN, HF_TOKEN) override."
        ),
    ),
    "chunk_size": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Document chunk size in tokens (changes invalidate the index)",
    ),
    "chunk_overlap": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Tokens of overlap between adjacent chunks (preserves context across boundaries)",
    ),
    "tesseract_timeout": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Per-page Tesseract timeout in seconds (used when no vision model is set)",
    ),
    "worker_pool_call_timeout_s": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Per-call deadline for one worker-pool round-trip in seconds. "
            "Raise this for very large embed batches on slow machines"
        ),
    ),
    "worker_pool_eager_start": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Spawn every registered worker at TUI startup instead of on first use. "
            "Trades 1-3 seconds of cold-start per role for first-call latency"
        ),
    ),
    "worker_pool_max_idle_s": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Shut a worker down after this many seconds idle to free RAM/VRAM. "
            "0 disables idle reaping"
        ),
    ),
    "max_tokens": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Hard cap on generated tokens per response (blank = no cap)",
    ),
    "max_reasoning_chars": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text=(
            "Maximum reasoning characters before lilbee forces the model to answer "
            "(0 = unlimited; per-model overrides apply on top)"
        ),
    ),
    "model_keep_alive": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text="Seconds the loaded model stays warm between calls (0 = unload immediately)",
    ),
    "gpu_memory_fraction": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text="Fraction of GPU memory the model is allowed to claim (0.1-1.0)",
    ),
    "candidate_multiplier": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Candidate-pool multiplier over top_k before reranking",
    ),
    "ann_index_threshold": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Chunk count to start building an ANN vector index (0 = always flat search)",
    ),
    "max_distance": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum vector distance for retrieval matches (lower = stricter)",
    ),
    "min_relevance_score": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Minimum RRF relevance score for hybrid search results (0.0 = no filter)",
    ),
    "max_context_sources": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum unique sources contributing chunks to a single answer",
    ),
    "diversity_max_per_source": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum chunks accepted from any one source (caps source dominance)",
    ),
    "mmr_lambda": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text=(
            "MMR lambda balancing relevance vs diversity (0 = max diversity, 1 = max relevance)"
        ),
    ),
    "temporal_filtering": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Detect temporal queries and bias retrieval toward recent chunks",
    ),
    "hyde": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Use HyDE (hypothetical answer expansion) to broaden retrieval",
    ),
    "hyde_weight": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Weight on the HyDE-generated query vector when blending with the original",
    ),
    "query_expansion_count": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Number of paraphrase expansions per query (0 disables expansion)",
    ),
    "expansion_similarity_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Minimum cosine similarity an expansion must keep with the original query",
    ),
    "expansion_short_query_tokens": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Queries at or below this token count skip expansion (saves a model call)",
    ),
    "expansion_guardrails": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Drop expansions that diverge from the original intent",
    ),
    "adaptive_threshold_step": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Step size for adaptive relevance-score relaxation when initial recall is empty",
    ),
    "concept_graph": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Boost retrieval scores for chunks that share concepts with the query",
    ),
    "concept_boost_weight": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum boost (0-1) the concept graph can add to a chunk's relevance",
    ),
    "concept_boost_floor": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Minimum cosine similarity needed before the concept graph boosts a chunk",
    ),
    "concept_max_per_chunk": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum concept tags stored per chunk (caps graph density)",
    ),
    "documents_dir": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.SYSTEM,
        help_text="Local documents root that lilbee sync ingests (blank = data_root/documents)",
    ),
    "vault_base": SettingDef(
        str,
        nullable=True,
        group=SettingGroup.SYSTEM,
        help_text="Markdown vault root; results carry a vault-relative path (blank = none)",
    ),
    "sse_heartbeat_interval": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.SYSTEM,
        help_text="Seconds between SSE keep-alive frames sent to idle HTTP stream clients",
        hidden=True,
    ),
    "llm_provider": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        choices=("auto", "llama-cpp", "remote"),
        help_text=(
            "Provider routing: auto picks the first key present; force a specific one when set"
        ),
    ),
    "ollama_base_url": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.LOCAL_SERVERS,
        help_text="Ollama server URL (blank uses http://localhost:11434)",
    ),
    "lm_studio_base_url": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.LOCAL_SERVERS,
        help_text="LM Studio server URL (blank uses http://localhost:1234/v1)",
    ),
    "wiki_summary_max_tokens": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Maximum tokens generated per wiki page",
    ),
    "wiki_temperature": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Temperature used for wiki page synthesis (low = stay close to sources)",
    ),
}

Coverage for src / lilbee / app / settings_map.py: 100%

43 statements