Coverage for src / lilbee / app / settings_map.py: 100%

43 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-06-28 01:01 +0000

1"""Shared settings map for interactive configuration.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import dataclass, field 

6from enum import StrEnum 

7 

8from pydantic_core import PydanticUndefined 

9 

10from lilbee.app.themes import DARK_THEMES 

11from lilbee.core.config import cfg 

12from lilbee.core.config.enums import ( 

13 ChatMode, 

14 ClustererBackend, 

15 CrawlRenderMode, 

16 KvCacheType, 

17 WikiEntityMode, 

18) 

19 

20 

21class RenderStyle(StrEnum): 

22 """How a setting is displayed in /settings.""" 

23 

24 COMPACT = "compact" 

25 FULL = "full" 

26 LIST_COLLAPSED = "list_collapsed" 

27 MULTILINE = "multiline" 

28 

29 

30class SettingGroup(StrEnum): 

31 """Logical bucket names rendered by ``/settings`` and ``settings_list``.""" 

32 

33 MODELS = "Models" 

34 GENERATION = "Generation" 

35 RETRIEVAL = "Retrieval" 

36 INGEST = "Ingest" 

37 WIKI = "Wiki" 

38 MEMORY = "Memory" 

39 CRAWLING = "Crawling" 

40 LOCAL_SERVERS = "Local-Servers" 

41 API_KEYS = "API-Keys" 

42 SYSTEM = "System" 

43 DISPLAY = "Display" 

44 GENERAL = "General" 

45 

46 

@dataclass(frozen=True)
class SettingDef:
    """Immutable description of one interactively configurable setting.

    ``writable`` only steers TUI rendering. A field with ``writable=False``
    (the model role slots) is edited through a dedicated picker instead of
    an inline editor, and the ``/set`` slash command rejects it. The real
    write contract for the HTTP / MCP / programmatic surfaces is defined by
    ``config_meta.WRITABLE_CONFIG_FIELDS`` + ``MODEL_ROLE_FIELDS`` and
    enforced by ``app.settings.apply_settings_update``.

    ``hidden`` removes the setting from the TUI settings screen without
    making it unreachable: ``lilbee set`` and the ``LILBEE_*`` env var
    still work. It is meant for transport/server knobs that a typical TUI
    session has no use for.
    """

    # Required fields come first (no defaults).
    type: type
    nullable: bool
    # Optional rendering / grouping metadata.
    writable: bool = True
    render: RenderStyle = RenderStyle.COMPACT
    group: SettingGroup = SettingGroup.GENERAL
    help_text: str = ""
    choices: tuple[str, ...] | None = None
    hidden: bool = False

72 

73 

def get_default(key: str) -> object:
    """Look up the default that the cfg model declares for ``key``.

    A declared ``default_factory`` takes precedence and is called with no
    arguments. Otherwise the static default is returned, with pydantic's
    "undefined" sentinel translated to ``None``. A ``key`` that is not a
    field of the cfg model raises ``KeyError`` from the field lookup.
    """
    spec = type(cfg).model_fields[key]
    factory = spec.default_factory
    if factory is not None:
        return factory()  # type: ignore[call-arg]
    declared = spec.default
    return None if declared is PydanticUndefined else declared

82 

83 

# Registry of the settings exposed by the interactive configuration layer,
# keyed by setting name. get_default() resolves a key against the cfg model's
# fields, so keys are presumably cfg field names — TODO confirm every key has
# a matching field.
# NOTE(review): entries are not strictly ordered by group (e.g. enable_ocr sits
# between the model slots). Presumably consumers bucket by ``group``; confirm
# before relying on, or changing, insertion order.
SETTINGS_MAP: dict[str, SettingDef] = {
    # Model slot: writable=False, which the SettingDef docstring describes as
    # "dedicated picker, refused by /set". The same applies to vision_model,
    # embedding_model and reranker_model below.
    "chat_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="LLM used for chat generation (vision and reranking are separate slots)",
    ),
    "vision_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="Vision model for scanned PDF OCR (empty = disabled; Tesseract only)",
    ),
    "enable_ocr": SettingDef(
        bool,
        nullable=True,
        group=SettingGroup.INGEST,
        help_text="Vision OCR for scanned PDFs (empty = auto-detect from vision_model)",
    ),
    "ocr_timeout": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Per-page timeout in seconds for vision OCR (0 = no limit)",
    ),
    "vision_load_budget_s": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Wall-clock seconds reserved for the vision worker to load the"
            " model. Total PDF-OCR budget = load_budget + ocr_timeout * pages."
        ),
    ),
    "semantic_chunking": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Opt-in topic-aware chunker (default off; may fragment numbered procedures)",
    ),
    "topic_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Topic-boundary similarity threshold, 0.0-1.0, used when semantic chunking is on",
    ),
    "embedding_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="Model used to embed document chunks",
    ),
    "reranker_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group=SettingGroup.MODELS,
        help_text="Cross-encoder model for result reranking",
    ),
    # Sampling and context-window knobs.
    "temperature": SettingDef(
        float,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Sampling temperature (higher = more creative)",
    ),
    "top_p": SettingDef(
        float,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Nucleus sampling cutoff probability",
    ),
    "top_k_sampling": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Top-K sampling: number of tokens to consider",
    ),
    "repeat_penalty": SettingDef(
        float,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Penalty for repeating tokens",
    ),
    "num_ctx": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Context window size in tokens. Leave empty to size automatically "
            "(aims for chat_n_ctx_target, ceiling at num_ctx_max or training_ctx)."
        ),
    ),
    "num_ctx_max": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Explicit ceiling for the dynamic context picker. Leave empty to "
            "use the model's training_ctx from GGUF metadata as the only "
            "ceiling. Set to cap below training_ctx (saves KV memory)."
        ),
    ),
    "chat_n_ctx_target": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text=(
            "Working context the dynamic picker aims for. Fits a RAG turn "
            "with reasoning headroom; raise for long-document chat."
        ),
    ),
    "flash_attention": SettingDef(
        bool,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Flash attention. Empty (auto) tries it on with a fallback for older "
            "llama-cpp-python builds; resolves the V-cache padding warning on "
            "models with uneven per-layer V dims."
        ),
    ),
    # choices is derived from the KvCacheType enum values.
    "kv_cache_type": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text=(
            "KV cache element type. q8_0 / q4_0 halve or quarter cache memory "
            "but require flash attention to be enabled."
        ),
        choices=tuple(t.value for t in KvCacheType),
    ),
    "n_gpu_layers": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Layers to offload to GPU. Empty = all (recommended), 0 = CPU only, "
            "positive int = partial offload for tight VRAM."
        ),
    ),
    "gpu_devices": SettingDef(
        str,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Restrict llama.cpp to specific GPU indexes on dual-GPU machines "
            "(e.g. NVIDIA dGPU + integrated). Comma-separated, like '0' or '0,1'. "
            "Applies to Vulkan, CUDA, and ROCm. Requires a restart to take effect."
        ),
    ),
    "main_gpu": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text=(
            "Primary GPU index for llama.cpp when multiple devices are visible. "
            "Empty = let llama.cpp pick (index 0). Set this together with "
            "gpu_devices to pin inference to a specific card."
        ),
    ),
    "seed": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Random seed for reproducible output",
    ),
    # The two system prompts render as MULTILINE, while the wiki prompts
    # further down use FULL.
    # NOTE(review): confirm the difference in render style is intentional.
    "rag_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group=SettingGroup.GENERATION,
        help_text="System prompt sent when answering with retrieved context",
    ),
    "general_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group=SettingGroup.GENERATION,
        help_text="System prompt sent when there are no documents to ground the answer",
    ),
    # choices is derived from the ChatMode enum values.
    "chat_mode": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.GENERATION,
        choices=tuple(m.value for m in ChatMode),
        help_text="search runs every chat turn through document retrieval; chat skips it",
    ),
    "top_k": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Number of chunks returned by search",
    ),
    "rerank_candidates": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Candidate pool size for reranking",
    ),
    # Display / status-bar settings.
    "show_reasoning": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text="Show model reasoning/thinking tokens in output",
    ),
    "lilbee_name": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text=(
            "Human-readable label for this lilbee, shown in the status bar. "
            "Empty falls back to 'global' for the platform default dir or "
            "to the project path (~-substituted and left-truncated)."
        ),
    ),
    "show_lilbee_path": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text=(
            "Show the full absolute path in the status bar: expands 'global' "
            "to its on-disk path and skips ~ substitution / truncation."
        ),
    ),
    # choices is built from DARK_THEMES (imported from lilbee.app.themes).
    "theme": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.DISPLAY,
        help_text="TUI color theme. Cycle with Ctrl+T; the active theme persists across sessions.",
        choices=tuple(DARK_THEMES),
    ),
    # Wiki layer.
    "wiki": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Enable the wiki layer (synthesis pages with citations)",
    ),
    # writable=False although this is not a model slot; per its help text it
    # is set via env / config.toml only.
    "wiki_dir": SettingDef(
        str,
        nullable=False,
        writable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Directory under data_root where wiki pages live (set via env / config.toml only)"
        ),
    ),
    "wiki_prune_raw": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Delete raw chunks after summarizing into the wiki",
    ),
    "wiki_embedding_faithfulness_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Minimum cosine similarity (0-1) between a generated page and "
            "the mean of its source chunk vectors before publishing. "
            "Pages below the threshold route to drafts/."
        ),
    ),
    "wiki_stale_citation_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Fraction of stale citations that triggers page regeneration",
    ),
    "wiki_drift_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Max fraction of changed lines before regeneration requires review",
    ),
    # choices is derived from the ClustererBackend enum values.
    "wiki_clusterer": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Synthesis clusterer backend (embedding or concepts)",
        choices=tuple(b.value for b in ClustererBackend),
    ),
    # choices is derived from the WikiEntityMode enum values.
    "wiki_entity_mode": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Entity extraction strategy "
            "(ner_entities = default, typed NER entities; "
            "plus_llm_types = NER + LLM-proposed schema; "
            "llm_tagged = LLM tags every chunk)"
        ),
        choices=tuple(m.value for m in WikiEntityMode),
    ),
    "wiki_entity_min_mentions": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Minimum chunk mentions before an entity or concept gets its own page",
    ),
    "wiki_concept_max_chunks_per_page": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Maximum chunks passed into each concept or entity page generation call",
    ),
    "wiki_related_max": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Maximum related concepts listed in the `## Related` section of each page",
    ),
    "wiki_ingest_update_cap": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Touched-page cap for auto-update after sync. "
            "Beyond this count, run `lilbee wiki update` manually."
        ),
    ),
    # Prompt templates: each help text lists the {placeholders} that must be
    # kept in the template.
    "wiki_summary_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group=SettingGroup.WIKI,
        help_text=(
            "Prompt for per-source summary pages. "
            "Must keep the {source_name} and {chunks_text} placeholders."
        ),
    ),
    "wiki_synthesis_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group=SettingGroup.WIKI,
        help_text=(
            "Prompt for cross-source synthesis pages. "
            "Must keep {topic}, {source_list}, and {chunks_text}."
        ),
    ),
    "wiki_entity_batch_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group=SettingGroup.WIKI,
        help_text=(
            "Prompt for the per-source batched call. "
            "Must keep {source}, {entity_list}, {chunks_text}, and {concept_instruction}."
        ),
    ),
    "wiki_extract_concepts": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Whether the per-source batched call asks the LLM to curate concept pages "
            "alongside the pre-extracted entity list."
        ),
    ),
    "wiki_batch_min_chunks": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text=(
            "Minimum chunks a source must contribute before its batched call includes "
            "concept curation. Sources below the floor skip the concept-curation "
            "instruction; sources with zero entities AND below the floor are skipped entirely."
        ),
    ),
    "wiki_clusterer_k": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Mutual-kNN neighborhood size for the clusterer (0 = auto)",
    ),
    # Long-term chat memory.
    "memory_enabled": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Master switch for long-term chat memory (off by default)",
    ),
    "memory_auto_extract": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Auto-save durable facts and preferences from each TUI turn (needs memory on)",
    ),
    "memory_top_k": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Maximum facts recalled into context per turn",
    ),
    "memory_max_distance": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Recall cutoff distance, 0.0-1.0 (lower is stricter)",
    ),
    "memory_token_budget": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Token cap on the recalled-memory block added to the prompt",
    ),
    # hidden=True: per the SettingDef docstring this keeps the entry off the
    # TUI settings screen while it stays reachable via ``lilbee set`` / env var.
    "memory_max_per_owner": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Soft cap before the oldest memories are evicted",
        hidden=True,
    ),
    "memory_dedup_distance": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.MEMORY,
        help_text="Near-duplicate distance below which a new memory updates the old",
        hidden=True,
    ),
    # Crawling.
    "crawl_max_depth": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.CRAWLING,
        help_text="Optional recursion-depth cap (blank = no cap; per-crawl values win)",
    ),
    # choices is derived from the CrawlRenderMode enum values.
    "crawl_render_mode": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text=(
            "How crawls fetch pages. http = lightweight, no browser (default, best "
            "for static and server-rendered sites). browser = Chromium with "
            "JavaScript enabled for client-rendered sites, at much higher memory cost."
        ),
        choices=tuple(m.value for m in CrawlRenderMode),
    ),
    "crawl_browser_recycle_pages": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text=(
            "Browser mode: recycle the Chromium process every N pages to cap memory "
            "growth on long crawls (0 = never recycle)."
        ),
    ),
    # List-typed setting, rendered with the LIST_COLLAPSED style.
    "crawl_browser_extra_args": SettingDef(
        list,
        nullable=False,
        group=SettingGroup.CRAWLING,
        render=RenderStyle.LIST_COLLAPSED,
        help_text=(
            "Browser mode: extra Chromium launch flags, one per line. "
            "Defaults trim shared-memory and GPU use."
        ),
    ),
    "crawl_max_pages": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.CRAWLING,
        help_text="Optional global cap on total pages per crawl (blank = no cap).",
    ),
    # help_text here is built by implicit adjacent-string concatenation
    # without wrapping parentheses, unlike the other multi-line help texts.
    "crawl_safety_max_pages": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Default page bound for an unbounded crawl, so a hostile site cannot "
        "exhaust the disk. An explicit max-pages overrides it; raise this to crawl "
        "larger sites unbounded.",
    ),
    "crawl_timeout": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Per-page fetch timeout in seconds",
    ),
    "crawl_sync_interval": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Seconds between periodic re-syncs during a crawl (0 = sync only at end)",
    ),
    "crawl_mean_delay": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Seconds between in-flight requests within a single crawl",
    ),
    "crawl_max_delay_range": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Random jitter (seconds) added on top of mean delay",
    ),
    "crawl_concurrent_requests": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Concurrent in-flight URLs within one crawl",
    ),
    "crawl_retry_on_rate_limit": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Enable per-domain backoff and retries on HTTP 429/503",
    ),
    "crawl_retry_base_delay_min": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Minimum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_base_delay_max": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Maximum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_max_backoff": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Upper bound on any single backoff wait (seconds)",
    ),
    "crawl_retry_max_attempts": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.CRAWLING,
        help_text="Retry count per URL when a rate-limit code comes back",
    ),
    "crawl_exclude_patterns": SettingDef(
        list,
        nullable=False,
        group=SettingGroup.CRAWLING,
        render=RenderStyle.LIST_COLLAPSED,
        help_text=(
            "Regex patterns that skip URLs at link-discovery time during "
            "recursive crawls. One per line."
        ),
    ),
    # Provider API keys. All are declared nullable=False.
    # NOTE(review): presumably an empty string means "not configured" — confirm.
    "openrouter_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="OpenRouter API key (enables frontier models in chat picker)",
    ),
    "gemini_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="Google Gemini API key (enables frontier models in chat picker)",
    ),
    "anthropic_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="Anthropic API key (enables frontier models in chat picker)",
    ),
    "openai_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="OpenAI API key (enables frontier models in chat picker)",
    ),
    "mistral_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="Mistral API key (enables frontier models in chat picker)",
    ),
    "deepseek_api_key": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        help_text="DeepSeek API key (enables frontier models in chat picker)",
    ),
    # Grouped under SYSTEM rather than API_KEYS.
    "hf_token": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.SYSTEM,
        help_text=(
            "HuggingFace access token. Avoids the unauthenticated download "
            "rate limit and unlocks gated repos. Stored in plain text in "
            "config.toml. Env vars (LILBEE_HF_TOKEN, HF_TOKEN) override."
        ),
    ),
    # Further ingest / worker-pool settings.
    "chunk_size": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Document chunk size in tokens (changes invalidate the index)",
    ),
    "chunk_overlap": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Tokens of overlap between adjacent chunks (preserves context across boundaries)",
    ),
    "tesseract_timeout": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text="Per-page Tesseract timeout in seconds (used when no vision model is set)",
    ),
    "worker_pool_call_timeout_s": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Per-call deadline for one worker-pool round-trip in seconds. "
            "Raise this for very large embed batches on slow machines"
        ),
    ),
    "worker_pool_eager_start": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Spawn every registered worker at TUI startup instead of on first use. "
            "Trades 1-3 seconds of cold-start per role for first-call latency"
        ),
    ),
    "worker_pool_max_idle_s": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.INGEST,
        help_text=(
            "Shut a worker down after this many seconds idle to free RAM/VRAM. "
            "0 disables idle reaping"
        ),
    ),
    # Further generation settings.
    "max_tokens": SettingDef(
        int,
        nullable=True,
        group=SettingGroup.GENERATION,
        help_text="Hard cap on generated tokens per response (blank = no cap)",
    ),
    "max_reasoning_chars": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text=(
            "Maximum reasoning characters before lilbee forces the model to answer "
            "(0 = unlimited; per-model overrides apply on top)"
        ),
    ),
    "model_keep_alive": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text="Seconds the loaded model stays warm between calls (0 = unload immediately)",
    ),
    "gpu_memory_fraction": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.GENERATION,
        help_text="Fraction of GPU memory the model is allowed to claim (0.1-1.0)",
    ),
    # Further retrieval settings.
    "candidate_multiplier": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Candidate-pool multiplier over top_k before reranking",
    ),
    "ann_index_threshold": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Chunk count to start building an ANN vector index (0 = always flat search)",
    ),
    "max_distance": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum vector distance for retrieval matches (lower = stricter)",
    ),
    "min_relevance_score": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Minimum RRF relevance score for hybrid search results (0.0 = no filter)",
    ),
    "max_context_sources": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum unique sources contributing chunks to a single answer",
    ),
    "diversity_max_per_source": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum chunks accepted from any one source (caps source dominance)",
    ),
    "mmr_lambda": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text=(
            "MMR lambda balancing relevance vs diversity (0 = max diversity, 1 = max relevance)"
        ),
    ),
    "temporal_filtering": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Detect temporal queries and bias retrieval toward recent chunks",
    ),
    "hyde": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Use HyDE (hypothetical answer expansion) to broaden retrieval",
    ),
    "hyde_weight": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Weight on the HyDE-generated query vector when blending with the original",
    ),
    "query_expansion_count": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Number of paraphrase expansions per query (0 disables expansion)",
    ),
    "expansion_similarity_threshold": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Minimum cosine similarity an expansion must keep with the original query",
    ),
    "expansion_short_query_tokens": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Queries at or below this token count skip expansion (saves a model call)",
    ),
    "expansion_guardrails": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Drop expansions that diverge from the original intent",
    ),
    "adaptive_threshold_step": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Step size for adaptive relevance-score relaxation when initial recall is empty",
    ),
    "concept_graph": SettingDef(
        bool,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Boost retrieval scores for chunks that share concepts with the query",
    ),
    "concept_boost_weight": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum boost (0-1) the concept graph can add to a chunk's relevance",
    ),
    "concept_boost_floor": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Minimum cosine similarity needed before the concept graph boosts a chunk",
    ),
    "concept_max_per_chunk": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.RETRIEVAL,
        help_text="Maximum concept tags stored per chunk (caps graph density)",
    ),
    # System paths and server knobs.
    "documents_dir": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.SYSTEM,
        help_text="Local documents root that lilbee sync ingests (blank = data_root/documents)",
    ),
    "vault_base": SettingDef(
        str,
        nullable=True,
        group=SettingGroup.SYSTEM,
        help_text="Markdown vault root; results carry a vault-relative path (blank = none)",
    ),
    # hidden=True: kept off the TUI settings screen (see SettingDef docstring).
    "sse_heartbeat_interval": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.SYSTEM,
        help_text="Seconds between SSE keep-alive frames sent to idle HTTP stream clients",
        hidden=True,
    ),
    # NOTE(review): choices are hard-coded string literals here, unlike the
    # enum-derived choices used elsewhere in this map — confirm they stay in
    # sync with the provider routing code.
    "llm_provider": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.API_KEYS,
        choices=("auto", "llama-cpp", "remote"),
        help_text=(
            "Provider routing: auto picks the first key present; force a specific one when set"
        ),
    ),
    "ollama_base_url": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.LOCAL_SERVERS,
        help_text="Ollama server URL (blank uses http://localhost:11434)",
    ),
    "lm_studio_base_url": SettingDef(
        str,
        nullable=False,
        group=SettingGroup.LOCAL_SERVERS,
        help_text="LM Studio server URL (blank uses http://localhost:1234/v1)",
    ),
    # Further wiki generation settings.
    "wiki_summary_max_tokens": SettingDef(
        int,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Maximum tokens generated per wiki page",
    ),
    "wiki_temperature": SettingDef(
        float,
        nullable=False,
        group=SettingGroup.WIKI,
        help_text="Temperature used for wiki page synthesis (low = stay close to sources)",
    ),
}