Coverage for src / lilbee / cli / settings_map.py: 100%
30 statements
« prev ^ index » next — coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Shared settings map for interactive configuration."""
3from __future__ import annotations
5from dataclasses import dataclass, field
6from enum import StrEnum
8from pydantic_core import PydanticUndefined
10from lilbee.cli.tui.app import DARK_THEMES
11from lilbee.core.config import cfg
12from lilbee.core.config.enums import ChatMode, ClustererBackend, KvCacheType, WikiEntityMode
15class RenderStyle(StrEnum):
16 """How a setting is displayed in /settings."""
18 COMPACT = "compact"
19 FULL = "full"
20 LIST_COLLAPSED = "list_collapsed"
21 MULTILINE = "multiline"
@dataclass(frozen=True)
class SettingDef:
    """Metadata describing one interactive setting.

    ``hidden`` removes the setting from the TUI settings screen but keeps it
    reachable through ``lilbee set`` and the ``LILBEE_*`` env var — intended
    for transport/server knobs that a typical TUI session doesn't need.
    """

    # NOTE(review): ``type``/``nullable`` presumably drive value parsing and
    # whether a blank value is accepted — confirm against the settings UI.
    type: type
    nullable: bool
    writable: bool = True
    # Plain default instead of field(default=...): enum members are immutable,
    # so no default_factory indirection is needed.
    render: RenderStyle = RenderStyle.COMPACT
    group: str = "General"
    help_text: str = ""
    choices: tuple[str, ...] | None = None
    hidden: bool = False
def get_default(key: str) -> object:
    """Return the cfg default for a setting key.

    Looks the field up on the ``cfg`` pydantic model; a factory default is
    invoked, a plain default is returned as-is, and an undefined default
    (``PydanticUndefined``) maps to ``None``.
    """
    info = type(cfg).model_fields[key]
    factory = info.default_factory
    if factory is not None:
        return factory()  # type: ignore[call-arg]
    default = info.default
    return None if default is PydanticUndefined else default
# Registry of every setting exposed to interactive configuration.
# Keys are field names on the ``cfg`` pydantic model: ``get_default`` resolves
# defaults through ``type(cfg).model_fields[key]``, so each key here must
# exist on cfg. Dict insertion order is preserved.
SETTINGS_MAP: dict[str, SettingDef] = {
    # NOTE(review): writable=False entries appear read-only here; presumably
    # the model slots are switched through a dedicated picker — confirm.
    "chat_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group="Models",
        help_text="LLM used for chat generation (vision and reranking are separate slots)",
    ),
    "vision_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group="Models",
        help_text="Vision model for scanned PDF OCR (empty = disabled; Tesseract only)",
    ),
    "enable_ocr": SettingDef(
        bool,
        nullable=True,
        group="Ingest",
        help_text="Vision OCR for scanned PDFs (empty = auto-detect from vision_model)",
    ),
    "ocr_timeout": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text="Per-page timeout in seconds for vision OCR (0 = no limit)",
    ),
    "vision_load_budget_s": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text=(
            "Wall-clock seconds reserved for the vision worker to load the"
            " model. Total PDF-OCR budget = load_budget + ocr_timeout * pages."
        ),
    ),
    "semantic_chunking": SettingDef(
        bool,
        nullable=False,
        group="Ingest",
        help_text="Opt-in topic-aware chunker (default off; may fragment numbered procedures)",
    ),
    "topic_threshold": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text="Topic-boundary similarity threshold, 0.0-1.0, used when semantic chunking is on",
    ),
    "embedding_model": SettingDef(
        str,
        nullable=False,
        writable=False,
        group="Models",
        help_text="Model used to embed document chunks",
    ),
    "reranker_model": SettingDef(
        str,
        nullable=True,
        writable=False,
        group="Models",
        help_text="Cross-encoder model for result reranking",
    ),
    "temperature": SettingDef(
        float,
        nullable=True,
        group="Generation",
        help_text="Sampling temperature (higher = more creative)",
    ),
    "top_p": SettingDef(
        float,
        nullable=True,
        group="Generation",
        help_text="Nucleus sampling cutoff probability",
    ),
    "top_k_sampling": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text="Top-K sampling: number of tokens to consider",
    ),
    "repeat_penalty": SettingDef(
        float,
        nullable=True,
        group="Generation",
        help_text="Penalty for repeating tokens",
    ),
    "num_ctx": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text=(
            "Context window size in tokens. Leave empty to size automatically "
            "to the host's available memory (capped at num_ctx_max)."
        ),
    ),
    "num_ctx_max": SettingDef(
        int,
        nullable=False,
        group="Generation",
        help_text=(
            "Upper bound for the dynamic context picker when num_ctx is unset. "
            "Higher allows more retrieval context on hosts with spare memory."
        ),
    ),
    "flash_attention": SettingDef(
        bool,
        nullable=True,
        group="Generation",
        help_text=(
            "Flash attention. Empty (auto) tries it on with a fallback for older "
            "llama-cpp-python builds; resolves the V-cache padding warning on "
            "models with uneven per-layer V dims."
        ),
    ),
    "kv_cache_type": SettingDef(
        str,
        nullable=False,
        group="Generation",
        help_text=(
            "KV cache element type. q8_0 / q4_0 halve or quarter cache memory "
            "but require flash attention to be enabled."
        ),
        # Valid values come straight from the KvCacheType enum.
        choices=tuple(t.value for t in KvCacheType),
    ),
    "n_gpu_layers": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text=(
            "Layers to offload to GPU. Empty = all (recommended), 0 = CPU only, "
            "positive int = partial offload for tight VRAM."
        ),
    ),
    "gpu_devices": SettingDef(
        str,
        nullable=True,
        group="Generation",
        help_text=(
            "Restrict llama.cpp to specific GPU indexes on dual-GPU machines "
            "(e.g. NVIDIA dGPU + integrated). Comma-separated, like '0' or '0,1'. "
            "Applies to Vulkan, CUDA, and ROCm. Requires a restart to take effect."
        ),
    ),
    "main_gpu": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text=(
            "Primary GPU index for llama.cpp when multiple devices are visible. "
            "Empty = let llama.cpp pick (index 0). Set this together with "
            "gpu_devices to pin inference to a specific card."
        ),
    ),
    "seed": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text="Random seed for reproducible output",
    ),
    "rag_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group="Generation",
        help_text="System prompt sent when answering with retrieved context",
    ),
    "general_system_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.MULTILINE,
        group="Generation",
        help_text="System prompt sent when there are no documents to ground the answer",
    ),
    "chat_mode": SettingDef(
        str,
        nullable=False,
        group="Generation",
        choices=tuple(m.value for m in ChatMode),
        help_text="search runs every chat turn through document retrieval; chat skips it",
    ),
    "top_k": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Number of chunks returned by search",
    ),
    "rerank_candidates": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Candidate pool size for reranking",
    ),
    "show_reasoning": SettingDef(
        bool,
        nullable=False,
        group="Display",
        help_text="Show model reasoning/thinking tokens in output",
    ),
    "theme": SettingDef(
        str,
        nullable=False,
        group="Display",
        help_text="TUI color theme. Cycle with Ctrl+T; the active theme persists across sessions.",
        choices=tuple(DARK_THEMES),
    ),
    "wiki": SettingDef(
        bool,
        nullable=False,
        group="Wiki",
        help_text="Enable the wiki layer (synthesis pages with citations)",
    ),
    "wiki_dir": SettingDef(
        str,
        nullable=False,
        group="Wiki",
        help_text="Directory under data_root where wiki pages are stored",
    ),
    "wiki_prune_raw": SettingDef(
        bool,
        nullable=False,
        group="Wiki",
        help_text="Delete raw chunks after summarizing into the wiki",
    ),
    "wiki_embedding_faithfulness_threshold": SettingDef(
        float,
        nullable=False,
        group="Wiki",
        help_text=(
            "Minimum cosine similarity (0-1) between a generated page and "
            "the mean of its source chunk vectors before publishing. "
            "Pages below the threshold route to drafts/."
        ),
    ),
    "wiki_stale_citation_threshold": SettingDef(
        float,
        nullable=False,
        group="Wiki",
        help_text="Fraction of stale citations that triggers page regeneration",
    ),
    "wiki_drift_threshold": SettingDef(
        float,
        nullable=False,
        group="Wiki",
        help_text="Max fraction of changed lines before regeneration requires review",
    ),
    "wiki_clusterer": SettingDef(
        str,
        nullable=False,
        group="Wiki",
        help_text="Synthesis clusterer backend (embedding or concepts)",
        choices=tuple(b.value for b in ClustererBackend),
    ),
    "wiki_entity_mode": SettingDef(
        str,
        nullable=False,
        group="Wiki",
        help_text=(
            "Entity extraction strategy "
            "(ner_entities = default, typed NER entities; "
            "plus_llm_types = NER + LLM-proposed schema; "
            "llm_tagged = LLM tags every chunk)"
        ),
        choices=tuple(m.value for m in WikiEntityMode),
    ),
    "wiki_entity_min_mentions": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Minimum chunk mentions before an entity or concept gets its own page",
    ),
    "wiki_concept_max_chunks_per_page": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Maximum chunks passed into each concept or entity page generation call",
    ),
    "wiki_related_max": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Maximum related concepts listed in the `## Related` section of each page",
    ),
    "wiki_ingest_update_cap": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text=(
            "Touched-page cap for auto-update after sync. "
            "Beyond this count, run `lilbee wiki update` manually."
        ),
    ),
    "wiki_summary_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group="Wiki",
        help_text=(
            "Prompt for per-source summary pages. "
            "Must keep the {source_name} and {chunks_text} placeholders."
        ),
    ),
    "wiki_synthesis_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group="Wiki",
        help_text=(
            "Prompt for cross-source synthesis pages. "
            "Must keep {topic}, {source_list}, and {chunks_text}."
        ),
    ),
    "wiki_entity_batch_prompt": SettingDef(
        str,
        nullable=False,
        render=RenderStyle.FULL,
        group="Wiki",
        help_text=(
            "Prompt for the per-source batched call. "
            "Must keep {source}, {entity_list}, {chunks_text}, and {concept_instruction}."
        ),
    ),
    "wiki_extract_concepts": SettingDef(
        bool,
        nullable=False,
        group="Wiki",
        help_text=(
            "Whether the per-source batched call asks the LLM to curate concept pages "
            "alongside the pre-extracted entity list."
        ),
    ),
    "wiki_batch_min_chunks": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text=(
            "Minimum chunks a source must contribute before its batched call includes "
            "concept curation. Sources below the floor skip the concept-curation "
            "instruction; sources with zero entities AND below the floor are skipped entirely."
        ),
    ),
    "wiki_clusterer_k": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Mutual-kNN neighborhood size for the clusterer (0 = auto)",
    ),
    "crawl_max_depth": SettingDef(
        int,
        nullable=True,
        group="Crawling",
        help_text="Optional recursion-depth cap (blank = no cap; per-crawl values win)",
    ),
    "crawl_max_pages": SettingDef(
        int,
        nullable=True,
        group="Crawling",
        help_text="Optional global cap on total pages per crawl (blank = no cap).",
    ),
    "crawl_timeout": SettingDef(
        int,
        nullable=False,
        group="Crawling",
        help_text="Per-page fetch timeout in seconds",
    ),
    "crawl_sync_interval": SettingDef(
        int,
        nullable=False,
        group="Crawling",
        help_text="Seconds between periodic re-syncs during a crawl (0 = sync only at end)",
    ),
    "crawl_mean_delay": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Seconds between in-flight requests within a single crawl",
    ),
    "crawl_max_delay_range": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Random jitter (seconds) added on top of mean delay",
    ),
    "crawl_concurrent_requests": SettingDef(
        int,
        nullable=False,
        group="Crawling",
        help_text="Concurrent in-flight URLs within one crawl",
    ),
    "crawl_retry_on_rate_limit": SettingDef(
        bool,
        nullable=False,
        group="Crawling",
        help_text="Enable per-domain backoff and retries on HTTP 429/503",
    ),
    "crawl_retry_base_delay_min": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Minimum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_base_delay_max": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Maximum base-delay (seconds) on rate-limit responses",
    ),
    "crawl_retry_max_backoff": SettingDef(
        float,
        nullable=False,
        group="Crawling",
        help_text="Upper bound on any single backoff wait (seconds)",
    ),
    "crawl_retry_max_attempts": SettingDef(
        int,
        nullable=False,
        group="Crawling",
        help_text="Retry count per URL when a rate-limit code comes back",
    ),
    "crawl_exclude_patterns": SettingDef(
        list,
        nullable=False,
        group="Crawling",
        render=RenderStyle.LIST_COLLAPSED,
        help_text=(
            "Regex patterns that skip URLs at link-discovery time during "
            "recursive crawls. One per line."
        ),
    ),
    # Provider API keys — each enables that provider's models in the chat picker.
    "openrouter_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="OpenRouter API key (enables frontier models in chat picker)",
    ),
    "gemini_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="Google Gemini API key (enables frontier models in chat picker)",
    ),
    "anthropic_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="Anthropic API key (enables frontier models in chat picker)",
    ),
    "openai_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="OpenAI API key (enables frontier models in chat picker)",
    ),
    "mistral_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="Mistral API key (enables frontier models in chat picker)",
    ),
    "deepseek_api_key": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="DeepSeek API key (enables frontier models in chat picker)",
    ),
    "chunk_size": SettingDef(
        int,
        nullable=False,
        group="Ingest",
        help_text="Document chunk size in tokens (changes invalidate the index)",
    ),
    "chunk_overlap": SettingDef(
        int,
        nullable=False,
        group="Ingest",
        help_text="Tokens of overlap between adjacent chunks (preserves context across boundaries)",
    ),
    "tesseract_timeout": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text="Per-page Tesseract timeout in seconds (used when no vision model is set)",
    ),
    "worker_pool_call_timeout_s": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text=(
            "Per-call deadline for one worker-pool round-trip in seconds. "
            "Raise this for very large embed batches on slow machines"
        ),
    ),
    "worker_pool_eager_start": SettingDef(
        bool,
        nullable=False,
        group="Ingest",
        help_text=(
            "Spawn every registered worker at TUI startup instead of on first use. "
            "Trades 1-3 seconds of cold-start per role for first-call latency"
        ),
    ),
    "worker_pool_max_idle_s": SettingDef(
        float,
        nullable=False,
        group="Ingest",
        help_text=(
            "Shut a worker down after this many seconds idle to free RAM/VRAM. "
            "0 disables idle reaping"
        ),
    ),
    "max_tokens": SettingDef(
        int,
        nullable=True,
        group="Generation",
        help_text="Hard cap on generated tokens per response (blank = no cap)",
    ),
    "max_reasoning_chars": SettingDef(
        int,
        nullable=False,
        group="Generation",
        help_text=(
            "Maximum reasoning characters before lilbee forces the model to answer "
            "(0 = unlimited; per-model overrides apply on top)"
        ),
    ),
    "model_keep_alive": SettingDef(
        int,
        nullable=False,
        group="Generation",
        help_text="Seconds the loaded model stays warm between calls (0 = unload immediately)",
    ),
    "gpu_memory_fraction": SettingDef(
        float,
        nullable=False,
        group="Generation",
        help_text="Fraction of GPU memory the model is allowed to claim (0.1-1.0)",
    ),
    "candidate_multiplier": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Candidate-pool multiplier over top_k before reranking",
    ),
    "max_distance": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Maximum vector distance for retrieval matches (lower = stricter)",
    ),
    "min_relevance_score": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Minimum RRF relevance score for hybrid search results (0.0 = no filter)",
    ),
    "max_context_sources": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Maximum unique sources contributing chunks to a single answer",
    ),
    "diversity_max_per_source": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Maximum chunks accepted from any one source (caps source dominance)",
    ),
    "mmr_lambda": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text=(
            "MMR lambda balancing relevance vs diversity (0 = max diversity, 1 = max relevance)"
        ),
    ),
    "temporal_filtering": SettingDef(
        bool,
        nullable=False,
        group="Retrieval",
        help_text="Detect temporal queries and bias retrieval toward recent chunks",
    ),
    "hyde": SettingDef(
        bool,
        nullable=False,
        group="Retrieval",
        help_text="Use HyDE (hypothetical answer expansion) to broaden retrieval",
    ),
    "hyde_weight": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Weight on the HyDE-generated query vector when blending with the original",
    ),
    "query_expansion_count": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Number of paraphrase expansions per query (0 disables expansion)",
    ),
    "expansion_similarity_threshold": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Minimum cosine similarity an expansion must keep with the original query",
    ),
    "expansion_short_query_tokens": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Queries at or below this token count skip expansion (saves a model call)",
    ),
    "expansion_guardrails": SettingDef(
        bool,
        nullable=False,
        group="Retrieval",
        help_text="Drop expansions that diverge from the original intent",
    ),
    "adaptive_threshold_step": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Step size for adaptive relevance-score relaxation when initial recall is empty",
    ),
    "concept_graph": SettingDef(
        bool,
        nullable=False,
        group="Retrieval",
        help_text="Boost retrieval scores for chunks that share concepts with the query",
    ),
    "concept_boost_weight": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Maximum boost (0-1) the concept graph can add to a chunk's relevance",
    ),
    "concept_boost_floor": SettingDef(
        float,
        nullable=False,
        group="Retrieval",
        help_text="Minimum cosine similarity needed before the concept graph boosts a chunk",
    ),
    "concept_max_per_chunk": SettingDef(
        int,
        nullable=False,
        group="Retrieval",
        help_text="Maximum concept tags stored per chunk (caps graph density)",
    ),
    "documents_dir": SettingDef(
        str,
        nullable=False,
        group="System",
        help_text="Local documents root that lilbee sync ingests (blank = data_root/documents)",
    ),
    "vault_base": SettingDef(
        str,
        nullable=True,
        group="System",
        help_text="Markdown vault root; results carry a vault-relative path (blank = none)",
    ),
    "sse_heartbeat_interval": SettingDef(
        float,
        nullable=False,
        group="System",
        help_text="Seconds between SSE keep-alive frames sent to idle HTTP stream clients",
        # Kept out of the TUI screen; still reachable via `lilbee set` and
        # the LILBEE_* env var (see SettingDef.hidden).
        hidden=True,
    ),
    "llm_provider": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        choices=("auto", "llama-cpp", "remote"),
        help_text=(
            "Provider routing: auto picks the first key present; force a specific one when set"
        ),
    ),
    "remote_base_url": SettingDef(
        str,
        nullable=False,
        group="API-Keys",
        help_text="OpenAI-compatible base URL (Ollama default: http://localhost:11434)",
    ),
    "wiki_summary_max_tokens": SettingDef(
        int,
        nullable=False,
        group="Wiki",
        help_text="Maximum tokens generated per wiki page",
    ),
    "wiki_temperature": SettingDef(
        float,
        nullable=False,
        group="Wiki",
        help_text="Temperature used for wiki page synthesis (low = stay close to sources)",
    ),
}