Coverage for src/lilbee/app/settings.py: 100%

1"""Canonical write boundary for lilbee configuration."""

3from __future__ import annotations

5import types

6from dataclasses import dataclass

7from pathlib import Path

8from typing import Any, Union, get_args, get_origin

10from pydantic_core import PydanticUndefined

12from lilbee.app.settings_map import SETTINGS_MAP, SettingDef, SettingGroup

13from lilbee.config_meta import (

14 MODEL_ROLE_FIELDS,

15 REINDEX_FIELDS,

16 WRITABLE_CONFIG_FIELDS,

17)

18from lilbee.core import settings as persistent_settings

19from lilbee.core.config import Config, cfg

20from lilbee.core.config.keys import (

21 LOAD_AFFECTING_KEYS,

22 PER_CALL_RELOADABLE_KEYS,

23 PROVIDER_API_KEYS,

24 PROVIDER_SWITCHING_KEYS,

25)

# Smallest ``chunk_size`` accepted by ``_validate``.
_MIN_CHUNK_SIZE = 64

# Path-typed writable fields whose pydantic "default" is the unresolved
# sentinel ``Path()`` (a literal "."). The effective default is computed by
# the model_validator at process start (``documents_dir`` becomes
# ``data_root/documents``; ``vault_base`` stays as None). Writing the raw
# sentinel back through the boundary would corrupt the install, so these
# fields are refused at the reset gate (see ``reset_settings``).
_NO_RESET_FIELDS: frozenset[str] = frozenset({"documents_dir"})

@dataclass(frozen=True)
class SettingInfo:
    """Externally-facing description of a single writable setting.

    Instances are immutable snapshots assembled by ``_setting_info`` from the
    live ``cfg`` object, the ``Config`` field metadata, and the optional
    ``SettingDef`` entry for the key.
    """

    # Config field name the setting is stored under.
    key: str
    # Current value read from ``cfg`` when the snapshot was built.
    value: Any
    # Pydantic default as reported by ``_setting_default`` (``None`` if unset).
    default: Any
    # Short type string rendered from the field annotation by ``_annotation_name``.
    type: str
    # Whether the key accepts ``None`` to clear the persisted entry.
    nullable: bool
    # Group the setting is listed under; ``SettingGroup.MODELS`` when no definition exists.
    group: SettingGroup
    # Help text from the definition; empty string when no definition exists.
    help_text: str
    # Allowed values from the definition, or ``None`` when unconstrained or undefined.
    choices: tuple[str, ...] | None
    # True when the key is listed in ``REINDEX_FIELDS``.
    reindex_required: bool

@dataclass(frozen=True)
class SettingsUpdateResult:
    """Outcome of an ``apply_settings_update`` call."""

    # Sorted names of every key contained in the applied batch.
    updated: list[str]
    # True when the batch touched a ``REINDEX_FIELDS`` key, or when a new
    # ``embedding_model`` is not compatible with the persisted store meta.
    reindex_required: bool

60_SCALAR_TYPE_NAMES: dict[type, str] = {

61 bool: "bool",

62 int: "int",

63 float: "float",

64 str: "str",

65 Path: "str",

66 type(None): "null",

67}

68_COLLECTION_ORIGINS = (list, frozenset, set, tuple)

69_UNION_ORIGINS = (Union, types.UnionType)

72def _annotation_name(annotation: Any) -> str:

73 """Render a pydantic field annotation as a short MCP-friendly type string."""

74 origin = get_origin(annotation)

75 if origin in _UNION_ORIGINS:

76 return "|".join(_annotation_name(a) for a in get_args(annotation))

77 scalar = _SCALAR_TYPE_NAMES.get(annotation)

78 if scalar is not None:

79 return scalar

80 if origin in _COLLECTION_ORIGINS:

81 return "list"

82 return getattr(annotation, "__name__", None) or str(annotation)

def _setting_default(key: str) -> Any:
    """Look up the declared pydantic default for ``key``.

    A ``default_factory`` wins when present and is invoked to produce the
    value; a field with no declared default yields ``None``.
    """
    field = Config.model_fields[key]
    factory = field.default_factory
    if factory is not None:
        return factory()  # type: ignore[call-arg]
    declared = field.default
    return None if declared is PydanticUndefined else declared

def _is_write_only(key: str) -> bool:
    """Tell whether ``key`` is persisted but never read back (API keys, hf_token).

    The flag lives under ``write_only`` in the field's ``json_schema_extra``;
    a non-dict ``json_schema_extra`` counts as not write-only.
    """
    schema_extra = Config.model_fields[key].json_schema_extra
    if not isinstance(schema_extra, dict):
        return False
    return bool(schema_extra.get("write_only", False))

101

102

def _public_writable_keys() -> list[str]:
    """Return the sorted names of all writable fields, excluding write-only secrets."""
    candidates = {*WRITABLE_CONFIG_FIELDS, *MODEL_ROLE_FIELDS}
    readable = [name for name in candidates if not _is_write_only(name)]
    readable.sort()
    return readable

107

108

def _setting_info(key: str, definition: SettingDef | None) -> SettingInfo:
    """Assemble the ``SettingInfo`` snapshot for ``key``.

    Group, help text and choices come from ``definition``; when it is missing
    (or falsy) they fall back to ``SettingGroup.MODELS``, ``""`` and ``None``.
    """
    field = Config.model_fields[key]
    can_be_null = _is_nullable(key)
    if definition:
        group, help_text, choices = definition.group, definition.help_text, definition.choices
    else:
        group, help_text, choices = SettingGroup.MODELS, "", None
    return SettingInfo(
        key=key,
        value=getattr(cfg, key),
        default=_setting_default(key),
        type=_annotation_name(field.annotation),
        nullable=can_be_null,
        group=group,
        help_text=help_text,
        choices=choices,
        reindex_required=key in REINDEX_FIELDS,
    )

126

127

def _parse_group(group: SettingGroup | str) -> SettingGroup:
    """Resolve ``group`` to a ``SettingGroup`` member.

    Enum members pass through untouched. Strings are stripped and matched
    case-insensitively against each member's ``value``.

    Raises:
        ValueError: if the string matches no group value.
    """
    if isinstance(group, SettingGroup):
        return group
    wanted = group.strip().lower()
    found = next((member for member in SettingGroup if member.value.lower() == wanted), None)
    if found is not None:
        return found
    valid = ", ".join(member.value for member in SettingGroup)
    raise ValueError(f"Unknown setting group: {group!r}. Valid groups: {valid}")

140

141

def list_settings(group: SettingGroup | str | None = None) -> list[SettingInfo]:
    """Describe every writable non-secret setting, ordered by group value then key.

    When ``group`` is given (enum member or case-insensitive value string),
    only settings in that group are returned.

    Raises:
        ValueError: if ``group`` is a string that matches no group.
    """

    def _order(entry: SettingInfo) -> tuple[Any, str]:
        return (entry.group.value, entry.key)

    names = _public_writable_keys()
    entries = [_setting_info(name, SETTINGS_MAP.get(name)) for name in names]
    if group is None:
        selected = entries
    else:
        target = _parse_group(group)
        selected = [entry for entry in entries if entry.group == target]
    return sorted(selected, key=_order)

149

150

def get_setting(key: str) -> SettingInfo:
    """Describe one writable non-secret setting.

    Raises:
        KeyError: if ``key`` is unknown or read-only, or if it is a
            write-only field that cannot be read back.
    """
    settable = _is_settable(key)
    if not settable:
        raise KeyError(f"Unknown or read-only setting: {key}")
    if _is_write_only(key):
        raise KeyError(f"Setting '{key}' is write-only and cannot be read back")
    definition = SETTINGS_MAP.get(key)
    return _setting_info(key, definition)

158

159

def _is_settable(key: str) -> bool:
    """Return True if ``key`` is a writable config field or a model-role field."""
    return any(key in table for table in (WRITABLE_CONFIG_FIELDS, MODEL_ROLE_FIELDS))

162

163

def _is_nullable(key: str) -> bool:
    """Report whether ``None`` may be written to ``key`` to clear its persisted entry.

    Only keys present in ``WRITABLE_CONFIG_FIELDS`` can be nullable; the
    mapping's value for the key is the answer.
    """
    if key not in WRITABLE_CONFIG_FIELDS:
        return False
    return WRITABLE_CONFIG_FIELDS[key]

169

170

def _validate(updates: dict[str, Any]) -> None:
    """Reject unknown keys, null on non-nullable, and inconsistent chunk settings.

    The chunk checks look at the *effective* pair: a value supplied in
    ``updates`` wins, otherwise the current ``cfg`` value is used. The pair is
    checked whenever either ``chunk_size`` or ``chunk_overlap`` is being
    changed, so shrinking ``chunk_size`` alone below the existing overlap is
    rejected instead of silently persisting an invalid combination.

    Raises:
        ValueError: on an unknown/read-only key, ``None`` for a non-nullable
            key, ``chunk_size`` below ``_MIN_CHUNK_SIZE``, or an effective
            ``chunk_overlap`` that is not strictly smaller than the effective
            ``chunk_size``.
    """
    for key, value in updates.items():
        if not _is_settable(key):
            raise ValueError(f"Unknown or read-only setting: {key}")
        if value is None and not _is_nullable(key):
            raise ValueError(f"Setting '{key}' does not accept null")

    new_chunk_size = updates.get("chunk_size")
    size_changed = isinstance(new_chunk_size, int)
    if size_changed and new_chunk_size < _MIN_CHUNK_SIZE:
        raise ValueError(f"chunk_size must be >= {_MIN_CHUNK_SIZE}")

    new_overlap = updates.get("chunk_overlap")
    overlap_changed = isinstance(new_overlap, int)
    if not (size_changed or overlap_changed):
        # Neither side of the pair is touched; leave the existing config alone.
        return

    effective_chunk_size = new_chunk_size if size_changed else cfg.chunk_size
    # getattr keeps a missing or unset overlap from failing an otherwise valid update.
    effective_overlap = new_overlap if overlap_changed else getattr(cfg, "chunk_overlap", None)
    if (
        isinstance(effective_overlap, int)
        and isinstance(effective_chunk_size, int)
        and effective_overlap >= effective_chunk_size
    ):
        raise ValueError(
            f"chunk_overlap ({effective_overlap}) must be < chunk_size ({effective_chunk_size})"
        )

187

188

def _coerce_value(key: str, value: Any) -> Any:
    """Canonicalize ``value`` before it is assigned onto ``cfg``.

    String values for model-role slots go through task validation and the
    validator's return value is used; everything else is returned unchanged.
    """
    if key not in MODEL_ROLE_FIELDS or not isinstance(value, str):
        return value
    # heavy: role_validator pulls catalog + modelhub transitively (~300 ms)
    from lilbee.modelhub.role_validator import validate_model_task_assignment

    return validate_model_task_assignment(key, value)

197

198

def _apply_with_rollback(
    updates: dict[str, Any],
) -> tuple[dict[str, str], list[str], dict[str, Any]]:
    """Assign every update onto ``cfg``, undoing all of them if any assignment fails.

    Returns ``(to_persist, to_delete, snapshot)``: the string form of each
    non-null value as read back from ``cfg``, the keys that were set to
    ``None``, and the pre-update values needed for a later rollback.
    """
    snapshot = {name: getattr(cfg, name) for name in updates}
    to_persist: dict[str, str] = {}
    to_delete: list[str] = []
    try:
        for name, raw in updates.items():
            if raw is None:
                setattr(cfg, name, None)
                to_delete.append(name)
            else:
                setattr(cfg, name, _coerce_value(name, raw))
                # Persist what cfg actually stored, not what the caller sent.
                stored = getattr(cfg, name)
                # NOTE(review): only ``list`` values are newline-joined; any other
                # collection type would be written as its ``str()`` form -- confirm
                # no writable field is set/tuple typed.
                to_persist[name] = (
                    "\n".join(str(item) for item in stored)
                    if isinstance(stored, list)
                    else str(stored)
                )
    except Exception:
        _restore_snapshot(snapshot)
        raise
    return to_persist, to_delete, snapshot

222

223

def _restore_snapshot(snapshot: dict[str, Any]) -> None:
    """Write every saved value in ``snapshot`` back onto ``cfg``."""
    for name in snapshot:
        setattr(cfg, name, snapshot[name])

227

228

def _invalidate_caches(changed_keys: set[str]) -> None:
    """Drop every read-side cache whose freshness depends on a changed setting.

    Each category of key triggers its own invalidation; the heavy modules
    involved are imported lazily so untouched categories cost nothing.
    """
    if not changed_keys:
        return

    if not changed_keys.isdisjoint(MODEL_ROLE_FIELDS):
        # heavy: model_info reads GGUF headers via llama-cpp (~130 ms)
        from lilbee.modelhub.model_info import invalidate_cache as invalidate_arch_cache

        invalidate_arch_cache()

    needs_reload = any(
        name in LOAD_AFFECTING_KEYS and name not in PER_CALL_RELOADABLE_KEYS
        for name in changed_keys
    )
    if needs_reload:
        # heavy: app.services pulls llama_cpp + lancedb (~70 ms)
        from lilbee.app.services import peek_services

        live_services = peek_services()
        if live_services is not None:
            # model_path=None drops every loaded role; the changed key may be
            # role-agnostic (num_ctx) or role-specific, and per-role granularity
            # would force a key->role map that adds nothing over a full drop.
            live_services.provider.invalidate_load_cache()

    if not changed_keys.isdisjoint(PROVIDER_API_KEYS):
        # heavy: sdk_llm_provider pulls litellm fanout (~145 ms)
        from lilbee.providers.sdk_llm_provider import inject_provider_keys

        inject_provider_keys()

    if not changed_keys.isdisjoint(PROVIDER_SWITCHING_KEYS):
        # Swap requires reconstructing the provider singleton via
        # providers.factory.create_provider, only called at services init.
        from lilbee.app.services import reset_services

        reset_services()

260

261

def apply_settings_update(
    updates: dict[str, Any],
    *,
    allow_model_roles: bool = True,
) -> SettingsUpdateResult:
    """Validate, apply, persist, and invalidate caches for a batch of updates.

    The batch is all-or-nothing in memory: a validation rejection leaves
    ``cfg`` untouched and writes nothing, and an ``OSError`` raised while
    persisting restores the pre-update snapshot before propagating. Caches
    are invalidated only once persistence has succeeded.

    With ``allow_model_roles=False`` any of ``chat_model`` /
    ``embedding_model`` / ``vision_model`` / ``reranker_model`` in the batch
    is rejected up front; the HTTP PATCH /api/config surface relies on this
    to route role writes through PUT /api/models/<role>.

    Raises:
        ValueError: if a model role is present while disallowed, or if
            validation rejects the batch.
        OSError: if writing the persisted settings fails.
    """
    if not allow_model_roles:
        blocked = sorted(name for name in updates if name in MODEL_ROLE_FIELDS)
        if blocked:
            offender = blocked[0]
            raise ValueError(
                f"'{offender}' must be set through the dedicated model route, "
                "not the general settings update."
            )

    _validate(updates)

    embed_in_batch = "embedding_model" in updates
    if embed_in_batch:
        # Pin the OLD ref into store meta before mutation, otherwise the
        # next read lazy-initializes meta from the NEW cfg and silently
        # hides the dimension drift. Runs even when the value is unchanged
        # so a legacy meta row is always canonicalized on the first swap
        # attempt.
        _pin_legacy_store_meta()

    to_persist, to_delete, snapshot = _apply_with_rollback(updates)
    try:
        if to_persist:
            persistent_settings.update_values(cfg.data_root, to_persist)
        if to_delete:
            persistent_settings.delete_values(cfg.data_root, to_delete)
    except OSError:
        _restore_snapshot(snapshot)
        raise

    _invalidate_caches(set(updates))

    reindex_required = any(name in REINDEX_FIELDS for name in updates)
    if embed_in_batch and not reindex_required:
        # Only consult the store when no other key already forces a reindex.
        reindex_required = _embed_reindex_required(updates["embedding_model"])
    return SettingsUpdateResult(updated=sorted(updates), reindex_required=reindex_required)

313

314

def _pin_legacy_store_meta() -> None:
    """Record the current embedding ref in store meta ahead of a model swap."""
    # heavy: ~100ms (lance + store init); only paid when embedding_model is in the batch.
    from lilbee.app.services import get_services

    store = get_services().store
    store.initialize_meta_if_legacy()

321

322

def _embed_reindex_required(new_ref: str) -> bool:
    """Decide whether switching to *new_ref* requires rebuilding the store.

    Returns False when the store has no meta; otherwise True exactly when
    ``refs_compatible`` reports the persisted ref and *new_ref* incompatible.
    """
    from lilbee.app.services import get_services
    from lilbee.data.store.lance_helpers import refs_compatible

    store = get_services().store
    store.canonicalize_meta_if_legacy()
    meta = store.get_meta()
    if meta is None:
        return False
    stored_ref = meta["embedding_model"]
    stored_dim = meta["embedding_dim"]
    # NOTE(review): the stored dim is passed for both dim arguments, so only
    # the refs can differ here -- confirm this is intentional.
    compatible = refs_compatible(stored_ref, new_ref, stored_dim, stored_dim)
    return not compatible

336

337

def reset_settings(keys: list[str], *, skip_unresettable: bool = False) -> SettingsUpdateResult:
    """Reset each key to its pydantic default and apply through the write boundary.

    Keys listed in ``_NO_RESET_FIELDS`` (currently ``documents_dir``, whose
    declared default is a sentinel resolved to ``data_root/documents`` at
    process start) are refused so the literal sentinel is never written
    back. With ``skip_unresettable=True`` those keys are silently dropped
    from the batch instead, which suits bulk-reset gestures.

    Raises:
        ValueError: if a key is unknown or read-only, or is unresettable
            while ``skip_unresettable`` is False.
    """
    # Check the whole batch first so nothing is applied if any key is refused.
    for key in keys:
        if not _is_settable(key):
            raise ValueError(f"Unknown or read-only setting: {key}")
        if not skip_unresettable and key in _NO_RESET_FIELDS:
            raise ValueError(
                f"'{key}' has no resettable default; pass an explicit value via settings_set."
            )
    # A ``None`` default is forwarded as-is; the write boundary decides
    # whether the key accepts null.
    updates: dict[str, Any] = {
        key: _setting_default(key) for key in keys if key not in _NO_RESET_FIELDS
    }
    return apply_settings_update(updates)

Coverage for src / lilbee / app / settings.py: 100%

198 statements