Coverage for src/lilbee/cli/commands/setup.py: 100%

1"""Token (server auth), HuggingFace login, self-check, and crawler-setup commands."""

3from __future__ import annotations

5import asyncio

6import importlib

7import json

8from pathlib import Path

9from typing import Any

11import typer

13from lilbee.cli import theme

14from lilbee.cli.app import (

15 apply_overrides,

16 console,

17 data_dir_option,

18 global_option,

19)

20from lilbee.cli.helpers import json_output

21from lilbee.cli.tui import messages as msg

22from lilbee.core.config import cfg

23from lilbee.crawler import CrawlerBrowserError, bootstrap_chromium, chromium_installed

24from lilbee.runtime.progress import EventType, SetupProgressEvent

26_SELF_CHECK_CHAT_REPO = "Qwen/Qwen3-0.6B-GGUF"

27_SELF_CHECK_CHAT_FILE = "Qwen3-0.6B-Q8_0.gguf"

28_SELF_CHECK_EMBED_REPO = "nomic-ai/nomic-embed-text-v1.5-GGUF"

29_SELF_CHECK_EMBED_FILE = "nomic-embed-text-v1.5.Q4_K_M.gguf"

def _download_self_check_model(repo: str, filename: str) -> Path:
    """Fetch a GGUF from the HuggingFace CDN via urllib (stdlib only).

    Avoids huggingface_hub / httpx entirely. Inside the Nuitka --onefile
    binary, huggingface_hub's retry path has re-entered a closed httpx client
    after transient DNS failures on macOS runners. urllib is synchronous,
    lives in the stdlib, and has no long-lived client to close.

    The body is streamed to disk in chunks rather than read into memory in
    one piece, so a multi-hundred-megabyte model does not have to fit in RAM.

    Args:
        repo: HuggingFace repository id (``owner/name``).
        filename: Name of the file on the repository's ``main`` revision.

    Returns:
        Path of the downloaded file inside a fresh temporary directory.

    Raises:
        RuntimeError: If all three attempts fail; chained to the last error.
    """
    import http.client
    import shutil
    import tempfile
    import urllib.request

    url = f"https://huggingface.co/{repo}/resolve/main/{filename}"
    dest_dir = Path(tempfile.mkdtemp(prefix="lilbee-self-check-"))
    dest = dest_dir / filename
    console.print(f"Downloading {url}")
    last_exc: BaseException | None = None
    for attempt in range(3):
        try:
            with urllib.request.urlopen(url, timeout=120) as response:  # noqa: S310 literal https url
                declared = response.headers.get("Content-Length")
                # Opening with "wb" truncates whatever a failed attempt left behind.
                with dest.open("wb") as sink:
                    shutil.copyfileobj(response, sink)
            # Sized reads do not raise when the connection drops early, so a
            # short body has to be detected by comparing against the header.
            if declared is not None and declared.isdigit():
                received = dest.stat().st_size
                if received != int(declared):
                    raise OSError(
                        f"truncated download: got {received} of {declared} bytes"
                    )
            return dest
        except (OSError, http.client.HTTPException) as exc:
            # URLError / HTTPError / socket timeouts are all OSError subclasses;
            # HTTPException covers protocol-level failures such as IncompleteRead.
            last_exc = exc
            console.print(f"download attempt {attempt + 1} failed: {exc!r}")
    # Do not leave a partial model file behind once every attempt has failed.
    dest.unlink(missing_ok=True)
    raise RuntimeError(f"GGUF download failed after 3 attempts: {last_exc!r}") from last_exc

# Typer option objects for ``self_check_cmd``; defined at module level so the
# command signature can reference them as parameter defaults.

# Local GGUF overrides: when given, the matching download is skipped.
_self_check_chat_path_option = typer.Option(
    None, "--chat-model-path", help="Path to a chat GGUF file. Skips the HuggingFace download."
)
_self_check_embed_path_option = typer.Option(
    None,
    "--embed-model-path",
    help="Path to an embedding GGUF file. Skips the HuggingFace download.",
)

# Size of the completion requested from the chat model.
_self_check_max_tokens_option = typer.Option(
    5,
    "--max-tokens",
    help="Tokens to generate.",
)

# Switch that turns off the embedding half of the check.
_self_check_skip_embedding_option = typer.Option(
    False, "--skip-embedding", help="Skip the embedding-model leg of the self-check."
)

def _self_check_emit_failure(error: str) -> None:
    """Report a self-check failure in whichever output mode is active.

    In JSON mode a ``{"ok": False, "error": ...}`` payload goes through
    ``json_output``; otherwise the message is printed on the console behind
    an error-coloured ``SELF-CHECK FAILED:`` label.
    """
    if not cfg.json_mode:
        console.print(f"[{theme.ERROR}]SELF-CHECK FAILED:[/{theme.ERROR}] {error}")
        return
    json_output({"ok": False, "error": error})

def _resolved_provider_kwargs() -> dict[str, Any]:
    """Capture the provider-related settings currently held by ``cfg``.

    The self-check echoes this mapping in both its JSON payload and its
    human readout, so users can see the context size, flash-attention,
    KV-cache and GPU values in effect without digging through debug logs.
    ``kv_cache_type`` is reported through its ``.value``.
    """
    snapshot: dict[str, Any] = dict(
        num_ctx=cfg.num_ctx,
        num_ctx_max=cfg.num_ctx_max,
        flash_attention=cfg.flash_attention,
        kv_cache_type=cfg.kv_cache_type.value,
        n_gpu_layers=cfg.n_gpu_layers,
        main_gpu=cfg.main_gpu,
        gpu_devices=cfg.gpu_devices,
    )
    return snapshot

100

101

def self_check_cmd(
    chat_model_path: Path | None = _self_check_chat_path_option,
    embed_model_path: Path | None = _self_check_embed_path_option,
    max_tokens: int = _self_check_max_tokens_option,
    skip_embedding: bool = _self_check_skip_embedding_option,
) -> None:
    """Verify the installation can load llama.cpp and run real inference.

    Routes both legs through :func:`lilbee.providers.llama_cpp.provider.load_llama`
    so the dynamic-``n_ctx`` picker, flash-attention default, KV cache type,
    ``n_gpu_layers`` resolution, and OOM retry path all run -- i.e. the same
    provider stack a real ``lilbee ask`` / ``lilbee chat`` exercises. Failure
    here means either the vendored shared libraries don't load or one of the
    cfg-driven provider knobs is misconfigured for the host.

    Two legs:

    1. **Chat**: downloads ``Qwen3-0.6B-Q8_0.gguf`` (~500MB),
       runs ``load_llama(..., mode=LoaderMode.CHAT)`` so the dynamic-ctx picker /
       flash-attention default / KV cache mapping fire, then issues a tiny
       ``create_completion``.
    2. **Embedding**: downloads ``nomic-embed-text-v1.5.Q4_K_M.gguf`` (~84MB),
       runs ``load_llama(..., mode=LoaderMode.EMBED)`` so the embed-mode ctx clamp
       fires, then issues ``create_embedding``. Catches the "Memory is not
       initialized" assert from llama-cpp-python <0.3.19, where BERT-style
       encoders trip ``kv_cache_clear`` on a context that never allocated
       memory.

    Exits 0 on success, 1 on any failure. Intended for post-install
    verification and as the end-to-end gate in release CI.
    """
    from typing import cast

    from lilbee.providers.llama_cpp.provider import load_llama
    from lilbee.providers.model_cache import LoaderMode

    # --- Chat leg: any exception becomes a reported failure and exit code 1.
    try:
        if chat_model_path:
            chat_path = chat_model_path
        else:
            chat_path = _download_self_check_model(_SELF_CHECK_CHAT_REPO, _SELF_CHECK_CHAT_FILE)
        console.print(f"Loading chat model {chat_path}")

        llm = load_llama(chat_path, mode=LoaderMode.CHAT)
        # The completion call is annotated as a union (dict or iterator); with
        # the default non-streaming call the result is indexed like a dict, so
        # it is cast to Any to keep llama_cpp out of the type-check dependencies.
        completion = cast(Any, llm.create_completion("2+2=", max_tokens=max_tokens))
        text: str = completion["choices"][0]["text"]
    except Exception as exc:
        _self_check_emit_failure(repr(exc))
        raise typer.Exit(1) from exc

    # A whitespace-only completion counts as a failed inference.
    if text.strip() == "":
        _self_check_emit_failure("empty inference response")
        raise typer.Exit(1)

    # --- Embedding leg (optional): same failure handling as the chat leg.
    embedding_dims: int | None = None
    if not skip_embedding:
        try:
            if embed_model_path:
                embed_path = embed_model_path
            else:
                embed_path = _download_self_check_model(
                    _SELF_CHECK_EMBED_REPO, _SELF_CHECK_EMBED_FILE
                )
            console.print(f"Loading embedding model {embed_path}")
            encoder = load_llama(embed_path, mode=LoaderMode.EMBED)
            embedded = cast(Any, encoder.create_embedding(input=["test"]))
            vector = embedded["data"][0]["embedding"]
        except Exception as exc:
            _self_check_emit_failure(repr(exc))
            raise typer.Exit(1) from exc

        if not vector:
            _self_check_emit_failure("empty embedding vector")
            raise typer.Exit(1)
        embedding_dims = len(vector)

    # --- Report success, echoing the provider settings that were in effect.
    provider_kwargs = _resolved_provider_kwargs()
    if not cfg.json_mode:
        console.print(f"Chat response: {text!r}")
        if embedding_dims is not None:
            console.print(f"Embedding dims: {embedding_dims}")
        shown_knobs = (
            "num_ctx",
            "num_ctx_max",
            "flash_attention",
            "kv_cache_type",
            "n_gpu_layers",
            "main_gpu",
            "gpu_devices",
        )
        knob_readout = " ".join(f"{knob}={provider_kwargs[knob]}" for knob in shown_knobs)
        console.print(f"Provider: {knob_readout}")
        console.print(f"[{theme.ACCENT}]SELF-CHECK PASSED[/{theme.ACCENT}]")
        return

    payload: dict[str, Any] = {
        "ok": True,
        "chat_response": text,
        "chat_model": str(chat_path),
        "provider": provider_kwargs,
    }
    if embedding_dims is not None:
        payload["embedding_dims"] = embedding_dims
    json_output(payload)

203

204

# Importable module names probed by ``self_check_extras_cmd``.
_SELF_CHECK_EXTRAS = ("litellm", "crawl4ai", "spacy", "graspologic_native")


def self_check_extras_cmd() -> None:
    """Verify optional extras (crawler, litellm, graph) are bundled and importable."""
    # report maps each module name to True/False; a failed import also gets a
    # "<name>_error" entry holding the ImportError text.
    report: dict[str, Any] = {}
    for extra in _SELF_CHECK_EXTRAS:
        try:
            importlib.import_module(extra)
        except ImportError as exc:
            report[extra] = False
            report[f"{extra}_error"] = str(exc)
        else:
            report[extra] = True
    missing: list[str] = [extra for extra in _SELF_CHECK_EXTRAS if not report[extra]]

    if cfg.json_mode:
        json_output({"ok": not missing, **report})
    else:
        ok_tag = f"[{theme.ACCENT}]ok[/{theme.ACCENT}]"
        missing_tag = f"[{theme.ERROR}]MISSING[/{theme.ERROR}]"
        for extra in _SELF_CHECK_EXTRAS:
            imported = report.get(extra) is True
            tag = ok_tag if imported else missing_tag
            console.print(f" {extra}: {tag}")
            if imported:
                continue
            detail = report.get(f"{extra}_error", "")
            console.print(f" {detail}")

    # Exit non-zero in both output modes when any extra failed to import.
    if missing:
        raise typer.Exit(1)

237

238

def token(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Print the auth token for a running server."""
    # Flow: apply the data-dir / global overrides, locate server.json, and
    # print its "token" entry (empty string when the key is absent).
    # Exits with status 1 via SystemExit when the file is missing, unreadable,
    # not valid UTF-8 JSON, or does not contain a JSON object.
    from lilbee.server.auth import server_json_path

    apply_overrides(data_dir=data_dir, use_global=use_global)
    path = server_json_path()
    if not path.exists():
        if cfg.json_mode:
            json_output({"error": "No running server found"})
        else:
            console.print("No running server found (server.json missing).")
        raise SystemExit(1)
    try:
        # Explicit encoding so the result does not depend on the locale.
        data = json.loads(path.read_text(encoding="utf-8"))
        if not isinstance(data, dict):
            # Valid JSON of the wrong shape (list, string, ...) would otherwise
            # crash on .get with an AttributeError and a traceback.
            raise ValueError(f"expected a JSON object, got {type(data).__name__}")
        tok = data.get("token", "")
    except (ValueError, OSError) as exc:
        # ValueError covers json.JSONDecodeError, UnicodeDecodeError and the
        # shape check above; OSError covers read failures.
        if cfg.json_mode:
            json_output({"error": f"Could not read server.json: {exc}"})
        else:
            console.print(
                f"[{theme.ERROR}]Error:[/{theme.ERROR}] Could not read server.json: {exc}"
            )
        raise SystemExit(1) from None
    if cfg.json_mode:
        json_output({"token": tok})
        return
    console.print(tok)

269

270

def login() -> None:
    """Log in to HuggingFace for access to gated models (Mistral, Llama, etc.)."""
    import webbrowser

    from huggingface_hub import get_token
    from huggingface_hub import login as hf_login

    # A token is already stored: only continue if the user asks to replace it.
    if get_token():
        typer.echo("Already logged in to HuggingFace.")
        wants_relogin = typer.confirm("Log in again?", default=False)
        if not wants_relogin:
            return

    typer.echo("Opening HuggingFace token page in your browser...")
    typer.echo("Create a token with 'Read' access, then paste it below.\n")
    webbrowser.open("https://huggingface.co/settings/tokens")

    # Input is hidden while typing; surrounding whitespace is discarded.
    pasted = typer.prompt("Paste your HuggingFace token", hide_input=True)
    cleaned = pasted.strip()
    if not cleaned:
        typer.echo("No token provided.", err=True)
        raise typer.Exit(1)

    hf_login(token=cleaned, add_to_git_credential=False)
    typer.echo("Logged in! Gated models (Mistral, Llama, etc.) are now accessible.")

294

295

setup_app = typer.Typer(help="One-time setup for optional runtime components.")


@setup_app.command(name="crawler")
def setup_crawler_cmd() -> None:
    """Install Playwright's Chromium browser, needed for /crawl.

    No-op when Chromium is already present. Emits a simple progress
    readout; use '--json' mode on the top-level 'lilbee' command to get
    a single JSON blob with the final install state instead.
    """
    json_mode = cfg.json_mode

    # Nothing to do when the browser is already there.
    if chromium_installed():
        if json_mode:
            typer.echo(json.dumps({"component": "chromium", "already_installed": True}))
        else:
            typer.echo("Chromium already installed.")
        return

    # Last percentage written to stderr; -1 guarantees the first event prints.
    last_reported = -1

    def _on_progress(event_type: object, data: object) -> None:
        """Print a progress line to stderr whenever the whole-number percent changes."""
        nonlocal last_reported
        if event_type != EventType.SETUP_PROGRESS:
            return
        if not isinstance(data, SetupProgressEvent):
            return
        total = data.total_bytes or 0
        # An unknown or zero total is reported as 0%.
        pct = int(data.downloaded_bytes * 100 / total) if total > 0 else 0
        if pct == last_reported or cfg.json_mode:
            return
        last_reported = pct
        typer.echo(msg.SETUP_CHROMIUM_CLI_PROGRESS.format(pct=pct), err=True)

    try:
        asyncio.run(bootstrap_chromium(on_progress=_on_progress))
    except CrawlerBrowserError as exc:
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "error": str(exc)}))
        else:
            typer.secho(f"Install failed: {exc}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from exc

    if cfg.json_mode:
        typer.echo(json.dumps({"component": "chromium", "installed": True}))
    else:
        typer.echo("Chromium installed.")

Coverage for src / lilbee / cli / commands / setup.py: 100%

174 statements