Coverage for src / lilbee / cli / commands / setup.py: 100%

174 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""Token (server auth), HuggingFace login, self-check, and crawler-setup commands.""" 

2 

3from __future__ import annotations 

4 

5import asyncio 

6import importlib 

7import json 

8from pathlib import Path 

9from typing import Any 

10 

11import typer 

12 

13from lilbee.cli import theme 

14from lilbee.cli.app import ( 

15 apply_overrides, 

16 console, 

17 data_dir_option, 

18 global_option, 

19) 

20from lilbee.cli.helpers import json_output 

21from lilbee.cli.tui import messages as msg 

22from lilbee.core.config import cfg 

23from lilbee.crawler import CrawlerBrowserError, bootstrap_chromium, chromium_installed 

24from lilbee.runtime.progress import EventType, SetupProgressEvent 

25 

# GGUF models the `self-check` command downloads when no local path is given:
# a small chat model (~500MB) and a compact embedding model (~84MB), both
# fetched from the HuggingFace CDN by _download_self_check_model.
_SELF_CHECK_CHAT_REPO = "Qwen/Qwen3-0.6B-GGUF"
_SELF_CHECK_CHAT_FILE = "Qwen3-0.6B-Q8_0.gguf"
_SELF_CHECK_EMBED_REPO = "nomic-ai/nomic-embed-text-v1.5-GGUF"
_SELF_CHECK_EMBED_FILE = "nomic-embed-text-v1.5.Q4_K_M.gguf"

30 

31 

def _download_self_check_model(repo: str, filename: str) -> Path:
    """Fetch a GGUF from the HuggingFace CDN via urllib (stdlib only).

    Avoids huggingface_hub / httpx entirely. Inside the Nuitka --onefile
    binary, huggingface_hub's retry path has re-entered a closed httpx client
    after transient DNS failures on macOS runners. urllib is synchronous,
    lives in the stdlib, and has no long-lived client to close.

    Args:
        repo: HuggingFace repo id, e.g. ``"Qwen/Qwen3-0.6B-GGUF"``.
        filename: File within the repo's ``main`` revision to download.

    Returns:
        Path to the downloaded file inside a fresh temporary directory.

    Raises:
        RuntimeError: When all three download attempts fail.
    """
    import tempfile
    import urllib.error  # imported explicitly: the except clause below references it
    import urllib.request

    # BUG FIX: the URL previously omitted the filename (the path ended at
    # /resolve/main/), so the request never targeted the requested GGUF.
    url = f"https://huggingface.co/{repo}/resolve/main/{filename}"
    dest_dir = Path(tempfile.mkdtemp(prefix="lilbee-self-check-"))
    dest = dest_dir / filename
    console.print(f"Downloading {url}")
    last_exc: BaseException | None = None
    for attempt in range(3):
        try:
            with urllib.request.urlopen(url, timeout=120) as response:  # noqa: S310 literal https url
                dest.write_bytes(response.read())
                return dest
        except (OSError, urllib.error.URLError) as exc:
            # Keep the last failure so the final error message is specific.
            last_exc = exc
            console.print(f"download attempt {attempt + 1} failed: {exc!r}")
    raise RuntimeError(f"GGUF download failed after 3 attempts: {last_exc!r}")

57 

58 

# Typer option declarations for `self-check`, attached as default values on
# self_check_cmd's parameters. Both model-path options default to None, which
# triggers the HuggingFace download in _download_self_check_model.
_self_check_chat_path_option = typer.Option(
    None,
    "--chat-model-path",
    help="Path to a chat GGUF file. Skips the HuggingFace download.",
)
_self_check_embed_path_option = typer.Option(
    None,
    "--embed-model-path",
    help="Path to an embedding GGUF file. Skips the HuggingFace download.",
)
# Tiny default: the self-check only needs proof that inference runs at all.
_self_check_max_tokens_option = typer.Option(5, "--max-tokens", help="Tokens to generate.")
_self_check_skip_embedding_option = typer.Option(
    False,
    "--skip-embedding",
    help="Skip the embedding-model leg of the self-check.",
)

75 

76 

def _self_check_emit_failure(error: str) -> None:
    """Report a self-check failure, honoring the global JSON-output mode."""
    if not cfg.json_mode:
        console.print(f"[{theme.ERROR}]SELF-CHECK FAILED:[/{theme.ERROR}] {error}")
        return
    json_output({"ok": False, "error": error})

82 

83 

def _resolved_provider_kwargs() -> dict[str, Any]:
    """Snapshot the provider-stack knobs that self-check exercises.

    The returned mapping is echoed back in the JSON payload and the human
    readout, letting users confirm which dynamic ctx / flash-attention /
    KV cache / GPU-layer values their install resolved to without having
    to grep debug logs.
    """
    return dict(
        num_ctx=cfg.num_ctx,
        num_ctx_max=cfg.num_ctx_max,
        flash_attention=cfg.flash_attention,
        kv_cache_type=cfg.kv_cache_type.value,
        n_gpu_layers=cfg.n_gpu_layers,
        main_gpu=cfg.main_gpu,
        gpu_devices=cfg.gpu_devices,
    )

100 

101 

def self_check_cmd(
    chat_model_path: Path | None = _self_check_chat_path_option,
    embed_model_path: Path | None = _self_check_embed_path_option,
    max_tokens: int = _self_check_max_tokens_option,
    skip_embedding: bool = _self_check_skip_embedding_option,
) -> None:
    """Verify the installation can load llama.cpp and run real inference.

    Routes both legs through :func:`lilbee.providers.llama_cpp.provider.load_llama`
    so the dynamic-``n_ctx`` picker, flash-attention default, KV cache type,
    ``n_gpu_layers`` resolution, and OOM retry path all run -- i.e. the same
    provider stack a real ``lilbee ask`` / ``lilbee chat`` exercises. Failure
    here means either the vendored shared libraries don't load or one of the
    cfg-driven provider knobs is misconfigured for the host.

    Two legs:

    1. **Chat**: downloads ``Qwen3-0.6B-Q8_0.gguf`` (~500MB),
       runs ``load_llama(..., mode=LoaderMode.CHAT)`` so the dynamic-ctx picker /
       flash-attention default / KV cache mapping fire, then issues a tiny
       ``create_completion``.
    2. **Embedding**: downloads ``nomic-embed-text-v1.5.Q4_K_M.gguf`` (~84MB),
       runs ``load_llama(..., mode=LoaderMode.EMBED)`` so the embed-mode ctx clamp
       fires, then issues ``create_embedding``. Catches the "Memory is not
       initialized" assert from llama-cpp-python <0.3.19, where BERT-style
       encoders trip ``kv_cache_clear`` on a context that never allocated
       memory.

    Exits 0 on success, 1 on any failure. Intended for post-install
    verification and as the end-to-end gate in release CI.
    """
    from typing import cast

    # Imported lazily so the CLI module imports without the provider stack.
    from lilbee.providers.llama_cpp.provider import load_llama
    from lilbee.providers.model_cache import LoaderMode

    # --- Leg 1: chat model ------------------------------------------------
    try:
        # Explicit --chat-model-path skips the download entirely.
        chat_path = chat_model_path or _download_self_check_model(
            _SELF_CHECK_CHAT_REPO, _SELF_CHECK_CHAT_FILE
        )
        console.print(f"Loading chat model {chat_path}")

        llm = load_llama(chat_path, mode=LoaderMode.CHAT)
        # stream=False (default) returns a dict, not an iterator, but
        # create_completion's return type is a union; cast to Any so the
        # indexing below type-checks without forcing llama_cpp to be a
        # typecheck-time dep of lilbee.
        out = cast(Any, llm.create_completion("2+2=", max_tokens=max_tokens))
        text: str = out["choices"][0]["text"]
    except Exception as exc:
        # Broad catch is deliberate: any download/load/inference failure
        # should be reported the same way and exit 1.
        _self_check_emit_failure(repr(exc))
        raise typer.Exit(1) from exc

    # A loaded model that produces nothing is still a failed check.
    if not text.strip():
        _self_check_emit_failure("empty inference response")
        raise typer.Exit(1)

    # --- Leg 2: embedding model (optional) --------------------------------
    embedding_dims: int | None = None
    if not skip_embedding:
        try:
            embed_path = embed_model_path or _download_self_check_model(
                _SELF_CHECK_EMBED_REPO, _SELF_CHECK_EMBED_FILE
            )
            console.print(f"Loading embedding model {embed_path}")
            enc = load_llama(embed_path, mode=LoaderMode.EMBED)
            emb = cast(Any, enc.create_embedding(input=["test"]))
            vec = emb["data"][0]["embedding"]
        except Exception as exc:
            _self_check_emit_failure(repr(exc))
            raise typer.Exit(1) from exc

        if not vec:
            _self_check_emit_failure("empty embedding vector")
            raise typer.Exit(1)
        embedding_dims = len(vec)

    # --- Success report ---------------------------------------------------
    provider_kwargs = _resolved_provider_kwargs()
    if cfg.json_mode:
        payload: dict[str, Any] = {
            "ok": True,
            "chat_response": text,
            "chat_model": str(chat_path),
            "provider": provider_kwargs,
        }
        # Only present when the embedding leg actually ran.
        if embedding_dims is not None:
            payload["embedding_dims"] = embedding_dims
        json_output(payload)
    else:
        console.print(f"Chat response: {text!r}")
        if embedding_dims is not None:
            console.print(f"Embedding dims: {embedding_dims}")
        console.print(
            f"Provider: num_ctx={provider_kwargs['num_ctx']} "
            f"num_ctx_max={provider_kwargs['num_ctx_max']} "
            f"flash_attention={provider_kwargs['flash_attention']} "
            f"kv_cache_type={provider_kwargs['kv_cache_type']} "
            f"n_gpu_layers={provider_kwargs['n_gpu_layers']} "
            f"main_gpu={provider_kwargs['main_gpu']} "
            f"gpu_devices={provider_kwargs['gpu_devices']}"
        )
        console.print(f"[{theme.ACCENT}]SELF-CHECK PASSED[/{theme.ACCENT}]")

203 

204 

# Optional-extra modules that self_check_extras_cmd attempts to import.
_SELF_CHECK_EXTRAS = ("litellm", "crawl4ai", "spacy", "graspologic_native")

206 

207 

def self_check_extras_cmd() -> None:
    """Verify optional extras (crawler, litellm, graph) are bundled and importable."""
    results: dict[str, Any] = {}
    failed: list[str] = []
    for name in _SELF_CHECK_EXTRAS:
        try:
            importlib.import_module(name)
        except ImportError as exc:
            # Record the failure reason so both output modes can surface it.
            results[name] = False
            results[f"{name}_error"] = str(exc)
            failed.append(name)
        else:
            results[name] = True

    if cfg.json_mode:
        json_output({"ok": not failed, **results})
        if failed:
            raise typer.Exit(1)
        return

    for name in _SELF_CHECK_EXTRAS:
        if results.get(name) is True:
            console.print(f" {name}: [{theme.ACCENT}]ok[/{theme.ACCENT}]")
        else:
            console.print(f" {name}: [{theme.ERROR}]MISSING[/{theme.ERROR}]")
            console.print(f" {results.get(f'{name}_error', '')}")

    if failed:
        raise typer.Exit(1)

237 

238 

def token(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Print the auth token for a running server."""
    from lilbee.server.auth import server_json_path

    apply_overrides(data_dir=data_dir, use_global=use_global)
    path = server_json_path()

    # NOTE(review): this command exits via SystemExit while sibling commands
    # use typer.Exit — preserved as-is; confirm whether callers rely on it.
    if not path.exists():
        if cfg.json_mode:
            json_output({"error": "No running server found"})
        else:
            console.print("No running server found (server.json missing).")
        raise SystemExit(1)

    try:
        tok = json.loads(path.read_text()).get("token", "")
    except (json.JSONDecodeError, OSError) as exc:
        if cfg.json_mode:
            json_output({"error": f"Could not read server.json: {exc}"})
        else:
            console.print(
                f"[{theme.ERROR}]Error:[/{theme.ERROR}] Could not read server.json: {exc}"
            )
        raise SystemExit(1) from None

    if cfg.json_mode:
        json_output({"token": tok})
    else:
        console.print(tok)

269 

270 

def login() -> None:
    """Log in to HuggingFace for access to gated models (Mistral, Llama, etc.)."""
    import webbrowser

    from huggingface_hub import get_token
    from huggingface_hub import login as hf_login

    # An existing token means we're already authenticated; offer to re-login.
    if get_token():
        typer.echo("Already logged in to HuggingFace.")
        wants_relogin = typer.confirm("Log in again?", default=False)
        if not wants_relogin:
            return

    typer.echo("Opening HuggingFace token page in your browser...")
    typer.echo("Create a token with 'Read' access, then paste it below.\n")
    webbrowser.open("https://huggingface.co/settings/tokens")

    # Local renamed from `token` to avoid shadowing the module-level command.
    entered = typer.prompt("Paste your HuggingFace token", hide_input=True)
    cleaned = entered.strip()
    if not cleaned:
        typer.echo("No token provided.", err=True)
        raise typer.Exit(1)

    hf_login(token=cleaned, add_to_git_credential=False)
    typer.echo("Logged in! Gated models (Mistral, Llama, etc.) are now accessible.")

294 

295 

# Typer sub-app for one-time setup commands; commands attach via
# @setup_app.command below.
setup_app = typer.Typer(help="One-time setup for optional runtime components.")

297 

298 

@setup_app.command(name="crawler")
def setup_crawler_cmd() -> None:
    """Install Playwright's Chromium browser, needed for /crawl.

    No-op when Chromium is already present. Emits a simple progress
    readout; use '--json' mode on the top-level 'lilbee' command to get
    a single JSON blob with the final install state instead.
    """
    if chromium_installed():
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "already_installed": True}))
        else:
            typer.echo("Chromium already installed.")
        return

    # Last percentage printed; -1 guarantees the first real value is emitted.
    previous_pct = -1

    def _on_progress(event_type: object, data: object) -> None:
        nonlocal previous_pct
        if event_type != EventType.SETUP_PROGRESS or not isinstance(data, SetupProgressEvent):
            return
        total = data.total_bytes or 0
        pct = int(data.downloaded_bytes * 100 / total) if total > 0 else 0
        # Only print when the percentage actually changes, and never in JSON mode.
        if pct != previous_pct and not cfg.json_mode:
            previous_pct = pct
            typer.echo(msg.SETUP_CHROMIUM_CLI_PROGRESS.format(pct=pct), err=True)

    try:
        asyncio.run(bootstrap_chromium(on_progress=_on_progress))
    except CrawlerBrowserError as exc:
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "error": str(exc)}))
        else:
            typer.secho(f"Install failed: {exc}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from exc

    if cfg.json_mode:
        typer.echo(json.dumps({"component": "chromium", "installed": True}))
    else:
        typer.echo("Chromium installed.")