Coverage for src / lilbee / cli / commands / setup.py: 100%

174 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-06-28 01:01 +0000

1"""Token (server auth), HuggingFace login, self-check, and crawler-setup commands.""" 

2 

3from __future__ import annotations 

4 

5import asyncio 

6import importlib 

7import json 

8from pathlib import Path 

9from typing import Any 

10 

11import typer 

12 

13from lilbee.cli import theme 

14from lilbee.cli.app import ( 

15 apply_overrides, 

16 console, 

17 data_dir_option, 

18 global_option, 

19) 

20from lilbee.cli.helpers import json_output 

21from lilbee.cli.tui import messages as msg 

22from lilbee.core.config import cfg 

23from lilbee.crawler import CrawlerBrowserError, bootstrap_chromium, chromium_installed 

24from lilbee.runtime.progress import EventType, SetupProgressEvent 

25 

26_SELF_CHECK_CHAT_REPO = "Qwen/Qwen3-0.6B-GGUF" 

27_SELF_CHECK_CHAT_FILE = "Qwen3-0.6B-Q8_0.gguf" 

28_SELF_CHECK_EMBED_REPO = "nomic-ai/nomic-embed-text-v1.5-GGUF" 

29_SELF_CHECK_EMBED_FILE = "nomic-embed-text-v1.5.Q4_K_M.gguf" 

30 

31 

def _download_self_check_model(repo: str, filename: str) -> Path:
    """Fetch a GGUF from the HuggingFace CDN via urllib (stdlib only).

    Avoids huggingface_hub / httpx entirely. Inside the Nuitka --onefile
    binary, huggingface_hub's retry path has re-entered a closed httpx client
    after transient DNS failures on macOS runners. urllib is synchronous,
    lives in the stdlib, and has no long-lived client to close.

    The body is streamed to disk in chunks rather than read into memory in
    one piece, so a multi-hundred-megabyte model does not have to fit in RAM.

    Args:
        repo: HuggingFace repository id, e.g. ``"org/name"``.
        filename: File inside the repository's ``main`` revision.

    Returns:
        Path of the downloaded file inside a fresh temporary directory.

    Raises:
        RuntimeError: when every attempt fails. The temporary directory is
            removed before raising.
    """
    import http.client
    import shutil
    import tempfile
    import urllib.error
    import urllib.request

    max_attempts = 3
    chunk_size = 1024 * 1024

    url = f"https://huggingface.co/{repo}/resolve/main/{filename}"
    dest_dir = Path(tempfile.mkdtemp(prefix="lilbee-self-check-"))
    dest = dest_dir / filename
    console.print(f"Downloading {url}")
    last_exc: BaseException | None = None
    for attempt in range(max_attempts):
        try:
            with urllib.request.urlopen(url, timeout=120) as response:  # noqa: S310 literal https url
                declared = response.headers.get("Content-Length")
                # "wb" truncates, so a partial file from a failed attempt is overwritten.
                with dest.open("wb") as sink:
                    shutil.copyfileobj(response, sink, chunk_size)
            # Chunked reads can end early without raising; compare against the
            # size the server declared so a truncated file is retried.
            if declared is not None and declared.isdigit():
                received = dest.stat().st_size
                if received != int(declared):
                    raise OSError(f"truncated download: got {received} of {declared} bytes")
            return dest
        except (OSError, urllib.error.URLError, http.client.HTTPException) as exc:
            # http.client errors (e.g. IncompleteRead) are not OSError subclasses
            # and would otherwise skip the remaining attempts.
            last_exc = exc
            console.print(f"download attempt {attempt + 1} failed: {exc!r}")
    # Nothing usable was produced; do not leave the temp directory behind.
    shutil.rmtree(dest_dir, ignore_errors=True)
    raise RuntimeError(f"GGUF download failed after {max_attempts} attempts: {last_exc!r}")

57 

58 

# Typer option declarations used as the parameter defaults of ``self_check_cmd``.
_self_check_chat_path_option = typer.Option(
    None,
    "--chat-model-path",
    help="Path to a chat GGUF file. Skips the HuggingFace download.",
)
_self_check_embed_path_option = typer.Option(
    None,
    "--embed-model-path",
    help="Path to an embedding GGUF file. Skips the HuggingFace download.",
)
_self_check_max_tokens_option = typer.Option(
    5,
    "--max-tokens",
    help="Tokens to generate.",
)
_self_check_skip_embedding_option = typer.Option(
    False,
    "--skip-embedding",
    help="Skip the embedding-model leg of the self-check.",
)

75 

76 

def _self_check_emit_failure(error: str) -> None:
    """Report a self-check failure in whichever output mode is active.

    In JSON mode an ``{"ok": False, "error": ...}`` object is emitted through
    ``json_output``; otherwise a themed ``SELF-CHECK FAILED`` line is printed
    to the console. This only reports -- it does not exit.
    """
    if not cfg.json_mode:
        console.print(f"[{theme.ERROR}]SELF-CHECK FAILED:[/{theme.ERROR}] {error}")
        return
    json_output({"ok": False, "error": error})

82 

83 

def _resolved_provider_kwargs() -> dict[str, Any]:
    """Collect the provider-related ``cfg`` values reported by the self-check.

    The result is echoed in both the JSON payload and the human readout so
    users can see which context-size / flash-attention / KV-cache / GPU
    settings were in effect without digging through debug logs.

    Returns:
        Mapping of setting name to its current ``cfg`` value, in a fixed
        order. ``kv_cache_type`` is reported as its ``.value``.
    """
    ordered_fields = (
        "num_ctx",
        "num_ctx_max",
        "chat_n_ctx_target",
        "flash_attention",
        "kv_cache_type",
        "n_gpu_layers",
        "main_gpu",
        "gpu_devices",
    )
    snapshot: dict[str, Any] = {}
    for field in ordered_fields:
        current = getattr(cfg, field)
        # kv_cache_type is the one setting reported via ``.value`` rather than as-is.
        snapshot[field] = current.value if field == "kv_cache_type" else current
    return snapshot

101 

102 

def self_check_cmd(
    chat_model_path: Path | None = _self_check_chat_path_option,
    embed_model_path: Path | None = _self_check_embed_path_option,
    max_tokens: int = _self_check_max_tokens_option,
    skip_embedding: bool = _self_check_skip_embedding_option,
) -> None:
    """Verify the installation can load llama.cpp and run real inference.

    Routes both legs through :func:`lilbee.providers.llama_cpp.provider.load_llama`
    so the dynamic-``n_ctx`` picker, flash-attention default, KV cache type,
    ``n_gpu_layers`` resolution, and OOM retry path all run -- i.e. the same
    provider stack a real ``lilbee ask`` / ``lilbee chat`` exercises. Failure
    here means either the vendored shared libraries don't load or one of the
    cfg-driven provider knobs is misconfigured for the host.

    Two legs:

    1. **Chat**: downloads ``Qwen3-0.6B-Q8_0.gguf`` (~500MB),
       runs ``load_llama(..., mode=LoaderMode.CHAT)`` so the dynamic-ctx picker /
       flash-attention default / KV cache mapping fire, then issues a tiny
       ``create_completion``.
    2. **Embedding**: downloads ``nomic-embed-text-v1.5.Q4_K_M.gguf`` (~84MB),
       runs ``load_llama(..., mode=LoaderMode.EMBED)`` so the embed-mode ctx clamp
       fires, then issues ``create_embedding``. Catches the "Memory is not
       initialized" assert from llama-cpp-python <0.3.19, where BERT-style
       encoders trip ``kv_cache_clear`` on a context that never allocated
       memory.

    Exits 0 on success, 1 on any failure. Intended for post-install
    verification and as the end-to-end gate in release CI.
    """
    # NOTE: the docstring above doubles as the command's help text, so it is
    # kept as-is; implementation notes live in comments below.
    from typing import cast

    from lilbee.providers.llama_cpp.provider import load_llama
    from lilbee.providers.model_cache import LoaderMode

    # --- Chat leg: any exception is reported and turned into exit code 1. ---
    try:
        if chat_model_path:
            chat_path = chat_model_path
        else:
            chat_path = _download_self_check_model(_SELF_CHECK_CHAT_REPO, _SELF_CHECK_CHAT_FILE)
        console.print(f"Loading chat model {chat_path}")

        chat_llm = load_llama(chat_path, mode=LoaderMode.CHAT)
        # With the default stream=False the call returns a dict, but its
        # declared return type is a union; casting to Any lets the indexing
        # below type-check without making llama_cpp a typecheck-time dependency.
        completion = cast(Any, chat_llm.create_completion("2+2=", max_tokens=max_tokens))
        text: str = completion["choices"][0]["text"]
    except Exception as exc:
        _self_check_emit_failure(repr(exc))
        raise typer.Exit(1) from exc

    if not text.strip():
        _self_check_emit_failure("empty inference response")
        raise typer.Exit(1)

    # --- Embedding leg (optional). ---
    embedding_dims: int | None = None
    if not skip_embedding:
        try:
            if embed_model_path:
                embed_path = embed_model_path
            else:
                embed_path = _download_self_check_model(
                    _SELF_CHECK_EMBED_REPO, _SELF_CHECK_EMBED_FILE
                )
            console.print(f"Loading embedding model {embed_path}")
            encoder = load_llama(embed_path, mode=LoaderMode.EMBED)
            embedding = cast(Any, encoder.create_embedding(input=["test"]))
            vec = embedding["data"][0]["embedding"]
        except Exception as exc:
            _self_check_emit_failure(repr(exc))
            raise typer.Exit(1) from exc

        if not vec:
            _self_check_emit_failure("empty embedding vector")
            raise typer.Exit(1)
        embedding_dims = len(vec)

    # --- Success report. ---
    provider_kwargs = _resolved_provider_kwargs()
    have_embedding = embedding_dims is not None

    if not cfg.json_mode:
        console.print(f"Chat response: {text!r}")
        if have_embedding:
            console.print(f"Embedding dims: {embedding_dims}")
        reported_keys = (
            "num_ctx",
            "num_ctx_max",
            "chat_n_ctx_target",
            "flash_attention",
            "kv_cache_type",
            "n_gpu_layers",
            "main_gpu",
            "gpu_devices",
        )
        readout = " ".join(f"{key}={provider_kwargs[key]}" for key in reported_keys)
        console.print(f"Provider: {readout}")
        console.print(f"[{theme.ACCENT}]SELF-CHECK PASSED[/{theme.ACCENT}]")
        return

    payload: dict[str, Any] = {
        "ok": True,
        "chat_response": text,
        "chat_model": str(chat_path),
        "provider": provider_kwargs,
    }
    if have_embedding:
        payload["embedding_dims"] = embedding_dims
    json_output(payload)

205 

206 

# Top-level module names imported by ``self_check_extras_cmd``.
_SELF_CHECK_EXTRAS = ("litellm", "crawl4ai", "spacy", "graspologic_native")


def self_check_extras_cmd() -> None:
    """Verify optional extras (crawler, litellm, graph) are bundled and importable."""
    # Each extra gets a ``name -> bool`` entry; failures additionally get a
    # ``<name>_error`` entry holding the ImportError text.
    results: dict[str, Any] = {}
    failed: list[str] = []
    for extra in _SELF_CHECK_EXTRAS:
        try:
            importlib.import_module(extra)
        except ImportError as exc:
            results[extra] = False
            results[f"{extra}_error"] = str(exc)
            failed.append(extra)
        else:
            results[extra] = True

    if cfg.json_mode:
        json_output({"ok": not failed, **results})
    else:
        for extra in _SELF_CHECK_EXTRAS:
            if results.get(extra) is True:
                console.print(f" {extra}: [{theme.ACCENT}]ok[/{theme.ACCENT}]")
                continue
            console.print(f" {extra}: [{theme.ERROR}]MISSING[/{theme.ERROR}]")
            console.print(f" {results.get(f'{extra}_error', '')}")

    # Exit non-zero when at least one extra could not be imported.
    if failed:
        raise typer.Exit(1)

239 

240 

def token(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Print the auth token for a running server."""
    # Flow: apply the data-dir / global overrides, locate server.json, then
    # print its "token" field (or a JSON object in JSON mode). Any failure to
    # find or read the file is reported and ends with SystemExit(1).
    from lilbee.server.auth import server_json_path

    apply_overrides(data_dir=data_dir, use_global=use_global)
    path = server_json_path()
    if not path.exists():
        if cfg.json_mode:
            json_output({"error": "No running server found"})
        else:
            console.print("No running server found (server.json missing).")
        raise SystemExit(1)
    try:
        # Explicit encoding so the read does not depend on the locale default.
        data = json.loads(path.read_text(encoding="utf-8"))
        # Valid JSON that is not an object (list, string, number, null) has no
        # ``.get``; report it as an unreadable file instead of crashing.
        if not isinstance(data, dict):
            raise ValueError(f"expected a JSON object, got {type(data).__name__}")
        tok = data.get("token", "")
    except (ValueError, OSError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError as well
        # as the shape check above.
        if cfg.json_mode:
            json_output({"error": f"Could not read server.json: {exc}"})
        else:
            console.print(
                f"[{theme.ERROR}]Error:[/{theme.ERROR}] Could not read server.json: {exc}"
            )
        raise SystemExit(1) from None
    if cfg.json_mode:
        json_output({"token": tok})
        return
    console.print(tok)

271 

272 

def login() -> None:
    """Log in to HuggingFace for access to gated models (Mistral, Llama, etc.)."""
    import webbrowser

    from huggingface_hub import get_token
    from huggingface_hub import login as hf_login

    # A token is already stored: ask before replacing it.
    already_logged_in = get_token()
    if already_logged_in:
        typer.echo("Already logged in to HuggingFace.")
        wants_relogin = typer.confirm("Log in again?", default=False)
        if not wants_relogin:
            return

    typer.echo("Opening HuggingFace token page in your browser...")
    typer.echo("Create a token with 'Read' access, then paste it below.\n")
    webbrowser.open("https://huggingface.co/settings/tokens")

    # Input is hidden so the token is not echoed to the terminal.
    pasted = typer.prompt("Paste your HuggingFace token", hide_input=True)
    cleaned = pasted.strip()
    if not cleaned:
        typer.echo("No token provided.", err=True)
        raise typer.Exit(1)

    hf_login(token=cleaned, add_to_git_credential=False)
    typer.echo("Logged in! Gated models (Mistral, Llama, etc.) are now accessible.")

296 

297 

setup_app = typer.Typer(help="One-time setup for optional runtime components.")


@setup_app.command(name="crawler")
def setup_crawler_cmd() -> None:
    """Install Playwright's Chromium browser, needed for /crawl.

    No-op when Chromium is already present. Emits a simple progress
    readout; use '--json' mode on the top-level 'lilbee' command to get
    a single JSON blob with the final install state instead.
    """
    json_mode_now = cfg.json_mode
    if chromium_installed():
        if json_mode_now:
            typer.echo(json.dumps({"component": "chromium", "already_installed": True}))
        else:
            typer.echo("Chromium already installed.")
        return

    # Last percentage written to stderr; -1 means nothing has been printed yet.
    last_pct = -1

    def _on_progress(event_type: object, data: object) -> None:
        nonlocal last_pct
        # Ignore anything that is not a setup-progress event with the expected payload.
        if event_type != EventType.SETUP_PROGRESS:
            return
        if not isinstance(data, SetupProgressEvent):
            return
        total = data.total_bytes or 0
        if total > 0:
            pct = int(data.downloaded_bytes * 100 / total)
        else:
            pct = 0
        # Only print when the percentage changed, and never in JSON mode.
        if pct == last_pct or cfg.json_mode:
            return
        last_pct = pct
        typer.echo(msg.SETUP_CHROMIUM_CLI_PROGRESS.format(pct=pct), err=True)

    try:
        asyncio.run(bootstrap_chromium(on_progress=_on_progress))
    except CrawlerBrowserError as exc:
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "error": str(exc)}))
        else:
            typer.secho(f"Install failed: {exc}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from exc

    if not cfg.json_mode:
        typer.echo("Chromium installed.")
        return
    typer.echo(json.dumps({"component": "chromium", "installed": True}))