Coverage for src/lilbee/cli/commands/setup.py: 100%
174 statements
coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Token (server auth), HuggingFace login, self-check, and crawler-setup commands."""
3from __future__ import annotations
5import asyncio
6import importlib
7import json
8from pathlib import Path
9from typing import Any
11import typer
13from lilbee.cli import theme
14from lilbee.cli.app import (
15 apply_overrides,
16 console,
17 data_dir_option,
18 global_option,
19)
20from lilbee.cli.helpers import json_output
21from lilbee.cli.tui import messages as msg
22from lilbee.core.config import cfg
23from lilbee.crawler import CrawlerBrowserError, bootstrap_chromium, chromium_installed
24from lilbee.runtime.progress import EventType, SetupProgressEvent

_SELF_CHECK_CHAT_REPO = "Qwen/Qwen3-0.6B-GGUF"
_SELF_CHECK_CHAT_FILE = "Qwen3-0.6B-Q8_0.gguf"
_SELF_CHECK_EMBED_REPO = "nomic-ai/nomic-embed-text-v1.5-GGUF"
_SELF_CHECK_EMBED_FILE = "nomic-embed-text-v1.5.Q4_K_M.gguf"


def _download_self_check_model(repo: str, filename: str) -> Path:
    """Fetch a GGUF from the HuggingFace CDN via urllib (stdlib only).

    Avoids huggingface_hub / httpx entirely. Inside the Nuitka --onefile
    binary, huggingface_hub's retry path has re-entered a closed httpx client
    after transient DNS failures on macOS runners. urllib is synchronous,
    lives in the stdlib, and has no long-lived client to close.
    """
    import tempfile
    import urllib.error  # imported explicitly: the except clause below needs it
    import urllib.request

    url = f"https://huggingface.co/{repo}/resolve/main/{filename}"
    dest_dir = Path(tempfile.mkdtemp(prefix="lilbee-self-check-"))
    dest = dest_dir / filename
    console.print(f"Downloading {url}")
    last_exc: BaseException | None = None
    for attempt in range(3):
        try:
            with urllib.request.urlopen(url, timeout=120) as response:  # noqa: S310 literal https url
                dest.write_bytes(response.read())
                return dest
        except (OSError, urllib.error.URLError) as exc:
            last_exc = exc
            console.print(f"download attempt {attempt + 1} failed: {exc!r}")
    raise RuntimeError(f"GGUF download failed after 3 attempts: {last_exc!r}")
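

# Hedged usage sketch (illustration only, not wired into any command): calls
# the retry helper above with the module's chat-model constants. The function
# name is hypothetical, and a real call downloads roughly 500MB.
def _example_fetch_chat_gguf() -> Path:
    path = _download_self_check_model(_SELF_CHECK_CHAT_REPO, _SELF_CHECK_CHAT_FILE)
    # mkdtemp gives a fresh directory per call, so repeated runs never collide.
    assert path.is_file() and path.suffix == ".gguf"
    return path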


_self_check_chat_path_option = typer.Option(
    None,
    "--chat-model-path",
    help="Path to a chat GGUF file. Skips the HuggingFace download.",
)
_self_check_embed_path_option = typer.Option(
    None,
    "--embed-model-path",
    help="Path to an embedding GGUF file. Skips the HuggingFace download.",
)
_self_check_max_tokens_option = typer.Option(5, "--max-tokens", help="Tokens to generate.")
_self_check_skip_embedding_option = typer.Option(
    False,
    "--skip-embedding",
    help="Skip the embedding-model leg of the self-check.",
)


def _self_check_emit_failure(error: str) -> None:
    if cfg.json_mode:
        json_output({"ok": False, "error": error})
    else:
        console.print(f"[{theme.ERROR}]SELF-CHECK FAILED:[/{theme.ERROR}] {error}")


def _resolved_provider_kwargs() -> dict[str, Any]:
    """Snapshot of the provider-stack knobs self-check exercises.

    Echoed back in the JSON payload + human readout so users can confirm
    which dynamic ctx / FA / KV cache / GPU layers values their install
    chose without grepping debug logs.
    """
    return {
        "num_ctx": cfg.num_ctx,
        "num_ctx_max": cfg.num_ctx_max,
        "flash_attention": cfg.flash_attention,
        "kv_cache_type": cfg.kv_cache_type.value,
        "n_gpu_layers": cfg.n_gpu_layers,
        "main_gpu": cfg.main_gpu,
        "gpu_devices": cfg.gpu_devices,
    }
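

# Hedged sketch of where this snapshot lands in --json output; every value
# below is illustrative, not a default lilbee asserts:
#
#     "provider": {"num_ctx": 8192, "num_ctx_max": 32768,
#                  "flash_attention": true, "kv_cache_type": "f16",
#                  "n_gpu_layers": -1, "main_gpu": 0, "gpu_devices": null}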


def self_check_cmd(
    chat_model_path: Path | None = _self_check_chat_path_option,
    embed_model_path: Path | None = _self_check_embed_path_option,
    max_tokens: int = _self_check_max_tokens_option,
    skip_embedding: bool = _self_check_skip_embedding_option,
) -> None:
    """Verify the installation can load llama.cpp and run real inference.

    Routes both legs through :func:`lilbee.providers.llama_cpp.provider.load_llama`
    so the dynamic-``n_ctx`` picker, flash-attention default, KV cache type,
    ``n_gpu_layers`` resolution, and OOM retry path all run -- i.e. the same
    provider stack a real ``lilbee ask`` / ``lilbee chat`` exercises. Failure
    here means either the vendored shared libraries don't load or one of the
    cfg-driven provider knobs is misconfigured for the host.

    Two legs:

    1. **Chat**: downloads ``Qwen3-0.6B-Q8_0.gguf`` (~500MB),
       runs ``load_llama(..., mode=LoaderMode.CHAT)`` so the dynamic-ctx picker /
       flash-attention default / KV cache mapping fire, then issues a tiny
       ``create_completion``.
    2. **Embedding**: downloads ``nomic-embed-text-v1.5.Q4_K_M.gguf`` (~84MB),
       runs ``load_llama(..., mode=LoaderMode.EMBED)`` so the embed-mode ctx clamp
       fires, then issues ``create_embedding``. Catches the "Memory is not
       initialized" assert from llama-cpp-python <0.3.19, where BERT-style
       encoders trip ``kv_cache_clear`` on a context that never allocated
       memory.

    Exits 0 on success, 1 on any failure. Intended for post-install
    verification and as the end-to-end gate in release CI.
    """
    from typing import cast

    from lilbee.providers.llama_cpp.provider import load_llama
    from lilbee.providers.model_cache import LoaderMode

    try:
        chat_path = chat_model_path or _download_self_check_model(
            _SELF_CHECK_CHAT_REPO, _SELF_CHECK_CHAT_FILE
        )
        console.print(f"Loading chat model {chat_path}")

        llm = load_llama(chat_path, mode=LoaderMode.CHAT)
        # stream=False (default) returns a dict, not an iterator, but
        # create_completion's return type is a union; cast to Any so the
        # indexing below type-checks without forcing llama_cpp to be a
        # typecheck-time dep of lilbee.
        out = cast(Any, llm.create_completion("2+2=", max_tokens=max_tokens))
        text: str = out["choices"][0]["text"]
    except Exception as exc:
        _self_check_emit_failure(repr(exc))
        raise typer.Exit(1) from exc

    if not text.strip():
        _self_check_emit_failure("empty inference response")
        raise typer.Exit(1)

    embedding_dims: int | None = None
    if not skip_embedding:
        try:
            embed_path = embed_model_path or _download_self_check_model(
                _SELF_CHECK_EMBED_REPO, _SELF_CHECK_EMBED_FILE
            )
            console.print(f"Loading embedding model {embed_path}")
            enc = load_llama(embed_path, mode=LoaderMode.EMBED)
            emb = cast(Any, enc.create_embedding(input=["test"]))
            vec = emb["data"][0]["embedding"]
        except Exception as exc:
            _self_check_emit_failure(repr(exc))
            raise typer.Exit(1) from exc

        if not vec:
            _self_check_emit_failure("empty embedding vector")
            raise typer.Exit(1)
        embedding_dims = len(vec)

    provider_kwargs = _resolved_provider_kwargs()
    if cfg.json_mode:
        payload: dict[str, Any] = {
            "ok": True,
            "chat_response": text,
            "chat_model": str(chat_path),
            "provider": provider_kwargs,
        }
        if embedding_dims is not None:
            payload["embedding_dims"] = embedding_dims
        json_output(payload)
    else:
        console.print(f"Chat response: {text!r}")
        if embedding_dims is not None:
            console.print(f"Embedding dims: {embedding_dims}")
        console.print(
            f"Provider: num_ctx={provider_kwargs['num_ctx']} "
            f"num_ctx_max={provider_kwargs['num_ctx_max']} "
            f"flash_attention={provider_kwargs['flash_attention']} "
            f"kv_cache_type={provider_kwargs['kv_cache_type']} "
            f"n_gpu_layers={provider_kwargs['n_gpu_layers']} "
            f"main_gpu={provider_kwargs['main_gpu']} "
            f"gpu_devices={provider_kwargs['gpu_devices']}"
        )
        console.print(f"[{theme.ACCENT}]SELF-CHECK PASSED[/{theme.ACCENT}]")
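

# Hedged CLI sketches, assuming this function is registered as `self-check`
# on the top-level app (registration is not visible in this module); the
# GGUF paths are placeholders:
#
#     lilbee self-check
#     lilbee self-check --skip-embedding --max-tokens 8
#     lilbee --json self-check --chat-model-path ./chat.gguf --embed-model-path ./embed.gguf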


_SELF_CHECK_EXTRAS = ("litellm", "crawl4ai", "spacy", "graspologic_native")


def self_check_extras_cmd() -> None:
    """Verify optional extras (crawler, litellm, graph) are bundled and importable."""
    results: dict[str, Any] = {}
    failed: list[str] = []
    for name in _SELF_CHECK_EXTRAS:
        try:
            importlib.import_module(name)
            results[name] = True
        except ImportError as exc:
            results[name] = False
            results[f"{name}_error"] = str(exc)
            failed.append(name)

    if cfg.json_mode:
        json_output({"ok": not failed, **results})
    else:
        for name in _SELF_CHECK_EXTRAS:
            ok = results.get(name) is True
            tag = (
                f"[{theme.ACCENT}]ok[/{theme.ACCENT}]"
                if ok
                else f"[{theme.ERROR}]MISSING[/{theme.ERROR}]"
            )
            console.print(f"  {name}: {tag}")
            if not ok:
                console.print(f"    {results.get(f'{name}_error', '')}")

    if failed:
        raise typer.Exit(1)
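

# Hedged sketch of the --json payload when one extra is missing; the error
# string is illustrative, but the key shape mirrors `results` above:
#
#     {"ok": false, "litellm": true, "crawl4ai": false,
#      "crawl4ai_error": "No module named 'crawl4ai'",
#      "spacy": true, "graspologic_native": true}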


def token(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Print the auth token for a running server."""
    from lilbee.server.auth import server_json_path

    apply_overrides(data_dir=data_dir, use_global=use_global)
    path = server_json_path()
    if not path.exists():
        if cfg.json_mode:
            json_output({"error": "No running server found"})
        else:
            console.print("No running server found (server.json missing).")
        raise SystemExit(1)
    try:
        data = json.loads(path.read_text())
        tok = data.get("token", "")
    except (json.JSONDecodeError, OSError) as exc:
        if cfg.json_mode:
            json_output({"error": f"Could not read server.json: {exc}"})
        else:
            console.print(
                f"[{theme.ERROR}]Error:[/{theme.ERROR}] Could not read server.json: {exc}"
            )
        raise SystemExit(1) from None
    if cfg.json_mode:
        json_output({"token": tok})
        return
    console.print(tok)
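

# Hedged sketch of the minimal server.json shape `token` reads; only the
# "token" key is used here, and the value below is a placeholder (the file
# is presumably written by the running server):
#
#     {"token": "<server-auth-token>"}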


def login() -> None:
    """Log in to HuggingFace for access to gated models (Mistral, Llama, etc.)."""
    import webbrowser

    from huggingface_hub import get_token
    from huggingface_hub import login as hf_login

    if get_token():
        typer.echo("Already logged in to HuggingFace.")
        if not typer.confirm("Log in again?", default=False):
            return

    typer.echo("Opening HuggingFace token page in your browser...")
    typer.echo("Create a token with 'Read' access, then paste it below.\n")
    webbrowser.open("https://huggingface.co/settings/tokens")

    token = typer.prompt("Paste your HuggingFace token", hide_input=True)
    if not token.strip():
        typer.echo("No token provided.", err=True)
        raise typer.Exit(1)

    hf_login(token=token.strip(), add_to_git_credential=False)
    typer.echo("Logged in! Gated models (Mistral, Llama, etc.) are now accessible.")
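

# Hedged non-interactive variant for CI, using the same public
# huggingface_hub login() API as the flow above. HF_TOKEN is a conventional
# environment variable name, not something lilbee defines, and this helper
# is hypothetical (not a registered command).
def _login_from_env() -> None:
    import os

    from huggingface_hub import login as hf_login

    hf_login(token=os.environ["HF_TOKEN"], add_to_git_credential=False)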


setup_app = typer.Typer(help="One-time setup for optional runtime components.")


@setup_app.command(name="crawler")
def setup_crawler_cmd() -> None:
    """Install Playwright's Chromium browser, needed for /crawl.

    No-op when Chromium is already present. Emits a simple progress
    readout; use '--json' mode on the top-level 'lilbee' command to get
    a single JSON blob with the final install state instead.
    """
    if chromium_installed():
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "already_installed": True}))
        else:
            typer.echo("Chromium already installed.")
        return

    last_pct: list[int] = [-1]

    def _on_progress(event_type: object, data: object) -> None:
        if event_type != EventType.SETUP_PROGRESS or not isinstance(data, SetupProgressEvent):
            return
        total = data.total_bytes or 0
        pct = int(data.downloaded_bytes * 100 / total) if total > 0 else 0
        if pct != last_pct[0] and not cfg.json_mode:
            last_pct[0] = pct
            typer.echo(msg.SETUP_CHROMIUM_CLI_PROGRESS.format(pct=pct), err=True)

    try:
        asyncio.run(bootstrap_chromium(on_progress=_on_progress))
    except CrawlerBrowserError as exc:
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "error": str(exc)}))
        else:
            typer.secho(f"Install failed: {exc}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from exc

    if cfg.json_mode:
        typer.echo(json.dumps({"component": "chromium", "installed": True}))
    else:
        typer.echo("Chromium installed.")
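

# Hedged terminal sketch, assuming the top-level '--json' flag the docstring
# mentions; exactly one JSON blob is emitted per run, matching the three
# terminal branches above:
#
#     $ lilbee --json setup crawler
#     {"component": "chromium", "installed": true}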