Coverage for src / lilbee / cli / commands / setup.py: 100%
174 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
1"""Token (server auth), HuggingFace login, self-check, and crawler-setup commands."""
3from __future__ import annotations
5import asyncio
6import importlib
7import json
8from pathlib import Path
9from typing import Any
11import typer
13from lilbee.cli import theme
14from lilbee.cli.app import (
15 apply_overrides,
16 console,
17 data_dir_option,
18 global_option,
19)
20from lilbee.cli.helpers import json_output
21from lilbee.cli.tui import messages as msg
22from lilbee.core.config import cfg
23from lilbee.crawler import CrawlerBrowserError, bootstrap_chromium, chromium_installed
24from lilbee.runtime.progress import EventType, SetupProgressEvent
# HuggingFace repo / file pairs that the self-check downloads when the caller
# does not supply local model paths: one chat GGUF and one embedding GGUF.
_SELF_CHECK_CHAT_REPO = "Qwen/Qwen3-0.6B-GGUF"
_SELF_CHECK_CHAT_FILE = "Qwen3-0.6B-Q8_0.gguf"
_SELF_CHECK_EMBED_REPO = "nomic-ai/nomic-embed-text-v1.5-GGUF"
_SELF_CHECK_EMBED_FILE = "nomic-embed-text-v1.5.Q4_K_M.gguf"
def _download_self_check_model(repo: str, filename: str) -> Path:
    """Fetch a GGUF from the HuggingFace CDN via urllib (stdlib only).

    Avoids huggingface_hub / httpx entirely. Inside the Nuitka --onefile
    binary, huggingface_hub's retry path has re-entered a closed httpx client
    after transient DNS failures on macOS runners. urllib is synchronous,
    lives in the stdlib, and has no long-lived client to close.

    The body is streamed to disk in chunks instead of being buffered in
    memory, so a multi-hundred-megabyte model does not need to fit in RAM.

    Args:
        repo: HuggingFace repository id, e.g. ``"org/name"``.
        filename: File inside the repository's ``main`` revision.

    Returns:
        Path of the downloaded file inside a fresh temporary directory.

    Raises:
        RuntimeError: All three download attempts failed; the last
            underlying error is attached as ``__cause__``.
    """
    import http.client
    import shutil
    import tempfile
    import urllib.request

    url = f"https://huggingface.co/{repo}/resolve/main/{filename}"
    dest_dir = Path(tempfile.mkdtemp(prefix="lilbee-self-check-"))
    dest = dest_dir / filename
    console.print(f"Downloading {url}")
    last_exc: BaseException | None = None
    for attempt in range(3):
        try:
            with urllib.request.urlopen(url, timeout=120) as response:  # noqa: S310 literal https url
                expected = response.headers.get("Content-Length")
                # "wb" truncates, so a retry never appends to a partial file.
                with dest.open("wb") as sink:
                    shutil.copyfileobj(response, sink)
            # Chunked reads do not raise on a short body, so compare against
            # the advertised length to turn a truncated transfer into a retry.
            if expected is not None and expected.isdigit():
                received = dest.stat().st_size
                if received != int(expected):
                    raise OSError(
                        f"truncated download: got {received} of {expected} bytes"
                    )
            return dest
        # URLError is an OSError subclass; HTTPException (e.g. IncompleteRead)
        # is not, so it is listed explicitly to keep it inside the retry loop.
        except (OSError, http.client.HTTPException) as exc:
            last_exc = exc
            console.print(f"download attempt {attempt + 1} failed: {exc!r}")
    raise RuntimeError(f"GGUF download failed after 3 attempts: {last_exc!r}") from last_exc
# Typer option declarations used as parameter defaults by ``self_check_cmd``.

# Local chat GGUF; when given, the chat-model download is skipped.
_self_check_chat_path_option = typer.Option(
    None,
    "--chat-model-path",
    help="Path to a chat GGUF file. Skips the HuggingFace download.",
)

# Local embedding GGUF; when given, the embedding-model download is skipped.
_self_check_embed_path_option = typer.Option(
    None,
    "--embed-model-path",
    help="Path to an embedding GGUF file. Skips the HuggingFace download.",
)

# Completion length for the chat leg (defaults to 5 tokens).
_self_check_max_tokens_option = typer.Option(5, "--max-tokens", help="Tokens to generate.")

# Opt-out switch for the embedding leg.
_self_check_skip_embedding_option = typer.Option(
    False,
    "--skip-embedding",
    help="Skip the embedding-model leg of the self-check.",
)
def _self_check_emit_failure(error: str) -> None:
    """Report a self-check failure in the active output mode.

    In JSON mode a ``{"ok": False, "error": ...}`` payload is emitted;
    otherwise a themed "SELF-CHECK FAILED" line goes to the console.
    Exiting is left to the caller.
    """
    if not cfg.json_mode:
        console.print(f"[{theme.ERROR}]SELF-CHECK FAILED:[/{theme.ERROR}] {error}")
        return
    json_output({"ok": False, "error": error})
def _resolved_provider_kwargs() -> dict[str, Any]:
    """Return a snapshot of the provider settings the self-check exercises.

    The values are read from ``cfg`` and echoed in both the JSON payload and
    the human readout, so users can confirm the context size, flash
    attention, KV cache and GPU values in effect without reading debug logs.
    """
    return dict(
        num_ctx=cfg.num_ctx,
        num_ctx_max=cfg.num_ctx_max,
        chat_n_ctx_target=cfg.chat_n_ctx_target,
        flash_attention=cfg.flash_attention,
        # Stored as an enum on cfg; report its plain value.
        kv_cache_type=cfg.kv_cache_type.value,
        n_gpu_layers=cfg.n_gpu_layers,
        main_gpu=cfg.main_gpu,
        gpu_devices=cfg.gpu_devices,
    )
def self_check_cmd(
    chat_model_path: Path | None = _self_check_chat_path_option,
    embed_model_path: Path | None = _self_check_embed_path_option,
    max_tokens: int = _self_check_max_tokens_option,
    skip_embedding: bool = _self_check_skip_embedding_option,
) -> None:
    """Verify the installation can load llama.cpp and run real inference.

    Both legs go through
    :func:`lilbee.providers.llama_cpp.provider.load_llama`, i.e. the same
    provider stack a real ``lilbee ask`` / ``lilbee chat`` uses (dynamic
    ``n_ctx`` picker, flash-attention default, KV cache type,
    ``n_gpu_layers`` resolution, OOM retry). A failure therefore points at
    either the vendored shared libraries or a cfg-driven provider knob that
    does not suit this host.

    Legs:

    1. **Chat**: uses ``chat_model_path`` or downloads
       ``Qwen3-0.6B-Q8_0.gguf`` (~500MB), loads it with
       ``LoaderMode.CHAT`` and runs a tiny ``create_completion``.
    2. **Embedding** (unless ``skip_embedding``): uses ``embed_model_path``
       or downloads ``nomic-embed-text-v1.5.Q4_K_M.gguf`` (~84MB), loads it
       with ``LoaderMode.EMBED`` and runs ``create_embedding``. This catches
       the "Memory is not initialized" assert from llama-cpp-python
       <0.3.19, where BERT-style encoders trip ``kv_cache_clear`` on a
       context that never allocated memory.

    Exits 0 on success and 1 on any failure (``typer.Exit``). Intended for
    post-install verification and as the end-to-end gate in release CI.
    """
    from typing import cast

    from lilbee.providers.llama_cpp.provider import load_llama
    from lilbee.providers.model_cache import LoaderMode

    # Chat leg.
    try:
        chat_path = (
            chat_model_path
            if chat_model_path
            else _download_self_check_model(_SELF_CHECK_CHAT_REPO, _SELF_CHECK_CHAT_FILE)
        )
        console.print(f"Loading chat model {chat_path}")

        chat_llm = load_llama(chat_path, mode=LoaderMode.CHAT)
        # With the default stream=False the call returns a dict, but the
        # declared return type is a union. Casting to Any keeps the indexing
        # below type-checkable without making llama_cpp a typecheck-time
        # dependency of lilbee.
        completion = cast(Any, chat_llm.create_completion("2+2=", max_tokens=max_tokens))
        reply: str = completion["choices"][0]["text"]
    except Exception as exc:
        _self_check_emit_failure(repr(exc))
        raise typer.Exit(1) from exc

    if not reply.strip():
        _self_check_emit_failure("empty inference response")
        raise typer.Exit(1)

    # Embedding leg (optional).
    embedding_dims: int | None = None
    if not skip_embedding:
        try:
            embed_path = (
                embed_model_path
                if embed_model_path
                else _download_self_check_model(_SELF_CHECK_EMBED_REPO, _SELF_CHECK_EMBED_FILE)
            )
            console.print(f"Loading embedding model {embed_path}")
            encoder = load_llama(embed_path, mode=LoaderMode.EMBED)
            embedded = cast(Any, encoder.create_embedding(input=["test"]))
            vector = embedded["data"][0]["embedding"]
        except Exception as exc:
            _self_check_emit_failure(repr(exc))
            raise typer.Exit(1) from exc

        if not vector:
            _self_check_emit_failure("empty embedding vector")
            raise typer.Exit(1)
        embedding_dims = len(vector)

    # Success report.
    provider_kwargs = _resolved_provider_kwargs()
    has_embedding = embedding_dims is not None

    if cfg.json_mode:
        payload: dict[str, Any] = {
            "ok": True,
            "chat_response": reply,
            "chat_model": str(chat_path),
            "provider": provider_kwargs,
        }
        if has_embedding:
            payload["embedding_dims"] = embedding_dims
        json_output(payload)
        return

    console.print(f"Chat response: {reply!r}")
    if has_embedding:
        console.print(f"Embedding dims: {embedding_dims}")
    # Fixed key order so the readout stays stable.
    shown_keys = (
        "num_ctx",
        "num_ctx_max",
        "chat_n_ctx_target",
        "flash_attention",
        "kv_cache_type",
        "n_gpu_layers",
        "main_gpu",
        "gpu_devices",
    )
    readout = " ".join(f"{key}={provider_kwargs[key]}" for key in shown_keys)
    console.print("Provider: " + readout)
    console.print(f"[{theme.ACCENT}]SELF-CHECK PASSED[/{theme.ACCENT}]")
# Top-level modules of the optional extras probed by ``self_check_extras_cmd``.
_SELF_CHECK_EXTRAS = ("litellm", "crawl4ai", "spacy", "graspologic_native")


def self_check_extras_cmd() -> None:
    """Verify optional extras (crawler, litellm, graph) are bundled and importable.

    Each module in ``_SELF_CHECK_EXTRAS`` is imported in turn. Any exception
    raised during import marks that extra as failed and the check moves on,
    so one broken extra never hides the status of the others or bypasses
    the JSON payload.

    Output is a JSON object (``ok`` plus one boolean per module and an
    ``<name>_error`` string per failure) in JSON mode, otherwise one
    ``ok`` / ``MISSING`` line per module.

    Raises:
        typer.Exit: With code 1 when at least one extra failed to import.
    """
    results: dict[str, Any] = {}
    failed: list[str] = []
    for name in _SELF_CHECK_EXTRAS:
        try:
            importlib.import_module(name)
        except ImportError as exc:
            error = str(exc)
        except Exception as exc:
            # Import-time failures are not always ImportError (e.g. OSError
            # from a missing shared library). repr keeps the exception type
            # visible for these less obvious cases.
            error = repr(exc)
        else:
            results[name] = True
            continue
        results[name] = False
        results[f"{name}_error"] = error
        failed.append(name)

    if cfg.json_mode:
        json_output({"ok": not failed, **results})
    else:
        for name in _SELF_CHECK_EXTRAS:
            ok = results.get(name) is True
            tag = (
                f"[{theme.ACCENT}]ok[/{theme.ACCENT}]"
                if ok
                else f"[{theme.ERROR}]MISSING[/{theme.ERROR}]"
            )
            console.print(f"  {name}: {tag}")
            if not ok:
                console.print(f"    {results.get(f'{name}_error', '')}")

    if failed:
        raise typer.Exit(1)
def token(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Print the auth token for a running server.

    The token is read from the ``server.json`` file located by
    ``server_json_path()`` after the data-dir overrides are applied. In JSON
    mode the result is emitted as ``{"token": ...}``; otherwise the bare
    token is printed.

    Args:
        data_dir: Optional data directory override.
        use_global: Whether to use the global data directory.

    Raises:
        SystemExit: With code 1 when ``server.json`` is missing, unreadable,
            not valid JSON, or not a JSON object.
    """
    from lilbee.server.auth import server_json_path

    apply_overrides(data_dir=data_dir, use_global=use_global)
    path = server_json_path()
    if not path.exists():
        if cfg.json_mode:
            json_output({"error": "No running server found"})
        else:
            console.print("No running server found (server.json missing).")
        raise SystemExit(1)
    try:
        data = json.loads(path.read_text())
        # Valid JSON that is not an object (null, list, ...) has no ``get``;
        # report it as a read error instead of crashing with AttributeError.
        if not isinstance(data, dict):
            raise ValueError(f"expected a JSON object, got {type(data).__name__}")
        tok = data.get("token", "")
    # ValueError covers json.JSONDecodeError, UnicodeDecodeError from
    # read_text, and the shape check above.
    except (ValueError, OSError) as exc:
        if cfg.json_mode:
            json_output({"error": f"Could not read server.json: {exc}"})
        else:
            console.print(
                f"[{theme.ERROR}]Error:[/{theme.ERROR}] Could not read server.json: {exc}"
            )
        raise SystemExit(1) from None
    if cfg.json_mode:
        json_output({"token": tok})
        return
    console.print(tok)
def login() -> None:
    """Log in to HuggingFace for access to gated models (Mistral, Llama, etc.).

    If a token is already stored, the user is asked whether to log in again
    (default: no). Otherwise the HuggingFace token page is opened in the
    browser, the pasted token is read without echo, and it is handed to
    ``huggingface_hub.login`` without adding it to the git credential store.

    Raises:
        typer.Exit: With code 1 when the pasted token is empty or blank.
    """
    import webbrowser

    from huggingface_hub import get_token
    from huggingface_hub import login as hf_login

    already_logged_in = bool(get_token())
    if already_logged_in:
        typer.echo("Already logged in to HuggingFace.")
        wants_relogin = typer.confirm("Log in again?", default=False)
        if not wants_relogin:
            return

    typer.echo("Opening HuggingFace token page in your browser...")
    typer.echo("Create a token with 'Read' access, then paste it below.\n")
    webbrowser.open("https://huggingface.co/settings/tokens")

    pasted = typer.prompt("Paste your HuggingFace token", hide_input=True)
    hf_token = pasted.strip()
    if not hf_token:
        typer.echo("No token provided.", err=True)
        raise typer.Exit(1)

    hf_login(token=hf_token, add_to_git_credential=False)
    typer.echo("Logged in! Gated models (Mistral, Llama, etc.) are now accessible.")
# Typer sub-application that groups the one-time setup commands.
setup_app = typer.Typer(help="One-time setup for optional runtime components.")


@setup_app.command(name="crawler")
def setup_crawler_cmd() -> None:
    """Install Playwright's Chromium browser, needed for /crawl.

    Returns immediately when Chromium is already present. Otherwise the
    install runs with a simple percentage readout on stderr; use '--json'
    mode on the top-level 'lilbee' command to get a single JSON blob with
    the final install state instead.

    Raises:
        typer.Exit: With code 1 when the install raises
            ``CrawlerBrowserError``.
    """
    if chromium_installed():
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "already_installed": True}))
        else:
            typer.echo("Chromium already installed.")
        return

    # Last percentage printed; -1 so that the first event always prints.
    last_reported = -1

    def _on_progress(event_type: object, data: object) -> None:
        """Print a progress line each time the integer percentage changes."""
        nonlocal last_reported
        if event_type != EventType.SETUP_PROGRESS:
            return
        if not isinstance(data, SetupProgressEvent):
            return
        total = data.total_bytes or 0
        if total > 0:
            pct = int(data.downloaded_bytes * 100 / total)
        else:
            # Unknown total: report 0 rather than dividing by zero.
            pct = 0
        if pct == last_reported or cfg.json_mode:
            return
        last_reported = pct
        typer.echo(msg.SETUP_CHROMIUM_CLI_PROGRESS.format(pct=pct), err=True)

    try:
        asyncio.run(bootstrap_chromium(on_progress=_on_progress))
    except CrawlerBrowserError as exc:
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "error": str(exc)}))
        else:
            typer.secho(f"Install failed: {exc}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from exc

    if cfg.json_mode:
        typer.echo(json.dumps({"component": "chromium", "installed": True}))
    else:
        typer.echo("Chromium installed.")