Coverage for src / lilbee / runtime / hardware.py: 100%
42 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Hardware-fit signaling and per-row size-variant grouping for the catalog."""
3from __future__ import annotations
5from dataclasses import dataclass
6from enum import StrEnum
8from pydantic import BaseModel
10from lilbee.catalog.models import ModelFamily
11from lilbee.core.config import cfg
# Bytes in one binary gigabyte (2**30); used to convert byte counts to GB.
_BYTES_PER_GB = 1 << 30
# Spare memory (1 GB) a model must leave free to be graded FITS instead of TIGHT.
_FITS_HEADROOM_BYTES = _BYTES_PER_GB
17class FitLevel(StrEnum):
18 FITS = "fits"
19 TIGHT = "tight"
20 WONT_RUN = "wont_run"
@dataclass(frozen=True)
class FitChip:
    """Immutable outcome of a fit check: the verdict plus the spare memory."""

    # Classification of the fit.
    level: FitLevel
    # Spare memory in GB; negative when the model does not fit.
    headroom_gb: float
def compute_fit(model_size_bytes: int, available_bytes: int) -> FitChip:
    """Grade a model's memory footprint against the memory available to it.

    The spare memory is ``available_bytes - model_size_bytes``. At least
    ``_FITS_HEADROOM_BYTES`` spare grades as FITS, a smaller non-negative
    amount as TIGHT, and a negative amount as WONT_RUN. The 1 GB band
    between FITS and TIGHT leaves room for the inference runtime, KV cache,
    and OS overhead beyond the raw weight file.

    Returns:
        A FitChip carrying the level and the spare memory in GB (positive
        when the model fits, negative when it won't).
    """
    spare_bytes = available_bytes - model_size_bytes
    spare_gb = spare_bytes / _BYTES_PER_GB

    # Checks run from the roomiest verdict down; the first match wins.
    if spare_bytes >= _FITS_HEADROOM_BYTES:
        return FitChip(level=FitLevel.FITS, headroom_gb=spare_gb)
    if spare_bytes >= 0:
        return FitChip(level=FitLevel.TIGHT, headroom_gb=spare_gb)
    return FitChip(level=FitLevel.WONT_RUN, headroom_gb=spare_gb)
def available_memory_for_fit() -> int | None:
    """Bytes available to a model after ``cfg.gpu_memory_fraction``, or None on probe failure.

    Single entry point so the TUI and the HTTP catalog handler classify fit
    against the same number; otherwise the same model would chip differently
    in each surface.

    Returns:
        The result of ``get_available_memory(cfg.gpu_memory_fraction)``, or
        ``None`` when the probe (including its import) raises.
    """
    try:
        # Imported inside the ``try`` so a failing import also degrades to None.
        from lilbee.providers.model_cache import get_available_memory

        return get_available_memory(cfg.gpu_memory_fraction)
    except Exception:
        # None is the documented failure result, so stay best-effort, but
        # leave a debug trace instead of discarding the error silently.
        import logging

        logging.getLogger(__name__).debug(
            "available-memory probe failed; fit cannot be classified",
            exc_info=True,
        )
        return None
class SizeVariantInfo(BaseModel):
    """HTTP-serialisable description of a single size/quant of a model family."""

    # Compact display label, e.g. "8B Q4_K_M".
    size_label: str
    # Parameter-count string of the variant.
    params: str
    # Size of the variant in GB.
    size_gb: float
    # Reference identifying the variant (filled from the variant's ``hf_repo``).
    ref: str
def family_size_variants(family: ModelFamily) -> list[SizeVariantInfo]:
    """Describe every variant of *family* as a SizeVariantInfo, smallest ``size_mb`` first."""

    def _describe(variant) -> SizeVariantInfo:
        # ``size_mb`` is divided by 1024 to express the size in GB.
        return SizeVariantInfo(
            size_label=_size_variant_label(variant.param_count, variant.quant),
            params=variant.param_count,
            size_gb=variant.size_mb / 1024,
            ref=variant.hf_repo,
        )

    ascending = sorted(family.variants, key=lambda variant: variant.size_mb)
    return [_describe(variant) for variant in ascending]
def _size_variant_label(param_count: str, quant: str) -> str:
    """Join the non-empty parts into a compact label (``8B Q4_K_M``), or ``--`` if none."""
    # ``filter(None, ...)`` drops empty parts so no stray space appears.
    label = " ".join(filter(None, (param_count, quant)))
    return label or "--"