# Coverage for src/lilbee/runtime/hardware.py: 100%

"""Hardware-fit signaling and per-row size-variant grouping for the catalog."""

3from __future__ import annotations

5from dataclasses import dataclass

6from enum import StrEnum

8from pydantic import BaseModel

10from lilbee.catalog.models import ModelFamily

11from lilbee.core.config import cfg

13_BYTES_PER_GB = 1024**3

14_FITS_HEADROOM_BYTES = 1 * _BYTES_PER_GB

17class FitLevel(StrEnum):

18 FITS = "fits"

19 TIGHT = "tight"

20 WONT_RUN = "wont_run"

@dataclass(frozen=True)
class FitChip:
    """Immutable result of ``compute_fit``: a fit level plus signed headroom."""

    # Classification of the model against the available memory.
    level: FitLevel
    # Available memory minus model size, in GB; negative when the model won't fit.
    headroom_gb: float

def compute_fit(model_size_bytes: int, available_bytes: int) -> FitChip:
    """Classify how a model footprint fits the available memory budget.

    ``headroom_gb`` is positive when the model fits and negative when it
    won't. The 1 GB band between FITS and TIGHT leaves room for the
    inference runtime, KV cache, and OS overhead beyond the raw weight
    file.

    Args:
        model_size_bytes: Size of the model, in bytes.
        available_bytes: Memory budget to fit the model into, in bytes.

    Returns:
        A ``FitChip`` carrying the fit level and the signed headroom in GB.
    """
    headroom_bytes = available_bytes - model_size_bytes
    headroom_gb = headroom_bytes / _BYTES_PER_GB
    if headroom_bytes >= _FITS_HEADROOM_BYTES:
        level = FitLevel.FITS
    elif headroom_bytes >= 0:
        # Exactly zero headroom still counts as TIGHT rather than WONT_RUN.
        level = FitLevel.TIGHT
    else:
        level = FitLevel.WONT_RUN
    return FitChip(level=level, headroom_gb=headroom_gb)

def available_memory_for_fit() -> int | None:
    """Bytes available to a model after ``cfg.gpu_memory_fraction``, or None on probe failure.

    Single entry point so the TUI and the HTTP catalog handler classify fit
    against the same number; otherwise the same model would chip differently in
    each surface.

    Returns:
        Whatever ``get_available_memory(cfg.gpu_memory_fraction)`` reports, or
        ``None`` if importing or calling the probe raises.
    """
    try:
        # Imported inside the try so a failing import also yields None.
        from lilbee.providers.model_cache import get_available_memory

        return get_available_memory(cfg.gpu_memory_fraction)
    except Exception:
        # Deliberate best-effort: any probe failure is reported as None.
        return None

class SizeVariantInfo(BaseModel):
    """One size/quant of a model family, serialised for HTTP responses."""

    # Compact display label, e.g. ``8B Q4_K_M``.
    size_label: str
    # Parameter count string, copied from the variant's ``param_count``.
    params: str
    # Size in GB: the variant's ``size_mb`` divided by 1024.
    size_gb: float
    # Reference for the variant, populated from its ``hf_repo``.
    ref: str

def family_size_variants(family: ModelFamily) -> list[SizeVariantInfo]:
    """Build the per-row size-variant strip for a featured ModelFamily, smallest first.

    Args:
        family: Family whose ``variants`` are converted; each variant is read
            for ``size_mb``, ``param_count``, ``quant`` and ``hf_repo``.

    Returns:
        One ``SizeVariantInfo`` per variant, ordered by ascending ``size_mb``.
    """
    variants = sorted(family.variants, key=lambda v: v.size_mb)
    return [
        SizeVariantInfo(
            size_label=_size_variant_label(v.param_count, v.quant),
            params=v.param_count,
            size_gb=v.size_mb / 1024,  # MB -> GB
            ref=v.hf_repo,
        )
        for v in variants
    ]

86def _size_variant_label(param_count: str, quant: str) -> str:

87 """Render the compact label for one size variant (``8B Q4_K_M``)."""

88 pieces = [p for p in (param_count, quant) if p]

89 return " ".join(pieces) if pieces else "--"

# Coverage for src/lilbee/runtime/hardware.py: 100% (42 statements)