Coverage for src / lilbee / runtime / hardware.py: 100%

42 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""Hardware-fit signaling and per-row size-variant grouping for the catalog.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import dataclass 

6from enum import StrEnum 

7 

8from pydantic import BaseModel 

9 

10from lilbee.catalog.models import ModelFamily 

11from lilbee.core.config import cfg 

12 

13_BYTES_PER_GB = 1024**3 

14_FITS_HEADROOM_BYTES = 1 * _BYTES_PER_GB 

15 

16 

17class FitLevel(StrEnum): 

18 FITS = "fits" 

19 TIGHT = "tight" 

20 WONT_RUN = "wont_run" 

21 

22 

@dataclass(frozen=True)
class FitChip:
    """Immutable result of a fit classification: a verdict plus its headroom."""

    # Verdict for the model against the memory budget.
    level: FitLevel
    # Memory left over after the model, in GB; negative when the model is
    # larger than the budget.
    headroom_gb: float

27 

28 

def compute_fit(model_size_bytes: int, available_bytes: int) -> FitChip:
    """Grade a model's memory footprint against the available budget.

    The spare memory (budget minus footprint) decides the verdict:

    * at least ``_FITS_HEADROOM_BYTES`` spare -> ``FitLevel.FITS``
    * zero or more, but below that threshold  -> ``FitLevel.TIGHT``
    * negative                                -> ``FitLevel.WONT_RUN``

    The band between FITS and TIGHT exists because the raw weight file is
    not the whole cost: the inference runtime, KV cache, and OS overhead
    need room too.

    Args:
        model_size_bytes: Size of the model, in bytes.
        available_bytes: Memory budget the model may use, in bytes.

    Returns:
        A ``FitChip`` with the verdict and the spare memory in GB
        (negative when the model does not fit).
    """
    spare_bytes = available_bytes - model_size_bytes
    # Thresholds are tested from most to least roomy; the first match wins.
    verdict = (
        FitLevel.FITS
        if spare_bytes >= _FITS_HEADROOM_BYTES
        else FitLevel.TIGHT
        if spare_bytes >= 0
        else FitLevel.WONT_RUN
    )
    return FitChip(level=verdict, headroom_gb=spare_bytes / _BYTES_PER_GB)

46 

47 

def available_memory_for_fit() -> int | None:
    """Return the memory budget, in bytes, that a model may occupy.

    The budget is whatever ``get_available_memory`` reports for
    ``cfg.gpu_memory_fraction``. This is the single entry point for that
    number so the TUI and the HTTP catalog handler classify fit against
    the same value; otherwise the same model could chip differently on
    each surface.

    Returns:
        The available bytes, or ``None`` when the probe fails for any
        reason (including failure to import the probe itself).
    """
    try:
        # Function-scope import: an import failure is treated like any
        # other probe failure and reported as None.
        from lilbee.providers.model_cache import get_available_memory

        return get_available_memory(cfg.gpu_memory_fraction)
    except Exception:
        # Best-effort by design: callers get None rather than an error.
        # Log the cause so a broken probe is diagnosable instead of
        # silently hiding every fit chip.
        import logging

        logging.getLogger(__name__).debug(
            "Memory probe failed; fit classification unavailable",
            exc_info=True,
        )
        return None

61 

62 

class SizeVariantInfo(BaseModel):
    """A single size/quant option of a model family, as sent in HTTP responses."""

    # Compact display label, e.g. "8B Q4_K_M".
    size_label: str
    # Parameter-count string of the variant.
    params: str
    # Variant size in GB.
    size_gb: float
    # Reference used to identify the variant (filled from its ``hf_repo``).
    ref: str

70 

71 

def family_size_variants(family: ModelFamily) -> list[SizeVariantInfo]:
    """Return one ``SizeVariantInfo`` per variant of ``family``, smallest first.

    Variants are ordered by ``size_mb`` ascending; each entry carries a
    compact label, the parameter count, the size converted from MB to GB,
    and the variant's ``hf_repo`` as its reference.
    """
    strip: list[SizeVariantInfo] = []
    for variant in sorted(family.variants, key=lambda item: item.size_mb):
        label = _size_variant_label(variant.param_count, variant.quant)
        strip.append(
            SizeVariantInfo(
                size_label=label,
                params=variant.param_count,
                size_gb=variant.size_mb / 1024,  # MB -> GB
                ref=variant.hf_repo,
            )
        )
    return strip

84 

85 

86def _size_variant_label(param_count: str, quant: str) -> str: 

87 """Render the compact label for one size variant (``8B Q4_K_M``).""" 

88 pieces = [p for p in (param_count, quant) if p] 

89 return " ".join(pieces) if pieces else "--"