Coverage for src / lilbee / catalog / families.py: 100%

32 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""Group featured models into display families.""" 

2 

3import re 

4 

5from lilbee.catalog.featured import ( 

6 FEATURED_CHAT, 

7 FEATURED_EMBEDDING, 

8 FEATURED_RERANK, 

9 FEATURED_VISION, 

10) 

11from lilbee.catalog.formatting import clean_display_name, extract_quant 

12from lilbee.catalog.models import CatalogModel, ModelFamily, ModelVariant 

13from lilbee.catalog.types import ModelTask 

14 

# Captures (lazily) everything from the start of the string up to the first run
# of whitespace that is immediately followed by a digit, e.g. "Qwen3" in
# "Qwen3 8B". Names with no whitespace-then-digit boundary do not match.
_FAMILY_NAME_RE = re.compile(r"^(.+?)\s+\d")

16 

17 

def _extract_family_name(model_name: str) -> str:
    """Return the family portion of a model's display name.

    The name is first normalised with ``clean_display_name`` (expected to drop
    packaging suffixes such as -GGUF / -Instruct; that helper lives in another
    module). The family is whatever precedes the first whitespace-then-digit
    boundary in the cleaned name; when there is no such boundary the cleaned
    name is returned as-is.

    Examples, assuming ``clean_display_name`` leaves these names untouched:
        "Qwen3 8B"              -> "Qwen3"
        "Qwen3-Coder 30B A3B"   -> "Qwen3-Coder"
        "Nomic Embed Text v1.5" -> "Nomic Embed Text v1.5"
    """
    display = clean_display_name(model_name)
    found = _FAMILY_NAME_RE.match(display)
    if found is None:
        # No parameter-count style suffix: the whole cleaned name is the family.
        return display
    return found.group(1)

28 

29 

def _catalog_to_variant(model: CatalogModel) -> ModelVariant:
    """Build the ModelVariant describing a single catalog entry.

    Repo, filename and the recommended flag are copied straight from *model*;
    the parameter count and quantisation label are derived by the formatting
    helpers, and the size is converted from ``size_gb`` to whole megabytes.
    """
    # Deliberately imported at call time, as in the original code, whose comment
    # says this avoids pulling formatting helpers into hf_client/featured.
    from lilbee.catalog.formatting import derive_param_count

    gguf_name = model.gguf_filename
    params = derive_param_count(model)
    quant_label = extract_quant(gguf_name)
    # int() truncates, so fractional megabytes are dropped rather than rounded.
    megabytes = int(model.size_gb * 1024)

    return ModelVariant(
        hf_repo=model.hf_repo,
        filename=gguf_name,
        param_count=params,
        quant=quant_label,
        size_mb=megabytes,
        recommended=model.recommended,
    )

43 

44 

def _family_slug(display_name: str) -> str:
    """Return a stable slug for the family that *display_name* belongs to.

    The slug is the extracted family name, lower-cased, with each space
    replaced by a hyphen.
    """
    family = _extract_family_name(display_name)
    lowered = family.lower()
    return lowered.replace(" ", "-")

48 

49 

def _build_families(models: tuple[CatalogModel, ...], task: ModelTask) -> list[ModelFamily]:
    """Group catalog models into ModelFamily objects for one task.

    Models sharing the same family name (derived from their display name) end
    up in the same family. Families appear in the order their first member is
    encountered, and variants keep the order of *models*. Slug and description
    come from the first member flagged ``recommended``, falling back to the
    first member of the family when none is flagged.
    """
    # A dict preserves insertion order, so its keys double as the
    # first-seen ordering of the families.
    by_family: dict[str, list[CatalogModel]] = {}
    for model in models:
        key = _extract_family_name(model.display_name)
        by_family.setdefault(key, []).append(model)

    result: list[ModelFamily] = []
    for name, members in by_family.items():
        # Pick the family's representative: first recommended, else first.
        lead = members[0]
        for candidate in members:
            if candidate.recommended:
                lead = candidate
                break

        member_variants = tuple([_catalog_to_variant(member) for member in members])
        result.append(
            ModelFamily(
                slug=_family_slug(lead.display_name),
                name=name,
                task=task,
                description=lead.description,
                variants=member_variants,
            )
        )
    return result

75 

76 

def get_families() -> list[ModelFamily]:
    """Return every featured model, grouped into families.

    Families are listed by task in a fixed order: chat, embedding, vision,
    then rerank. Within a task, families and their variants follow the order
    of the corresponding FEATURED_* tuple; no sorting happens here, so any
    smallest-to-largest ordering of variants (and which one is marked
    recommended) is a property of the catalog data itself.
    """
    sections = (
        (FEATURED_CHAT, ModelTask.CHAT),
        (FEATURED_EMBEDDING, ModelTask.EMBEDDING),
        (FEATURED_VISION, ModelTask.VISION),
        (FEATURED_RERANK, ModelTask.RERANK),
    )
    families: list[ModelFamily] = []
    for featured, task in sections:
        families.extend(_build_families(featured, task))
    return families