Coverage for src / lilbee / catalog / formatting.py: 100%
68 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
1"""Display-name, quantization, and enrichment helpers."""
3import re
4from dataclasses import dataclass
6from lilbee.catalog.models import CatalogModel, CatalogResult
7from lilbee.catalog.refs import hf_repo_from_ref
8from lilbee.catalog.types import ModelCompat, ModelSource, ModelTask
# Parameter-count token such as ``7B`` or ``1.5b``: digits, an optional
# decimal part, then a ``B`` (matched case-insensitively).
PARAM_COUNT_RE = re.compile(r"(\d+\.?\d*B)", flags=re.IGNORECASE)

# Single alternation covering every kind of display-name noise:
#   1. marketing/format suffixes, wherever they are followed by ``-`` or the
#      end of the string;
#   2. a trailing GGUF quant token (``-Q4_K_M``, ``-F16`` ...);
#   3. a trailing four-digit date stamp (``-2507``).
# Branches 2 and 3 are anchored to end-of-string, so callers re-apply the
# pattern until nothing changes to peel stacked suffixes.
_DISPLAY_NAME_NOISE = re.compile(
    r"-(?:GGUF|Instruct|Chat|Embedding|Embed|qat)(?=-|$)"
    r"|-(?:Q\d[A-Z0-9_]*|F16|F32)$"
    r"|-\d{4}$",
    flags=re.IGNORECASE,
)
# Leading ``Meta-`` vendor prefix (``Meta-Llama-3-8B`` -> ``Llama-3-8B``).
_DISPLAY_NAME_META_PREFIX = re.compile(r"^Meta-", flags=re.IGNORECASE)

# A native GGUF ref looks like ``<owner>/<repo>/<file>.gguf`` and therefore
# contains at least two ``/`` separators; a ref with a single slash is a bare
# repo ID.
_NATIVE_GGUF_REF_MIN_SLASHES = 2
def clean_display_name(repo_id: str) -> str:
    """Turn a HuggingFace repo ID into a short, readable model name.

    Only the last ``/``-separated segment is used. Noise suffixes, trailing
    quant tokens and trailing date stamps are removed, a leading ``Meta-``
    is dropped, and the remaining hyphens become single spaces.

    Examples:
        "Qwen/Qwen2.5-7B-Instruct-GGUF" -> "Qwen2.5 7B"
        "meta-llama/Meta-Llama-3-8B" -> "Llama 3 8B"
        "unsloth/embeddinggemma-300M-qat-GGUF" -> "embeddinggemma 300M"
        "ggml-org/all-MiniLM-L6-v2-Embedding-Q8_0" -> "all MiniLM L6 v2"
    """
    label = repo_id.rpartition("/")[2]
    # Iterate to a fixed point: removing one end-anchored token (e.g. a
    # trailing ``-GGUF``) can expose another (e.g. ``-2507``) at the new end.
    while (trimmed := _DISPLAY_NAME_NOISE.sub("", label)) != label:
        label = trimmed
    label = _DISPLAY_NAME_META_PREFIX.sub("", label)
    # Hyphens become spaces; split/join trims the ends and collapses runs.
    return " ".join(label.replace("-", " ").split())
def download_task_name(ref: str) -> str:
    """Return the catalog display label for *ref*.

    The ref is reduced to its repo portion with ``hf_repo_from_ref`` and the
    repo is then passed through :func:`clean_display_name`, so the result is
    meant to match ``CatalogModel.display_name`` -- the string a queued or
    active DOWNLOAD task carries in ``Task.name``.

    Returns ``""`` when *ref* is empty, contains no ``/``, or when the repo
    derived from it contains no ``/`` (i.e. no ``<owner>/<repo>`` shape).
    """
    if ref and "/" in ref:
        repo_id = hf_repo_from_ref(ref)
        # The derived repo must itself still look like ``<owner>/<repo>``.
        if "/" in repo_id:
            return clean_display_name(repo_id)
    return ""
def display_label_for_ref(ref: str) -> str:
    """Produce a compact, human-friendly UI label for any model ref.

    - Empty ref: ``""``.
    - Native GGUF ref (ends with ``.gguf`` and has at least two ``/``):
      the filename is dropped and the rest is cleaned with
      :func:`clean_display_name`.
    - Any other ref containing ``/`` (``ollama/...``, ``openai/...``):
      everything after the first ``/``.
    - Anything else: returned unchanged.
    """
    if not ref:
        return ""
    looks_like_native_gguf = (
        ref.endswith(".gguf") and ref.count("/") >= _NATIVE_GGUF_REF_MIN_SLASHES
    )
    if looks_like_native_gguf:
        # Drop the trailing ``/<file>.gguf`` component.
        repo_part, _, _ = ref.rpartition("/")
        return clean_display_name(repo_part)
    _, slash, after_prefix = ref.partition("/")
    return after_prefix if slash else ref
# Quantization / precision labels recognised in GGUF filenames:
#   * ``Q<digit>...`` tokens such as ``Q4_K_M`` or ``Q8_0``;
#   * the unquantized ``F16`` / ``F32`` labels, which ``QUANT_TIERS`` maps to a
#     tier and ``_DISPLAY_NAME_NOISE`` already treats as quant tokens.
# The F-labels must stand alone (no letter or digit on either side) so that
# e.g. ``BF16`` or ``F160`` is not misread as ``F16``.
_QUANT_LABEL_RE = re.compile(
    r"(Q\d[A-Z0-9_]*|(?<![A-Z0-9])F(?:16|32)(?![A-Z0-9]))",
    re.IGNORECASE,
)


def extract_quant(filename: str) -> str:
    """Extract the GGUF quantization label (e.g. ``Q4_K_M``) from a filename.

    The first ``Q<digit>...`` token or standalone ``F16`` / ``F32`` label
    found in *filename* is returned upper-cased, so the result can be used
    directly as a ``QUANT_TIERS`` key. Matching is case-insensitive.

    Returns ``""`` when *filename* contains no recognised label.
    """
    found = _QUANT_LABEL_RE.search(filename)
    return found.group(1).upper() if found else ""
# Quality tier shown for each known quantization label. Keys are upper-case.
QUANT_TIERS: dict[str, str] = {
    "Q2_K": "compact",
    "Q3_K_S": "compact",
    "Q3_K_M": "compact",
    "Q3_K_L": "compact",
    "Q4_K_S": "balanced",
    "Q4_K_M": "balanced",
    "Q4_0": "balanced",
    "Q5_K_S": "high quality",
    "Q5_K_M": "high quality",
    "Q6_K": "high quality",
    "Q8_0": "full precision",
    "F16": "unquantized",
    "F32": "unquantized",
}


def quant_tier(quant: str) -> str:
    """Map a quantization label to a human-readable quality tier.

    The lookup is case-insensitive: *quant* is upper-cased before it is
    matched against ``QUANT_TIERS``, so ``"q4_k_m"`` and ``"Q4_K_M"`` both
    resolve to ``"balanced"``.

    Returns ``"--"`` for an empty label or one that is not in ``QUANT_TIERS``.
    """
    if not quant:
        return "--"
    # Table keys are upper-case; normalise so lower-case labels (common in
    # GGUF filenames) are not reported as unknown.
    return QUANT_TIERS.get(quant.upper(), "--")
def derive_param_count(model: CatalogModel) -> str:
    """Return the first ``7B``-style token in ``model.display_name``.

    The token is returned exactly as it appears in the display name;
    ``""`` is returned when ``PARAM_COUNT_RE`` finds nothing.
    """
    found = PARAM_COUNT_RE.search(model.display_name)
    if found is None:
        return ""
    return found.group(1)
@dataclass(frozen=True)
class EnrichedModel:
    """Immutable catalog entry augmented with display metadata.

    ``enrich_catalog`` builds one instance per catalog model. Most fields are
    copied straight from the catalog entry; ``param_count``, ``quality_tier``,
    ``installed`` and ``source`` are filled in by ``enrich_catalog``.
    """

    # --- copied from the catalog entry ---
    hf_repo: str
    gguf_filename: str
    size_gb: float
    min_ram_gb: float
    description: str
    featured: bool
    downloads: int
    task: ModelTask
    display_name: str
    # --- set by ``enrich_catalog`` ---
    param_count: str  # ``7B``-style token parsed from ``display_name``; "" if absent
    quality_tier: str  # tier for the quant label in ``gguf_filename``; "--" if unknown
    installed: bool  # True when ``hf_repo`` is among the installed repos
    source: ModelSource
    # --- copied from the catalog entry ---
    architecture: str
    compat: ModelCompat
def enrich_catalog(result: CatalogResult, installed_refs: set[str]) -> list[EnrichedModel]:
    """Build an :class:`EnrichedModel` for every model in *result*.

    Each catalog model's own fields are carried over unchanged and joined by
    a parsed parameter count, a quality tier derived from the quant label in
    its GGUF filename, and an ``installed`` flag. ``source`` is always
    ``ModelSource.NATIVE``.

    *installed_refs* contains the ``hf_repo/filename`` refs returned by
    ``model_manager.list_installed()``. Every ref is reduced to its repo with
    ``hf_repo_from_ref``, so a repo counts as installed as soon as any one of
    its quants is present.
    """
    installed_repo_ids = {hf_repo_from_ref(ref) for ref in installed_refs}
    return [
        EnrichedModel(
            hf_repo=entry.hf_repo,
            gguf_filename=entry.gguf_filename,
            size_gb=entry.size_gb,
            min_ram_gb=entry.min_ram_gb,
            description=entry.description,
            featured=entry.featured,
            downloads=entry.downloads,
            task=entry.task,
            display_name=entry.display_name,
            param_count=derive_param_count(entry),
            quality_tier=quant_tier(extract_quant(entry.gguf_filename)),
            installed=entry.hf_repo in installed_repo_ids,
            source=ModelSource.NATIVE,
            architecture=entry.architecture,
            compat=entry.compat,
        )
        for entry in result.models
    ]