Coverage for src/lilbee/catalog/hf_client.py: 100% (115 statements)
coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
"""HuggingFace API client with TTL cache."""

from __future__ import annotations

import logging
import os
import threading
import time
from http import HTTPStatus

import httpx
from huggingface_hub import ModelInfo
from huggingface_hub.hf_api import RepoSibling

from lilbee.catalog.models import CatalogModel, HfGgufMeta, HfPage

log = logging.getLogger(__name__)

# Substrings dropped from huggingface_hub's request / file-download loggers.
# These advisories aren't actionable in a local TUI: HF prints an
# unauthenticated-requests notice on every public pull, and the file_download
# logger re-warns on every retry the library schedules. The catalog surfaces
# the final download failure with a clear message, so per-attempt warnings
# are noise.
_HF_SUPPRESS_SUBSTRINGS = (
    "unauthenticated requests to the HF Hub",
    "Error while downloading from",
    "Trying to resume download",
)

_HF_FILTERED_LOGGER_NAMES = (
    "huggingface_hub.utils._http",
    "huggingface_hub.file_download",
)


class _HfSubstringFilter(logging.Filter):
    """Drop huggingface_hub log records whose message contains a suppressed substring."""

    def __init__(self, needles: tuple[str, ...]) -> None:
        super().__init__()
        self._needles = needles

    def filter(self, record: logging.LogRecord) -> bool:
        return not any(n in record.getMessage() for n in self._needles)


def install_hf_log_filter() -> None:
    """Attach the substring filter to huggingface_hub's chatty loggers.

    Called automatically when this module is imported (see the module-top
    invocation below) so the filter is in place before any catalog HTTP
    call can emit a warning. Exposed as a function so tests can re-apply it.
    """
    hf_filter = _HfSubstringFilter(_HF_SUPPRESS_SUBSTRINGS)
    for name in _HF_FILTERED_LOGGER_NAMES:
        logging.getLogger(name).addFilter(hf_filter)


# Install the filter at module import. All HF HTTP traffic in lilbee
# routes through this module, so installing here always beats the first
# huggingface_hub warning to the punch.
install_hf_log_filter()
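# Illustrative sketch (not part of the original module): what the filter does to
# a record whose message contains a suppressed substring. The record fields below
# are arbitrary placeholders.
#
#     _flt = _HfSubstringFilter(_HF_SUPPRESS_SUBSTRINGS)
#     _rec = logging.LogRecord(
#         "huggingface_hub.utils._http", logging.WARNING, __file__, 0,
#         "Note: unauthenticated requests to the HF Hub are rate-limited", None, None,
#     )
#     assert _flt.filter(_rec) is False  # matching record is dropped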

HF_API_URL = "https://huggingface.co/api/models"

DEFAULT_TIMEOUT = 30.0

# Fields requested from the HF listing API via ``?expand=``. Without this
# expand, the default response omits siblings, cardData, and gguf.
_HF_EXPAND_FIELDS: list[str] = ["gguf", "siblings", "downloads", "pipeline_tag", "cardData"]
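# For reference (httpx behavior, not specific to this module): a sequence value in
# ``httpx.QueryParams`` is encoded as repeated parameters, so the listing request
# carries one ``expand=`` pair per field, e.g.
# ``...&expand=gguf&expand=siblings&expand=downloads&expand=pipeline_tag&expand=cardData``.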

# HF ``?search=`` is a single space-tokenized substring match on the model id.
# Multiple ``search=`` params are silently ignored, so the user's query is
# space-joined onto the GGUF filter into one param value.
_HF_GGUF_SEARCH_TERM = "GGUF"

_EMPTY_HF_PAGE = HfPage(models=[], has_more=False)

_BYTES_PER_GB = 1024**3


def hf_token() -> str | None:
    """Read HuggingFace token from env vars or huggingface_hub login cache."""
    token = os.environ.get("LILBEE_HF_TOKEN") or os.environ.get("HF_TOKEN") or None
    if token:
        return token
    try:
        from huggingface_hub import get_token

        return get_token()
    except Exception:
        return None
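# Token resolution order, for reference: LILBEE_HF_TOKEN wins over HF_TOKEN, and
# either env var wins over a cached ``huggingface-cli login`` token. Illustrative
# only; the value below is a made-up placeholder.
#
#     os.environ["LILBEE_HF_TOKEN"] = "hf_example"
#     assert hf_token() == "hf_example"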


def hf_headers() -> dict[str, str]:
    """Build HTTP headers for HuggingFace API requests."""
    token = hf_token()
    if token:
        return {"Authorization": f"Bearer {token}"}
    return {}


def _hf_search_value(search: str) -> str:
    """Build the HF ``search=`` value: GGUF plus the user's tokens, space-joined."""
    tokens = [_HF_GGUF_SEARCH_TERM, *search.split()]
    return " ".join(tokens)
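# Illustrative values (derived from the function above, not from HF documentation):
#
#     _hf_search_value("")               # -> "GGUF"
#     _hf_search_value("tinyllama 1.1b") # -> "GGUF tinyllama 1.1b"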


def _has_gguf_siblings(siblings: list[RepoSibling]) -> bool:
    """Return True if the sibling list contains at least one .gguf file."""
    return any(s.rfilename.endswith(".gguf") for s in siblings)


def _estimate_size_from_siblings(siblings: list[RepoSibling]) -> float:
    """Estimate model size in GB from the largest GGUF file in siblings."""
    max_bytes = 0
    for sib in siblings:
        if sib.rfilename.endswith(".gguf"):
            max_bytes = max(max_bytes, sib.size or 0)
    if max_bytes > 0:
        return round(max_bytes / _BYTES_PER_GB, 1)
    return 0.0  # unknown: display as "?" in UI
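# Worked example (hypothetical filenames and sizes, for illustration only): the
# largest .gguf sibling drives the estimate.
#
#     sibs = [
#         RepoSibling(rfilename="model.Q4_K_M.gguf", size=2_100_000_000),
#         RepoSibling(rfilename="model.Q8_0.gguf", size=4_800_000_000),
#     ]
#     _estimate_size_from_siblings(sibs)  # 4_800_000_000 / 1024**3 ≈ 4.47 -> 4.5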


class HfClient:
    """HuggingFace catalog API client with a per-instance TTL cache.

    Holds the per-process cache of catalog pages keyed by query
    parameters. The cache TTL and capacity are class-level so tests can
    override them via subclassing if needed; the cache state itself is
    per-instance so ``reset_services()`` discards a stale instance
    along with its cache.
    """

    CACHE_TTL: float = 300.0
    CACHE_MAX_ENTRIES: int = 50
    # Rate-limit the "Failed to fetch models" warning so an offline user
    # doesn't see one line per UI tick. First failure surfaces immediately;
    # repeats within the window stay at DEBUG.
    FETCH_FAILURE_WARN_INTERVAL_S: float = 300.0

    def __init__(self) -> None:
        self._cache: dict[str, tuple[float, HfPage]] = {}
        self._cache_lock = threading.Lock()
        # -inf, not 0.0: on a freshly booted machine ``time.monotonic()`` can be
        # smaller than the window, which would push the first failure to DEBUG.
        self._last_fetch_failure_warn: float = float("-inf")

    def fetch_models(
        self,
        pipeline_tag: str = "text-generation",
        sort: str = "downloads",
        limit: int = 50,
        offset: int = 0,
        library: str | None = None,
        search: str = "",
    ) -> HfPage:
        """Fetch GGUF models from HuggingFace API with TTL cache.

        Returns an ``HfPage`` with a ``has_more`` flag derived from the
        ``Link: <...>; rel="next"`` response header (RFC 5988), the same
        mechanism the ``huggingface_hub`` library uses internally.
        """
        # Local import to avoid a cycle: query imports hf_client (this
        # module), and hf_client uses pipeline_to_task from query.
        from lilbee.catalog.query import pipeline_to_task

        search_value = _hf_search_value(search)
        cache_key = f"{pipeline_tag}:{sort}:{limit}:{offset}:{library}:{search_value}"
        now = time.monotonic()
        with self._cache_lock:
            expired = [k for k, (ts, _) in self._cache.items() if now - ts >= self.CACHE_TTL]
            for k in expired:
                del self._cache[k]

            cached = self._cache.get(cache_key)
            if cached and now - cached[0] < self.CACHE_TTL:
                return cached[1]

        params = httpx.QueryParams(
            pipeline_tag=pipeline_tag,
            search=search_value,
            sort=sort,
            limit=limit,
            skip=offset,
            expand=_HF_EXPAND_FIELDS,
        )
        if library:
            params = params.add("library", library)
        try:
            resp = httpx.get(
                HF_API_URL, params=params, timeout=DEFAULT_TIMEOUT, headers=hf_headers()
            )
            if resp.status_code >= HTTPStatus.BAD_REQUEST:
                log.warning("HuggingFace API returned HTTP %d", resp.status_code)
                return _EMPTY_HF_PAGE
            data = resp.json()
        except (httpx.HTTPError, ValueError) as exc:
            self._log_fetch_failure(exc)
            return _EMPTY_HF_PAGE

        has_more = "next" in resp.links

        models: list[CatalogModel] = []
        for raw in data:
            if not raw.get("id"):
                continue
            item = ModelInfo(**raw)
            card_desc = item.card_data.get("description", "") if item.card_data else ""
            gguf_meta = HfGgufMeta(**(item.gguf or {}))
            if gguf_meta.total > 0:
                size_gb = round(gguf_meta.total / _BYTES_PER_GB, 1)
            else:
                size_gb = _estimate_size_from_siblings(item.siblings or [])
            task = pipeline_to_task(item.pipeline_tag or "")
            models.append(
                CatalogModel(
                    hf_repo=item.id,
                    gguf_filename="*.gguf",
                    size_gb=size_gb,
                    min_ram_gb=round(max(2.0, size_gb * 1.5), 1),
                    description=card_desc[:120] if card_desc else "",
                    featured=False,
                    downloads=item.downloads or 0,
                    task=task,
                )
            )
        page = HfPage(models=models, has_more=has_more)
        with self._cache_lock:
            self._cache[cache_key] = (now, page)
            if len(self._cache) > self.CACHE_MAX_ENTRIES:
                oldest_key = min(self._cache, key=lambda k: self._cache[k][0])
                del self._cache[oldest_key]
        return page

    def _log_fetch_failure(self, exc: Exception) -> None:
        """Log an HF fetch failure, rate-limited so offline use doesn't spam.

        First failure of each ``FETCH_FAILURE_WARN_INTERVAL_S`` window logs
        at WARNING; repeats within the window log at DEBUG. The interval
        starts from the last WARNING so a flapping network produces one
        line every five minutes, not one per UI tick.
        """
        now = time.monotonic()
        if now - self._last_fetch_failure_warn >= self.FETCH_FAILURE_WARN_INTERVAL_S:
            log.warning("Failed to fetch models from HuggingFace: %s", exc)
            self._last_fetch_failure_warn = now
        else:
            log.debug("Suppressed repeat HF fetch failure: %s", exc)
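

# Minimal usage sketch (illustrative only, not part of the original module's
# public surface). Executing the file directly performs one live catalog query;
# the search string and limit are arbitrary example values.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    _client = HfClient()
    _page = _client.fetch_models(search="llama", limit=5)
    for _model in _page.models:
        print(f"{_model.hf_repo}  ~{_model.size_gb} GB  {_model.downloads} downloads")
    print("has_more:", _page.has_more)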