Coverage for src/lilbee/catalog/hf

1"""HuggingFace API client with TTL cache."""

3from __future__ import annotations

5import logging

6import os

7import threading

8import time

9from http import HTTPStatus

11import httpx

12from huggingface_hub import ModelInfo

13from huggingface_hub.hf_api import RepoSibling

15from lilbee.catalog.models import CatalogModel, HfGgufMeta, HfPage

log = logging.getLogger(__name__)

# Message fragments that mark a huggingface_hub log record as noise.
# None of them is actionable in a local TUI: the hub prints an
# unauthenticated-requests notice on every public pull, and the
# file_download logger warns again on each retry the library schedules.
# The catalog reports the final download failure with a clear message of
# its own, so the per-attempt warnings add nothing.
_HF_SUPPRESS_SUBSTRINGS = (
    "unauthenticated requests to the HF Hub",
    "Error while downloading from",
    "Trying to resume download",
)

# Names of the huggingface_hub loggers whose records are screened against
# the fragments above.
_HF_FILTERED_LOGGER_NAMES = (
    "huggingface_hub.utils._http",
    "huggingface_hub.file_download",
)

class _HfSubstringFilter(logging.Filter):
    """Drop log records whose formatted message contains a suppressed substring.

    Args:
        needles: Substrings to look for in each record's formatted message.
            A record containing any one of them is rejected.
    """

    def __init__(self, needles: tuple[str, ...]) -> None:
        super().__init__()
        self._needles = needles

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False (drop the record) when any needle occurs in its message."""
        # Format the message once: getMessage() re-applies ``msg % args`` on
        # every call, so evaluating it per needle repeats the same work.
        message = record.getMessage()
        return not any(needle in message for needle in self._needles)

def install_hf_log_filter() -> None:
    """Attach the substring filter to huggingface_hub's chatty loggers.

    Called automatically when this module is imported (see the module-top
    invocation below) so the filter is in place before any catalog HTTP
    call can emit a warning. Exposed as a function so tests can re-apply.

    Re-applying is idempotent: any ``_HfSubstringFilter`` left on a logger
    by an earlier call is removed first, so each logger ends up with exactly
    one instance, built from the current ``_HF_SUPPRESS_SUBSTRINGS``.
    """
    hf_filter = _HfSubstringFilter(_HF_SUPPRESS_SUBSTRINGS)
    for name in _HF_FILTERED_LOGGER_NAMES:
        logger = logging.getLogger(name)
        # addFilter() only de-duplicates the very same object, so a fresh
        # instance per call would otherwise pile up. Iterate over a copy
        # because removeFilter() mutates ``logger.filters``.
        for existing in list(logger.filters):
            if isinstance(existing, _HfSubstringFilter):
                logger.removeFilter(existing)
        logger.addFilter(hf_filter)

# Apply the log filter as soon as this module is imported. Every HF HTTP
# call in lilbee goes through this module, so the filter is always in
# place before huggingface_hub can emit its first warning.
install_hf_log_filter()

HF_API_URL = "https://huggingface.co/api/models"

DEFAULT_TIMEOUT = 30.0

# Fields asked for through the listing API's ``?expand=`` parameter. The
# default response leaves out siblings, cardData, and gguf unless they are
# requested this way.
_HF_EXPAND_FIELDS: list[str] = [
    "gguf",
    "siblings",
    "downloads",
    "pipeline_tag",
    "cardData",
]

# HF's ``?search=`` does one space-tokenized substring match against the
# model id. Repeating the ``search=`` param is silently ignored, so the
# user's query is appended to the GGUF term, space-separated, in a single
# param value.
_HF_GGUF_SEARCH_TERM = "GGUF"

_EMPTY_HF_PAGE = HfPage(models=[], has_more=False)

_BYTES_PER_GB = 1024**3

def hf_token() -> str | None:
    """Return the HuggingFace token, or None when none is available.

    ``LILBEE_HF_TOKEN`` is consulted first, then ``HF_TOKEN``; an empty
    value counts as unset. If neither env var yields a token, fall back to
    ``huggingface_hub.get_token()``.
    """
    for env_var in ("LILBEE_HF_TOKEN", "HF_TOKEN"):
        candidate = os.environ.get(env_var)
        if candidate:
            return candidate

    # Best effort: a missing or failing huggingface_hub means "no token",
    # never an error for the caller.
    try:
        from huggingface_hub import get_token

        return get_token()
    except Exception:
        return None

def hf_headers() -> dict[str, str]:
    """Return the HTTP headers to send with HuggingFace API requests.

    Contains a bearer ``Authorization`` header when ``hf_token()`` yields a
    token; otherwise the dict is empty.
    """
    bearer = hf_token()
    return {"Authorization": f"Bearer {bearer}"} if bearer else {}

102

103

def _hf_search_value(search: str) -> str:
    """Return the HF ``search=`` value: the GGUF term followed by the user's tokens.

    The query is split on whitespace and re-joined with single spaces, so an
    empty or blank ``search`` yields the GGUF term alone.
    """
    return " ".join((_HF_GGUF_SEARCH_TERM, *search.split()))

108

109

def _has_gguf_siblings(siblings: list[RepoSibling]) -> bool:
    """Tell whether any sibling's ``rfilename`` ends with ``.gguf``."""
    for sibling in siblings:
        if sibling.rfilename.endswith(".gguf"):
            return True
    return False

113

114

def _estimate_size_from_siblings(siblings: list[RepoSibling]) -> float:
    """Estimate the model size in GB from the largest ``.gguf`` sibling.

    A sibling whose ``size`` is missing counts as 0 bytes. Returns 0.0 when
    no positive size is found.
    """
    largest = max(
        (sib.size or 0 for sib in siblings if sib.rfilename.endswith(".gguf")),
        default=0,
    )
    if largest <= 0:
        return 0.0  # unknown: display as "?" in UI
    return round(largest / _BYTES_PER_GB, 1)

124

125

class HfClient:
    """HuggingFace catalog API client with a per-instance TTL cache.

    Holds the per-process cache of catalog pages keyed by query
    parameters. The cache TTL and capacity are class-level so tests can
    override them via subclassing if needed; the cache state itself is
    per-instance so ``reset_services()`` discards a stale instance
    along with its cache.
    """

    CACHE_TTL: float = 300.0
    CACHE_MAX_ENTRIES: int = 50
    # Rate-limit the "Failed to fetch models" warning so an offline user
    # doesn't see one line per UI tick. First failure surfaces immediately;
    # repeats within the window stay at DEBUG.
    FETCH_FAILURE_WARN_INTERVAL_S: float = 300.0

    def __init__(self) -> None:
        # cache key -> (time.monotonic() stamp taken at lookup, parsed page)
        self._cache: dict[str, tuple[float, HfPage]] = {}
        self._cache_lock = threading.Lock()
        # -inf, not 0.0: on a freshly booted machine ``time.monotonic()`` can be
        # smaller than the window, which would push the first failure to DEBUG.
        self._last_fetch_failure_warn: float = float("-inf")

    @staticmethod
    def _listing_entries(payload: object) -> list[dict] | None:
        """Pick the parseable entries out of a decoded listing payload.

        Returns ``None`` when ``payload`` is not a list at all (for example a
        JSON object), so the caller can treat the response as a failure.
        Otherwise returns the entries that are dicts with a non-empty ``id``;
        everything else is skipped.
        """
        if not isinstance(payload, list):
            return None
        return [raw for raw in payload if isinstance(raw, dict) and raw.get("id")]

    def fetch_models(
        self,
        pipeline_tag: str = "text-generation",
        sort: str = "downloads",
        limit: int = 50,
        offset: int = 0,
        library: str | None = None,
        search: str = "",
    ) -> HfPage:
        """Fetch GGUF models from HuggingFace API with TTL cache.

        Returns an ``HfPage`` with a ``has_more`` flag derived from the
        ``Link: <...>; rel="next"`` response header (RFC 5988), the same
        mechanism the ``huggingface_hub`` library uses internally.

        Args:
            pipeline_tag: Value sent as the ``pipeline_tag`` query param.
            sort: Value sent as the ``sort`` query param.
            limit: Page size, sent as ``limit``.
            offset: Number of results to skip, sent as ``skip``.
            library: Optional ``library`` query param; omitted when falsy.
            search: User query; its tokens are appended to the GGUF term.

        Returns:
            The cached page when a fresh entry exists for the same
            parameters, otherwise the freshly fetched page. On an HTTP
            error status, a transport failure, undecodable JSON, or a
            payload that is not a list, the shared empty page is returned
            and nothing is cached.
        """
        # Local import to avoid a cycle: query imports hf_client (this
        # module), and hf_client uses pipeline_to_task from query.
        from lilbee.catalog.query import pipeline_to_task

        search_value = _hf_search_value(search)
        cache_key = f"{pipeline_tag}:{sort}:{limit}:{offset}:{library}:{search_value}"
        now = time.monotonic()
        with self._cache_lock:
            # Purge every expired entry first, not just the one being looked up.
            expired = [k for k, (ts, _) in self._cache.items() if now - ts >= self.CACHE_TTL]
            for k in expired:
                del self._cache[k]

            cached = self._cache.get(cache_key)
            if cached and now - cached[0] < self.CACHE_TTL:
                return cached[1]

        params = httpx.QueryParams(
            pipeline_tag=pipeline_tag,
            search=search_value,
            sort=sort,
            limit=limit,
            skip=offset,
            expand=_HF_EXPAND_FIELDS,
        )
        if library:
            params = params.add("library", library)
        try:
            resp = httpx.get(
                HF_API_URL, params=params, timeout=DEFAULT_TIMEOUT, headers=hf_headers()
            )
            if resp.status_code >= HTTPStatus.BAD_REQUEST:
                log.warning("HuggingFace API returned HTTP %d", resp.status_code)
                return _EMPTY_HF_PAGE
            data = resp.json()
        except (httpx.HTTPError, ValueError) as exc:
            self._log_fetch_failure(exc)
            return _EMPTY_HF_PAGE

        # A listing response must be a JSON array. Anything else (e.g. an
        # error object) would otherwise blow up in the loop below with an
        # exception the handler above does not catch.
        entries = self._listing_entries(data)
        if entries is None:
            log.warning(
                "HuggingFace API returned an unexpected %s payload", type(data).__name__
            )
            return _EMPTY_HF_PAGE

        has_more = "next" in resp.links

        models: list[CatalogModel] = []
        for raw in entries:
            item = ModelInfo(**raw)
            card_desc = item.card_data.get("description", "") if item.card_data else ""
            gguf_meta = HfGgufMeta(**(item.gguf or {}))
            # Prefer the size reported in the gguf metadata; fall back to the
            # largest .gguf sibling when it is absent.
            if gguf_meta.total > 0:
                size_gb = round(gguf_meta.total / _BYTES_PER_GB, 1)
            else:
                size_gb = _estimate_size_from_siblings(item.siblings or [])
            task = pipeline_to_task(item.pipeline_tag or "")
            models.append(
                CatalogModel(
                    hf_repo=item.id,
                    gguf_filename="*.gguf",
                    size_gb=size_gb,
                    min_ram_gb=round(max(2.0, size_gb * 1.5), 1),
                    description=card_desc[:120] if card_desc else "",
                    featured=False,
                    downloads=item.downloads or 0,
                    task=task,
                )
            )
        page = HfPage(models=models, has_more=has_more)
        with self._cache_lock:
            self._cache[cache_key] = (now, page)
            # Over capacity: evict the single entry with the oldest stamp.
            if len(self._cache) > self.CACHE_MAX_ENTRIES:
                oldest_key = min(self._cache, key=lambda k: self._cache[k][0])
                del self._cache[oldest_key]
        return page

    def _log_fetch_failure(self, exc: Exception) -> None:
        """Log an HF fetch failure, rate-limited so offline use doesn't spam.

        First failure of each ``FETCH_FAILURE_WARN_INTERVAL_S`` window logs
        at WARNING; repeats within the window log at DEBUG. The interval
        starts from the last WARNING so a flapping network produces one
        line every five minutes, not one per UI tick.
        """
        now = time.monotonic()
        if now - self._last_fetch_failure_warn >= self.FETCH_FAILURE_WARN_INTERVAL_S:
            log.warning("Failed to fetch models from HuggingFace: %s", exc)
            self._last_fetch_failure_warn = now
        else:
            log.debug("Suppressed repeat HF fetch failure: %s", exc)

Coverage for src / lilbee / catalog / hf_client.py: 100%

115 statements