Coverage for src / lilbee / providers / llama_cpp / gguf_meta.py: 100%
86 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
1"""GGUF metadata helpers: header reads, mmproj sidecar lookup, projector type."""
3from __future__ import annotations
5import logging
6from pathlib import Path
7from typing import Any
9from gguf import GGUFReader, GGUFValueType
11from lilbee.catalog.header_probe import GGUF_ARCH_KEY
12from lilbee.providers.base import ProviderError
13from lilbee.providers.llama_cpp.abort_signal import abort_callback, clear_abort
14from lilbee.providers.llama_cpp.log_dispatch import (
15 import_llama_cpp,
16 install_llama_log_handler,
17 suppress_native_stderr,
18)
20log = logging.getLogger(__name__)
22_HF_BLOBS_DIR_NAME = "blobs"
23_HF_SNAPSHOTS_DIR_NAME = "snapshots"
24_CLIP_PROJECTOR_TYPE_KEY = "clip.projector_type"
27def train_ctx_from_meta(
28 meta: dict[str, str] | None,
29 *,
30 fallback: int,
31 model_path: Path,
32) -> int:
33 """Resolve ``<arch>.context_length`` from GGUF metadata, clamping junk to ``fallback``.
35 Some published GGUFs (nomic-embed, certain Qwen3 and vision builds)
36 report ``context_length=0`` in their headers. Passing zero into
37 ``Llama(n_ctx=...)`` cascades into ``n_batch=0`` / ``n_ubatch=0``,
38 which trips ggml's Vulkan dispatch into undefined behaviour and
39 surfaces as STATUS_HEAP_CORRUPTION on Windows. Unparseable values
40 and non-positive integers both route to ``fallback``.
41 """
42 if not meta:
43 return fallback
44 raw = meta.get("context_length", str(fallback))
45 try:
46 value = int(raw)
47 except (TypeError, ValueError):
48 log.warning(
49 "GGUF %s has unparseable context_length=%r; using %d",
50 model_path.name,
51 raw,
52 fallback,
53 )
54 return fallback
55 if value <= 0:
56 log.warning(
57 "GGUF %s reports context_length=%d; using %d to avoid n_batch=0 crash",
58 model_path.name,
59 value,
60 fallback,
61 )
62 return fallback
63 return value
def read_gguf_metadata(model_path: Path) -> dict[str, str] | None:
    """Extract selected header fields from a GGUF file via llama-cpp-python.

    The model is opened with ``vocab_only=True`` and ``n_gpu_layers=0``, its
    ``metadata`` mapping is read, and the handle is always closed afterwards.

    The returned dict holds whichever of these keys are present in the
    header, every value converted with ``str``: ``architecture``,
    ``context_length``, ``embedding_length``, the KV-cache-shape fields
    (``block_count``, ``head_count_kv``, ``head_count``, ``key_length``,
    ``value_length``) used to size n_ctx against host memory,
    ``chat_template``, ``file_type`` and ``name``. Architecture-scoped keys
    are looked up under the header's architecture, defaulting to ``llama``
    when the header does not declare one.

    Returns ``None`` when none of the fields are present.
    """
    llama_cls = import_llama_cpp().Llama

    # Reset the abort flag first: a stale request_abort() must not latch and
    # break this read, which sits on the path of every model swap.
    clear_abort()
    install_llama_log_handler()
    load_options: dict[str, Any] = {
        "model_path": str(model_path),
        "vocab_only": True,
        "verbose": False,
        "n_gpu_layers": 0,
        "abort_callback": abort_callback,
    }
    llm = suppress_native_stderr(llama_cls, **load_options)
    try:
        header = llm.metadata or {}
        arch = header.get(GGUF_ARCH_KEY, "llama")
        # (header key, output key) pairs, in the order they appear in the result.
        wanted = (
            (GGUF_ARCH_KEY, "architecture"),
            (f"{arch}.context_length", "context_length"),
            (f"{arch}.embedding_length", "embedding_length"),
            (f"{arch}.block_count", "block_count"),
            (f"{arch}.attention.head_count_kv", "head_count_kv"),
            (f"{arch}.attention.head_count", "head_count"),
            (f"{arch}.attention.key_length", "key_length"),
            (f"{arch}.attention.value_length", "value_length"),
            ("tokenizer.chat_template", "chat_template"),
            ("general.file_type", "file_type"),
            ("general.name", "name"),
        )
        extracted: dict[str, str] = {
            out_key: str(header[src_key]) for src_key, out_key in wanted if src_key in header
        }
        return extracted or None
    finally:
        llm.close()
def _find_mmproj_in_hf_snapshots(model_dir: Path) -> Path | None:
    """Look for an mmproj GGUF in the ``snapshots/`` tree next to an HF ``blobs/`` dir.

    ``model_dir`` is the directory containing the model file. Unless it is
    named ``blobs`` and has a sibling ``snapshots`` directory, the lookup does
    not apply and ``None`` is returned.

    Snapshot directories are visited in sorted order, and within each one the
    ``*mmproj*.gguf`` matches are sorted too, so the same file is chosen on
    every run regardless of the order the filesystem lists entries in.

    Returns the first match, or ``None`` when no snapshot contains one.
    """
    if model_dir.name != _HF_BLOBS_DIR_NAME:
        return None
    snapshots_dir = model_dir.parent / _HF_SNAPSHOTS_DIR_NAME
    if not snapshots_dir.is_dir():
        return None
    # iterdir() yields children in arbitrary order; sort so that the result
    # is deterministic when more than one snapshot exists.
    for snapshot in sorted(snapshots_dir.iterdir()):
        candidates = sorted(snapshot.glob("*mmproj*.gguf"))
        if candidates:
            return candidates[0]
    return None
135def _find_mmproj_in_flat_dir(model_dir: Path) -> Path | None:
136 """Glob ``*mmproj*.gguf`` siblings of a model GGUF (sideloaded layout)."""
137 candidates = sorted(model_dir.glob("*mmproj*.gguf"))
138 return candidates[0] if candidates else None
def find_mmproj_for_model(model_path: Path) -> Path:
    """Locate the mmproj (CLIP projection) file that belongs to a vision model.

    Three lookups are tried in turn, stopping at the first that yields a
    result:

    1. the catalog lookup ``find_mmproj_file`` keyed by the model file's stem
       (scoped to ``FEATURED_VISION``),
    2. the HuggingFace-cache ``snapshots/`` tree beside a ``blobs/`` directory,
    3. a glob of the model's own directory, for flat sideloaded layouts.

    Raises:
        ProviderError: if none of the lookups finds a file.
    """
    from lilbee.catalog import find_mmproj_file

    search_dir = model_path.parent

    found = find_mmproj_file(model_path.stem)
    if not found:
        found = _find_mmproj_in_hf_snapshots(search_dir)
    if not found:
        found = _find_mmproj_in_flat_dir(search_dir)

    if found is None:
        raise ProviderError(
            f"No mmproj (CLIP projection) file found for vision model {model_path.name}. "
            f"Download the mmproj file to {model_path.parent} or re-download the vision "
            "model through the catalog to get both files.",
            provider="llama-cpp",
        )
    return found
def read_mmproj_projector_type(mmproj_path: Path) -> str | None:
    """Return the ``clip.projector_type`` string stored in an mmproj GGUF.

    Only the GGUF header is inspected through ``GGUFReader``; the model is
    not loaded. Any exception raised while opening the file or fetching the
    field is logged at debug level and reported as ``None``.

    Returns ``None`` as well when the field is absent or its last declared
    value type is not a string. Otherwise the raw bytes are decoded as UTF-8
    with undecodable sequences replaced.
    """
    try:
        projector_field = GGUFReader(str(mmproj_path)).get_field(_CLIP_PROJECTOR_TYPE_KEY)
    except Exception:
        # Best-effort probe: an unreadable file simply means "unknown".
        log.debug("Failed to read mmproj metadata from %s", mmproj_path, exc_info=True)
        return None

    if projector_field is not None and projector_field.types[-1] == GGUFValueType.STRING:
        # data[0] is the index into ``parts`` of the string payload.
        payload = projector_field.parts[projector_field.data[0]]
        return bytes(payload).decode("utf-8", errors="replace")
    return None