Coverage for src / lilbee / providers / llama_cpp / gguf_meta.py: 100%

86 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-06-28 01:01 +0000

"""GGUF metadata helpers: header reads, mmproj sidecar lookup, projector type."""

from __future__ import annotations

import logging
from pathlib import Path
from typing import Any

from gguf import GGUFReader, GGUFValueType

from lilbee.catalog.header_probe import GGUF_ARCH_KEY
from lilbee.providers.base import ProviderError
from lilbee.providers.llama_cpp.abort_signal import abort_callback, clear_abort
from lilbee.providers.llama_cpp.log_dispatch import (
    import_llama_cpp,
    install_llama_log_handler,
    suppress_native_stderr,
)

log = logging.getLogger(__name__)

# Directory names used when walking from a model's ``blobs/`` directory to
# its sibling ``snapshots/`` tree (HuggingFace cache layout).
_HF_BLOBS_DIR_NAME = "blobs"
_HF_SNAPSHOTS_DIR_NAME = "snapshots"
# GGUF metadata key holding the projector type of an mmproj file.
_CLIP_PROJECTOR_TYPE_KEY = "clip.projector_type"

25 

26 

def train_ctx_from_meta(
    meta: dict[str, str] | None,
    *,
    fallback: int,
    model_path: Path,
) -> int:
    """Resolve ``context_length`` from GGUF metadata, clamping junk to ``fallback``.

    Some published GGUFs (nomic-embed, certain Qwen3 and vision builds)
    report ``context_length=0`` in their headers. Passing zero into
    ``Llama(n_ctx=...)`` cascades into ``n_batch=0`` / ``n_ubatch=0``,
    which trips ggml's Vulkan dispatch into undefined behaviour and
    surfaces as STATUS_HEAP_CORRUPTION on Windows. Unparseable values
    and non-positive integers both route to ``fallback``.

    Args:
        meta: Metadata mapping with an optional ``"context_length"`` entry,
            or ``None`` / empty when no metadata is available.
        fallback: Value returned whenever the header gives no usable length.
        model_path: Path of the GGUF; only its file name is used, in warnings.

    Returns:
        The parsed positive context length, otherwise ``fallback``.
    """
    if not meta or "context_length" not in meta:
        # Nothing was reported by the file, so there is nothing to warn about.
        return fallback
    raw = meta["context_length"]
    try:
        value = int(raw)
    except (TypeError, ValueError):
        log.warning(
            "GGUF %s has unparseable context_length=%r; using %d",
            model_path.name,
            raw,
            fallback,
        )
        return fallback
    if value <= 0:
        log.warning(
            "GGUF %s reports context_length=%d; using %d to avoid n_batch=0 crash",
            model_path.name,
            value,
            fallback,
        )
        return fallback
    return value

64 

65 

def read_gguf_metadata(model_path: Path) -> dict[str, str] | None:
    """Read metadata from a GGUF file's headers via llama-cpp-python.

    Returns a dict with keys like ``architecture``, ``context_length``,
    ``embedding_length``, ``chat_template``, ``file_type``, ``name``, plus
    the KV-cache-shape fields (``block_count``, ``head_count_kv``,
    ``head_count``, ``key_length``, ``value_length``) used to size n_ctx
    against host memory. Only keys present in the header are included and
    every value is stringified; ``None`` is returned when none are present.

    Args:
        model_path: Path of the GGUF file to inspect.
    """
    Llama = import_llama_cpp().Llama  # noqa: N806

    # Fresh abort flag: a prior request_abort() must not latch and break
    # this metadata read, which is on the path of every model swap.
    clear_abort()
    install_llama_log_handler()
    llm = suppress_native_stderr(
        Llama,
        model_path=str(model_path),
        vocab_only=True,
        verbose=False,
        n_gpu_layers=0,
        abort_callback=abort_callback,
    )
    try:
        raw = llm.metadata or {}
        # Per-architecture keys are prefixed with the architecture name;
        # "llama" is the prefix used when the header does not declare one.
        arch = raw.get(GGUF_ARCH_KEY, "llama")
        # (header key, output key) pairs; order fixes the result's key order.
        key_map = (
            (GGUF_ARCH_KEY, "architecture"),
            (f"{arch}.context_length", "context_length"),
            (f"{arch}.embedding_length", "embedding_length"),
            (f"{arch}.block_count", "block_count"),
            (f"{arch}.attention.head_count_kv", "head_count_kv"),
            (f"{arch}.attention.head_count", "head_count"),
            (f"{arch}.attention.key_length", "key_length"),
            (f"{arch}.attention.value_length", "value_length"),
            ("tokenizer.chat_template", "chat_template"),
            ("general.file_type", "file_type"),
            ("general.name", "name"),
        )
        result: dict[str, str] = {
            out_key: str(raw[header_key])
            for header_key, out_key in key_map
            if header_key in raw
        }
        return result or None
    finally:
        # Always release the handle, even if reading the metadata raised.
        llm.close()

119 

120 

def _find_mmproj_in_hf_snapshots(model_dir: Path) -> Path | None:
    """Walk an HF-cache ``blobs/`` dir up to its sibling ``snapshots/`` tree.

    Args:
        model_dir: Directory containing the model file.

    Returns:
        The first ``*mmproj*.gguf`` match (in sorted order) from the first
        snapshot (in sorted order) that has one, or ``None`` when
        ``model_dir`` is not a ``blobs`` directory, there is no sibling
        ``snapshots`` directory, or no snapshot contains a match.
    """
    if model_dir.name != _HF_BLOBS_DIR_NAME:
        return None
    snapshots_dir = model_dir.parent / _HF_SNAPSHOTS_DIR_NAME
    if not snapshots_dir.is_dir():
        return None
    # iterdir() yields entries in arbitrary order; sort so the same
    # snapshot wins on every run when several contain an mmproj.
    for snapshot in sorted(snapshots_dir.iterdir()):
        match = min(snapshot.glob("*mmproj*.gguf"), default=None)
        if match is not None:
            return match
    return None

133 

134 

def _find_mmproj_in_flat_dir(model_dir: Path) -> Path | None:
    """Glob ``*mmproj*.gguf`` siblings of a model GGUF (sideloaded layout).

    Args:
        model_dir: Directory to search (non-recursive).

    Returns:
        The lowest-sorting matching path, or ``None`` when nothing matches.
    """
    # min() picks the same element sorted(...)[0] would, without building
    # the full sorted list.
    return min(model_dir.glob("*mmproj*.gguf"), default=None)

139 

140 

def find_mmproj_for_model(model_path: Path) -> Path:
    """Find the mmproj (CLIP projection) file for a vision model.

    Resolution order: (1) catalog lookup scoped to ``FEATURED_VISION``,
    (2) HuggingFace-cache ``snapshots/`` sibling of ``blobs/``,
    (3) same-directory glob for flat sideloaded layouts.

    Args:
        model_path: Path of the vision model's GGUF file. Its stem keys the
            catalog lookup and its parent directory is searched on disk.

    Returns:
        Path of the first mmproj file located by the steps above.

    Raises:
        ProviderError: If none of the lookups find a file.
    """
    # Imported at call time rather than module import time; presumably to
    # avoid an import cycle with ``lilbee.catalog`` (TODO confirm).
    from lilbee.catalog import find_mmproj_file

    model_dir = model_path.parent
    found = (
        find_mmproj_file(model_path.stem)
        or _find_mmproj_in_hf_snapshots(model_dir)
        or _find_mmproj_in_flat_dir(model_dir)
    )
    if found is None:
        raise ProviderError(
            f"No mmproj (CLIP projection) file found for vision model {model_path.name}. "
            f"Download the mmproj file to {model_path.parent} or re-download the vision "
            "model through the catalog to get both files.",
            provider="llama-cpp",
        )
    return found

165 

166 

def read_mmproj_projector_type(mmproj_path: Path) -> str | None:
    """Read ``clip.projector_type`` from a GGUF mmproj without loading the model.

    Best-effort: any failure to open or parse the file is logged at debug
    level and reported as ``None`` rather than raised.

    Args:
        mmproj_path: Path of the mmproj GGUF file.

    Returns:
        The projector type decoded as UTF-8 (invalid bytes replaced), or
        ``None`` when the file cannot be read, the key is absent, or the
        field is not a string.
    """
    try:
        reader = GGUFReader(str(mmproj_path))
        field = reader.get_field(_CLIP_PROJECTOR_TYPE_KEY)
    except Exception:
        log.debug("Failed to read mmproj metadata from %s", mmproj_path, exc_info=True)
        return None
    if field is None:
        return None
    # A malformed field with no type or data entries must not escape as an
    # IndexError from a best-effort reader.
    if len(field.types) == 0 or len(field.data) == 0:
        return None
    if field.types[-1] != GGUFValueType.STRING:
        return None
    # field.data[0] indexes the part that holds the string's raw bytes.
    return bytes(field.parts[field.data[0]]).decode("utf-8", errors="replace")