# src/lilbee/modelhub/model_info.py

1"""Public API for reading model architecture metadata from GGUF files.""" 

2 

3from __future__ import annotations 

4 

5import logging 

6from dataclasses import dataclass 

7 

8from lilbee.core.config import cfg 

9 

10log = logging.getLogger(__name__) 

11 

12 

13@dataclass 

14class ModelArchInfo: 

15 """Architecture metadata for installed models.""" 

16 

17 chat_arch: str = "unknown" 

18 embed_arch: str = "unknown" 

19 vision_projector: str = "unknown" 

20 active_handler: str = "not loaded" 

21 

22 

23# Cache: (chat_model_ref, embed_model_ref, vision_model_ref) -> ModelArchInfo. 

24# Reading GGUF headers is hundreds of ms cold (file open + parse + first 

25# llama_cpp import); the result is stable as long as the configured refs 

26# stay the same. Status screen visits, MCP status calls, and any other 

27# read-side caller share this cache. ``invalidate_cache`` lets settings 

28# updates clear it explicitly when a model ref changes. 

29_arch_cache: dict[tuple[str, str, str], ModelArchInfo] = {} 
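
# A minimal invalidation sketch (illustrative; ``on_model_ref_changed`` is a
# hypothetical settings hook, not part of this module, and it assumes ``cfg``
# fields are assignable):
#
#     def on_model_ref_changed(new_ref: str) -> None:
#         cfg.chat_model = new_ref
#         invalidate_cache()  # next get_model_architecture() re-reads headers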


def _cache_key() -> tuple[str, str, str]:
    return (cfg.chat_model or "", cfg.embedding_model or "", cfg.vision_model or "")


def invalidate_cache() -> None:
    """Drop the architecture cache. Call when a model ref changes."""
    _arch_cache.clear()


def get_model_architecture() -> ModelArchInfo:
    """Return architecture metadata for the currently configured models.

    Memoized on (chat_model, embed_model, vision_model). The first call
    reads GGUF headers for each; subsequent calls under the same refs
    return the cached result instantly. Falls back gracefully if
    llama-cpp-python is not installed or models are not available.
    """
    key = _cache_key()
    cached = _arch_cache.get(key)
    if cached is not None:
        return cached
    info = ModelArchInfo()
    try:
        import lilbee.providers.llama_cpp  # noqa: F401

        info = _read_chat_arch(info)
        info = _read_embed_arch(info)
        info = _read_vision_arch(info)
    except ImportError:
        pass  # llama_cpp is optional; arch info degrades gracefully
    _arch_cache[key] = info
    return info
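
# Read-side usage sketch (illustrative):
#
#     info = get_model_architecture()  # cold call: reads GGUF headers
#     info = get_model_architecture()  # warm call: served from _arch_cache
#     print(info.chat_arch, info.embed_arch, info.vision_projector)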


def _read_chat_arch(info: ModelArchInfo) -> ModelArchInfo:
    """Read chat model architecture from GGUF metadata."""
    try:
        from lilbee.providers.llama_cpp.gguf_meta import read_gguf_metadata
        from lilbee.providers.llama_cpp.provider import resolve_model_path

        path = resolve_model_path(cfg.chat_model)
        meta = read_gguf_metadata(path)
        if meta:
            info.chat_arch = meta.get("architecture", "unknown")
            info.active_handler = "llama-cpp"
    except Exception:
        log.debug("Failed to read chat model architecture", exc_info=True)
    return info
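
# A minimal pytest sketch for the degraded path (illustrative; assumes pytest's
# ``monkeypatch`` fixture, and uses OSError as one plausible failure mode).
# Because ``_read_chat_arch`` imports ``resolve_model_path`` at call time,
# patching the provider module attribute is picked up on the next call:
#
#     def _boom(ref):
#         raise OSError("no such model")
#
#     def test_chat_arch_defaults_on_error(monkeypatch):
#         import lilbee.providers.llama_cpp.provider as provider
#         monkeypatch.setattr(provider, "resolve_model_path", _boom)
#         info = _read_chat_arch(ModelArchInfo())
#         assert info.chat_arch == "unknown"          # dataclass default kept
#         assert info.active_handler == "not loaded"  # never reached assignment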


def _read_embed_arch(info: ModelArchInfo) -> ModelArchInfo:
    """Read embedding model architecture from GGUF metadata."""
    try:
        from lilbee.providers.llama_cpp.gguf_meta import read_gguf_metadata
        from lilbee.providers.llama_cpp.provider import resolve_model_path

        path = resolve_model_path(cfg.embedding_model)
        meta = read_gguf_metadata(path)
        if meta:
            info.embed_arch = meta.get("architecture", "unknown")
    except Exception:
        log.debug("Failed to read embedding model architecture", exc_info=True)
    return info
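
# GGUF metadata shape sketch (illustrative; this module only relies on the
# "architecture" key -- any other keys shown are hypothetical examples):
#
#     meta = read_gguf_metadata(path)
#     # e.g. {"architecture": "bert", ...}
#     meta.get("architecture", "unknown")  # -> "bert", or "unknown" if absent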


def _read_vision_arch(info: ModelArchInfo) -> ModelArchInfo:
    """Read vision projector type from GGUF metadata for ``cfg.vision_model``.

    Reads the vision model name from the global ``cfg`` singleton (same
    pattern as :func:`_read_chat_arch` / :func:`_read_embed_arch`) rather
    than taking it as a parameter. The chat model is never inspected for
    vision capability here: role separation is explicit. Returns the
    input unchanged when no vision model is configured.
    """
    if not cfg.vision_model:
        return info
    try:
        from lilbee.providers.llama_cpp.gguf_meta import (
            find_mmproj_for_model,
            read_mmproj_projector_type,
        )
        from lilbee.providers.llama_cpp.provider import resolve_model_path

        path = resolve_model_path(cfg.vision_model)
        mmproj = find_mmproj_for_model(path)
        proj_type = read_mmproj_projector_type(mmproj)
        info.vision_projector = proj_type or "unknown"
    except Exception:
        log.debug("Failed to read vision projector type", exc_info=True)
    return info
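
# Early-return behavior sketch (illustrative): with ``cfg.vision_model``
# empty or unset, ``_read_vision_arch`` returns its input unchanged:
#
#     info = _read_vision_arch(ModelArchInfo())    # cfg.vision_model unset
#     assert info.vision_projector == "unknown"    # dataclass default, untouched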