Coverage for src / lilbee / providers / llama_cpp / gpu_select.py: 100%
181 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Best-GPU autodetection for the Vulkan backend.
3On a host with multiple GPUs (typical dual-GPU laptop: discrete NVIDIA
4plus integrated AMD/Intel), Vulkan device ordering is driver- and
5OS-dependent. llama.cpp's Vulkan backend enumerates all discrete AND
6integrated adapters in the order Vulkan's ICD loader returns them
7(see ``ggml-vulkan.cpp::ggml_vk_instance_init``: both
8``eDiscreteGpu`` and ``eIntegratedGpu`` are added without sorting),
9so a model can land on the integrated GPU and stall against shared
10system memory.
12This module probes the Vulkan loader directly via ``ctypes`` to
13enumerate adapters, ranks them by ``VkPhysicalDeviceType`` (discrete
14> integrated > virtual > CPU), and returns the index that should be
15pinned via ``GGML_VK_VISIBLE_DEVICES``. Going through ``ctypes``
16instead of a subprocess avoids any dependency on the Vulkan SDK
17(``vulkaninfo`` isn't installed on stock Windows or macOS), so the
18autodetect works on every machine that already has a Vulkan driver.
20CUDA and ROCm enumeration are deliberately out of scope: CUDA only
21sees NVIDIA devices and HIP/ROCm only sees AMD devices, so neither
22backend exhibits the dual-GPU mis-pick problem. The Vulkan probe
23result is applied to ``GGML_VK_VISIBLE_DEVICES`` alone; applying it
24to ``CUDA_VISIBLE_DEVICES`` would risk hiding the only CUDA device
25on a CUDA wheel + dual-GPU host.
26"""
28from __future__ import annotations
30import ctypes
31import ctypes.util
32import fnmatch
33import logging
34import ntpath
35import os
36import sys
37from ctypes import POINTER, byref, c_char, c_char_p, c_uint8, c_uint32, c_void_p
38from dataclasses import dataclass
39from enum import IntEnum, StrEnum
41from lilbee.providers.llama_cpp.vulkan_icd_discovery import (
42 iter_vulkan_manifest_paths,
43)
log = logging.getLogger(__name__)

# Numeric values copied from vulkan_core.h so that no vulkan-headers
# dependency is needed for a handful of integers. Upstream definitions:
# https://github.com/KhronosGroup/Vulkan-Headers/blob/main/include/vulkan/vulkan_core.h
# (VkStructureType enum plus the VK_SUCCESS / VK_API_VERSION_1_0 values).
_VK_SUCCESS = 0
_VK_STRUCTURE_TYPE_APPLICATION_INFO = 0
_VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1
# Packed as (major << 22) | (minor << 12) | patch, here 1.0.0.
_VK_API_VERSION_1_0 = (1 << 22) | (0 << 12) | 0
class VkDeviceType(IntEnum):
    """Adapter categories, mirroring ``VkPhysicalDeviceType`` in vulkan_core.h.

    The integers are the C enum values unchanged, so the ``deviceType``
    field of ``VkPhysicalDeviceProperties`` converts directly with
    ``VkDeviceType(value)``.
    """

    OTHER = 0
    INTEGRATED_GPU = 1
    DISCRETE_GPU = 2
    VIRTUAL_GPU = 3
    CPU = 4
# Adapter preference table: the larger the number, the more desirable
# the adapter. CPU (software rendering) scores 0 because it is never
# the right pick, and ``_pick_best_device`` refuses a zero-ranked winner.
_DEVICE_TYPE_RANK: dict[VkDeviceType, int] = {
    VkDeviceType.DISCRETE_GPU: 4,
    VkDeviceType.INTEGRATED_GPU: 3,
    VkDeviceType.VIRTUAL_GPU: 2,
    VkDeviceType.OTHER: 1,
    VkDeviceType.CPU: 0,
}
def _rank_for(device_type: int) -> int:
    """Translate a raw ``deviceType`` integer into its preference rank.

    A value that is not a ``VkDeviceType`` member (a driver reporting an
    unknown type) ranks ``0``, the same as a CPU device.
    """
    try:
        kind = VkDeviceType(device_type)
    except ValueError:
        return 0
    return _DEVICE_TYPE_RANK[kind]
# Lengths of the fixed-size arrays embedded in VkPhysicalDeviceProperties
# (VK_UUID_SIZE and VK_MAX_PHYSICAL_DEVICE_NAME_SIZE). Both belong to the
# Vulkan 1.0 ABI and therefore never change; definitions live in
# https://github.com/KhronosGroup/Vulkan-Headers/blob/main/include/vulkan/vulkan_core.h
_VK_UUID_SIZE = 16
_VK_MAX_PHYSICAL_DEVICE_NAME_SIZE = 256
@dataclass(frozen=True)
class VulkanDevice:
    """Immutable record describing a single adapter returned by the Vulkan loader."""

    # Position of the adapter in the loader's enumeration order.
    index: int
    # Raw ``deviceType`` value; not guaranteed to be a ``VkDeviceType`` member.
    device_type: int
    # Adapter name decoded from the driver-supplied ``deviceName`` bytes.
    device_name: str
    # Raw ``vendorID`` value reported by the driver.
    vendor_id: int
class PCIVendorID(IntEnum):
    """PCI vendor IDs of the GPU makers whose Vulkan ICDs this module handles.

    These are the PCI-SIG-assigned identifiers that show up in
    ``VkPhysicalDeviceProperties.vendorID``. The registry is public at
    https://pcisig.com/membership/member-companies (mirror:
    https://devicehunt.com/all-pci-vendors). Membership here is limited
    to vendors that have an entry in the ICD-disable glob table; any
    other vendor is simply not classified.
    """

    NVIDIA = 0x10DE  # NVIDIA Corporation
    AMD = 0x1002  # Advanced Micro Devices, Inc. [AMD/ATI]
    INTEL = 0x8086  # Intel Corporation
# Per-vendor filename globs for Vulkan loader JSON manifests. The loader
# compares these against manifest filenames in its known-drivers list; see
# https://github.com/KhronosGroup/Vulkan-Loader/blob/main/docs/LoaderInterfaceArchitecture.md
# A vendor's manifest is named differently depending on driver and OS, so
# every known spelling is listed; otherwise disabling a vendor would only
# disable some of its drivers.
_VENDOR_ICD_GLOBS: dict[PCIVendorID, tuple[str, ...]] = {
    # Windows ships nv-vk*.json and Linux ships nvidia_*.json; nv* covers both.
    PCIVendorID.NVIDIA: ("nv*",),
    # amdvlk64.json      - AMDVLK on Windows
    # amd_icd*.json      - AMDVLK on Linux (no other glob here matches it)
    # amd-vulkan*.json   - legacy AMDVLK builds
    # radeon_icd.*.json  - Mesa RADV on Linux
    PCIVendorID.AMD: ("amdvlk*", "amd_icd*", "amd-vulkan*", "radeon*"),
    # intel_icd.*.json   - Mesa ANV on Linux
    # igvk*.json         - Windows
    PCIVendorID.INTEL: ("intel*", "igvk*"),
}
144class VulkanIcdEnvVar(StrEnum):
145 """Every documented Vulkan loader env var that influences ICD selection.
147 Names are the verbatim loader env vars from the Khronos
148 LoaderInterfaceArchitecture spec; the StrEnum lets each member be
149 used directly as a ``str`` argument to ``os.environ.get`` /
150 ``os.environ.setdefault`` without ``.value`` plumbing. Any value
151 being non-empty in the environment is treated as a user override
152 and suppresses the dual-vendor auto-pin.
153 """
155 DRIVER_FILES = "VK_DRIVER_FILES"
156 ICD_FILENAMES = "VK_ICD_FILENAMES"
157 ADD_DRIVER_FILES = "VK_ADD_DRIVER_FILES"
158 LOADER_DRIVERS_DISABLE = "VK_LOADER_DRIVERS_DISABLE"
159 LOADER_DRIVERS_SELECT = "VK_LOADER_DRIVERS_SELECT"
162# Field layouts from the Vulkan 1.0 spec. ctypes maps the C structs
163# verbatim so the loader populates them directly; only the prefix
164# fields we read are commented (the trailing fields are kept for ABI
165# alignment, not consumed).
class _VkApplicationInfo(ctypes.Structure):
    """ctypes layout of ``VkApplicationInfo``, referenced from the instance create info."""

    _fields_ = [
        ("sType", c_uint32),  # structure-type tag
        ("pNext", c_void_p),  # extension chain pointer
        ("pApplicationName", c_char_p),
        ("applicationVersion", c_uint32),
        ("pEngineName", c_char_p),
        ("engineVersion", c_uint32),
        ("apiVersion", c_uint32),  # packed Vulkan API version
    ]
class _VkInstanceCreateInfo(ctypes.Structure):
    """ctypes layout of ``VkInstanceCreateInfo``, the argument to ``vkCreateInstance``."""

    _fields_ = [
        ("sType", c_uint32),  # structure-type tag
        ("pNext", c_void_p),  # extension chain pointer
        ("flags", c_uint32),
        ("pApplicationInfo", POINTER(_VkApplicationInfo)),
        # Layer and extension lists: a count followed by an array of C strings.
        ("enabledLayerCount", c_uint32),
        ("ppEnabledLayerNames", POINTER(c_char_p)),
        ("enabledExtensionCount", c_uint32),
        ("ppEnabledExtensionNames", POINTER(c_char_p)),
    ]
class _VkPhysicalDeviceLimits(ctypes.Structure):
    """Opaque stand-in for ``VkPhysicalDeviceLimits``.

    None of the limits are read. The struct exists only so the enclosing
    ``VkPhysicalDeviceProperties`` mirror has the same size and field
    offsets as the C struct the driver writes into.
    """

    # The C struct is 504 bytes on 64-bit ABIs and contains 64-bit
    # ``VkDeviceSize`` members, so the compiler gives it 8-byte
    # alignment. See the definition in
    # https://github.com/KhronosGroup/Vulkan-Headers/blob/main/include/vulkan/vulkan_core.h
    #
    # The placeholder must carry that alignment as well as the size.
    # A byte array (alignment 1) would let ctypes place this member 4
    # bytes too early in the parent struct and make the parent 8 bytes
    # shorter than what ``vkGetPhysicalDeviceProperties`` writes, i.e. an
    # out-of-bounds write. 63 x uint64 keeps the 504-byte size and takes
    # the platform's uint64 alignment.
    # NOTE(review): like the original 504 figure, this assumes a 64-bit ABI.
    _fields_ = [("_opaque", ctypes.c_uint64 * 63)]
class _VkPhysicalDeviceSparseProperties(ctypes.Structure):
    """Opaque stand-in for ``VkPhysicalDeviceSparseProperties``; its contents are never read."""

    # Five 32-bit booleans in the Vulkan 1.0 ABI (same header as the
    # other structs); present only to reserve the right number of bytes.
    _fields_ = [("_opaque", c_uint32 * 5)]
class _VkPhysicalDeviceProperties(ctypes.Structure):
    """ctypes layout of ``VkPhysicalDeviceProperties``, filled in by the driver.

    Only ``vendorID``, ``deviceType`` and ``deviceName`` are consumed by
    this module. The remaining members are declared so the buffer handed
    to ``vkGetPhysicalDeviceProperties`` covers the whole struct.
    """

    _fields_ = [
        ("apiVersion", c_uint32),
        ("driverVersion", c_uint32),
        ("vendorID", c_uint32),  # read: PCI vendor ID
        ("deviceID", c_uint32),
        ("deviceType", c_uint32),  # read: VkPhysicalDeviceType value
        ("deviceName", c_char * _VK_MAX_PHYSICAL_DEVICE_NAME_SIZE),  # read: adapter name
        ("pipelineCacheUUID", c_uint8 * _VK_UUID_SIZE),
        # Trailing members are layout-only placeholders.
        ("limits", _VkPhysicalDeviceLimits),
        ("sparseProperties", _VkPhysicalDeviceSparseProperties),
    ]
def autoselect_best_gpu_index() -> str | None:
    """Pick the Vulkan adapter to pin, as a ``GGML_VK_VISIBLE_DEVICES`` string.

    Returns:
        The winning adapter's enumeration index as a string (``"0"``,
        ``"1"``, ...), or ``None`` when no pin should be applied: the
        probe produced nothing, no adapter is acceptable, or every
        visible adapter has the same rank so there is nothing to choose.

    CUDA / HIP / ROCm are intentionally not handled here; those backends
    are single-vendor and their env vars do not share the Vulkan
    loader's enumeration order.
    """
    adapters = _enumerate_vulkan_devices()
    if adapters is None:
        return None

    winner = _pick_best_device(adapters)
    if winner is None:
        return None

    # A pin is only worthwhile when adapters of different rank compete.
    # With a single rank the loader's own ordering is already right, and
    # forcing an index would mask a user's manual override on rebuild.
    distinct_ranks = set(map(_rank_for, (adapter.device_type for adapter in adapters)))
    return str(winner.index) if len(distinct_ranks) > 1 else None
def _enumerate_vulkan_devices() -> list[VulkanDevice] | None:
    """Load the Vulkan loader and list the adapters it reports.

    Returns:
        ``None`` when the loader library cannot be loaded or the probe
        raises; otherwise the list produced by
        ``_list_devices_with_instance``, which may be empty.

    The result is not cached; every call performs a fresh probe.
    """
    lib = _load_vulkan_loader()
    if lib is None:
        return None
    try:
        return _list_devices_with_instance(lib)
    except (OSError, AttributeError, ctypes.ArgumentError):
        # Best-effort probe: never take the host process down.
        #   OSError              - failures reported by the OS / loader call
        #   AttributeError       - the library lacks one of the vk* symbols
        #   ctypes.ArgumentError - an argument could not be converted
        # The last two are not OSError subclasses, so catching OSError
        # alone would let them escape.
        log.debug("Vulkan device probe failed", exc_info=True)
        return None
def _load_vulkan_loader() -> ctypes.CDLL | None:
    """Open the platform's Vulkan loader library.

    Returns:
        The loaded library, or ``None`` if it cannot be loaded. ``None``
        is the normal result on stock macOS (a Metal wheel ships there)
        and on machines without a Vulkan-capable driver.
    """
    platform = sys.platform
    if platform == "darwin":
        # MoltenVK exposes a different ABI than libvulkan, and the macOS
        # wheel talks to Metal directly, so there is nothing to probe.
        return None

    sonames: tuple[str, ...] = (
        ("vulkan-1.dll",) if platform == "win32" else ("libvulkan.so.1", "libvulkan.so")
    )
    for soname in sonames:
        try:
            return ctypes.CDLL(soname)
        except OSError:
            pass

    # Last resort for distros where the soname can't be loaded directly.
    located = ctypes.util.find_library("vulkan")
    if located is None:
        return None
    try:
        return ctypes.CDLL(located)
    except OSError:
        return None
def _list_devices_with_instance(lib: ctypes.CDLL) -> list[VulkanDevice]:
    """Enumerate adapters through a throwaway ``VkInstance``.

    The instance is created, queried and destroyed within this call
    (the same sequence ``vulkaninfo --summary`` performs), so no driver
    state outlives the probe.

    Args:
        lib: The loaded Vulkan loader library.

    Returns:
        One ``VulkanDevice`` per adapter in enumeration order, or an
        empty list if instance creation or enumeration does not succeed
        or no adapters are reported.
    """
    vk_create, vk_destroy, vk_enumerate, vk_get_props = _resolve_vk_symbols(lib)

    application = _VkApplicationInfo(
        sType=_VK_STRUCTURE_TYPE_APPLICATION_INFO,
        pNext=None,
        pApplicationName=b"lilbee-gpu-probe",
        applicationVersion=0,
        pEngineName=b"lilbee",
        engineVersion=0,
        apiVersion=_VK_API_VERSION_1_0,
    )
    # No layers and no extensions: the bare minimum needed to enumerate.
    request = _VkInstanceCreateInfo(
        sType=_VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        pNext=None,
        flags=0,
        pApplicationInfo=ctypes.pointer(application),
        enabledLayerCount=0,
        ppEnabledLayerNames=None,
        enabledExtensionCount=0,
        ppEnabledExtensionNames=None,
    )
    instance = c_void_p()
    status = vk_create(byref(request), None, byref(instance))
    if status != _VK_SUCCESS or not instance.value:
        return []

    try:
        # First call with a null array asks only for the adapter count.
        adapter_count = c_uint32(0)
        status = vk_enumerate(instance, byref(adapter_count), None)
        if status != _VK_SUCCESS or adapter_count.value == 0:
            return []

        # Second call fills an array sized from that count.
        handles = (c_void_p * adapter_count.value)()
        status = vk_enumerate(instance, byref(adapter_count), handles)
        if status != _VK_SUCCESS:
            return []

        found: list[VulkanDevice] = []
        for position in range(adapter_count.value):
            properties = _VkPhysicalDeviceProperties()
            vk_get_props(handles[position], byref(properties))
            adapter = VulkanDevice(
                index=position,
                device_type=int(properties.deviceType),
                device_name=properties.deviceName.decode("utf-8", errors="replace"),
                vendor_id=int(properties.vendorID),
            )
            found.append(adapter)
        return found
    finally:
        # Runs on every exit path once the instance exists.
        vk_destroy(instance, None)
def _resolve_vk_symbols(
    lib: ctypes.CDLL,
) -> tuple[ctypes._FuncPointer, ctypes._FuncPointer, ctypes._FuncPointer, ctypes._FuncPointer]:
    """Fetch the four Vulkan entry points the probe uses and declare their signatures.

    Setting ``argtypes`` / ``restype`` up front makes ctypes convert
    arguments and return values according to the C prototypes instead
    of guessing from the Python values passed in.

    Returns:
        ``(vkCreateInstance, vkDestroyInstance,
        vkEnumeratePhysicalDevices, vkGetPhysicalDeviceProperties)``.
    """
    # (exported symbol, argument types, return type), in return order.
    prototypes = (
        (
            "vkCreateInstance",
            [POINTER(_VkInstanceCreateInfo), c_void_p, POINTER(c_void_p)],
            c_uint32,
        ),
        ("vkDestroyInstance", [c_void_p, c_void_p], None),
        (
            "vkEnumeratePhysicalDevices",
            [c_void_p, POINTER(c_uint32), POINTER(c_void_p)],
            c_uint32,
        ),
        (
            "vkGetPhysicalDeviceProperties",
            [c_void_p, POINTER(_VkPhysicalDeviceProperties)],
            None,
        ),
    )
    resolved = []
    for symbol, argument_types, return_type in prototypes:
        entry_point = getattr(lib, symbol)
        entry_point.argtypes = argument_types
        entry_point.restype = return_type
        resolved.append(entry_point)
    create_instance, destroy_instance, enum_physical, get_properties = resolved
    return create_instance, destroy_instance, enum_physical, get_properties
def _pick_best_device(devices: list[VulkanDevice]) -> VulkanDevice | None:
    """Choose the adapter with the highest rank; the lowest index wins a tie.

    Returns ``None`` for an empty list, or when even the best adapter
    has rank ``0`` (CPU or an unrecognised device type).
    """
    if not devices:
        return None
    # Negating the rank lets one ascending key express "highest rank
    # first, then lowest index".
    top = min(devices, key=lambda dev: (-_rank_for(dev.device_type), dev.index))
    return top if _rank_for(top.device_type) > 0 else None
# A pin is pointless with fewer than two vendors: only one vendor's ICD
# loads, so there is nothing for it to collide with.
_MIN_VENDORS_FOR_CONFLICT = 2

# Which vendor keeps its ICD on a dual-vendor host, most preferred first.
# NVIDIA leads because the documented crash signature is AMDVLK running
# next to NVIDIA (b473 QA, Khronos forum, SHARK-Studio#1636) and NVIDIA
# is the more common discrete GPU on such machines. AMD ahead of Intel
# handles laptops pairing an AMD discrete GPU with an Intel iGPU.
_PREFERRED_VENDOR_ORDER: tuple[PCIVendorID, ...] = (
    PCIVendorID.NVIDIA,
    PCIVendorID.AMD,
    PCIVendorID.INTEL,
)
def _icds_to_disable(best: PCIVendorID, all_vendors: set[PCIVendorID]) -> list[str]:
    """Collect the manifest globs of every vendor in *all_vendors* other than *best*.

    Vendors are visited in ascending vendor-ID order so the output is
    deterministic regardless of set iteration order.
    """
    losers = sorted((vendor for vendor in all_vendors if vendor is not best), key=int)
    return [pattern for vendor in losers for pattern in _VENDOR_ICD_GLOBS[vendor]]
def _classify_manifest_vendor(manifest_filename: str) -> PCIVendorID | None:
    """Identify which vendor a manifest filename belongs to, if any.

    Matching is case-insensitive against ``_VENDOR_ICD_GLOBS``; the first
    vendor with a matching glob wins and ``None`` means no glob matched.
    """
    lowered = manifest_filename.lower()
    return next(
        (
            vendor
            for vendor, patterns in _VENDOR_ICD_GLOBS.items()
            if any(fnmatch.fnmatchcase(lowered, pattern.lower()) for pattern in patterns)
        ),
        None,
    )
def _vulkan_vendors_present() -> set[PCIVendorID]:
    """Return the set of known vendors that have a Vulkan ICD manifest installed."""
    # ntpath.basename treats both '\\' and '/' as separators, so one call
    # handles Windows-registry paths and Linux Path.__str__() output.
    classified = (
        _classify_manifest_vendor(ntpath.basename(manifest_path))
        for manifest_path in iter_vulkan_manifest_paths()
    )
    return {vendor for vendor in classified if vendor is not None}
def _select_best_vendor(vendors: set[PCIVendorID]) -> PCIVendorID | None:
    """Return the most-preferred vendor found in *vendors*.

    Preference follows ``_PREFERRED_VENDOR_ORDER``; ``None`` is returned
    when none of the preferred vendors is present.
    """
    return next(
        (candidate for candidate in _PREFERRED_VENDOR_ORDER if candidate in vendors),
        None,
    )
def _platform_supports_icd_pin() -> bool:
    """Report whether this OS is one where dual-vendor ICD crashes are documented.

    That is Windows and Linux; every other platform returns ``False``.
    """
    platform = sys.platform
    if platform == "win32":
        return True
    return platform.startswith("linux")
473# References for the dual-vendor ICD mitigation below:
474# - Khronos Vulkan-Loader env var spec (VK_LOADER_DRIVERS_DISABLE / VK_DRIVER_FILES):
475# https://github.com/KhronosGroup/Vulkan-Loader/blob/main/docs/LoaderInterfaceArchitecture.md
476# - ICD manifest filename conventions and Windows registry discovery order:
477# https://github.com/KhronosGroup/Vulkan-Loader/blob/main/docs/LoaderDriverInterface.md
478# - "Failure in one ICD causes total failure of vkEnumeratePhysicalDevices":
479# https://github.com/KhronosGroup/Vulkan-Loader/issues/1467
480# - Khronos forum: amdvlk64.dll crashes in vkCreateInstance on mixed-vendor hosts:
481# https://community.khronos.org/t/crash-in-amdvlk64-dll-during-vkcreateinstance/105022
482# - SHARK-Studio #1636 (the same crash hits another Python ML inference tool):
483# https://github.com/nod-ai/SHARK-Studio/issues/1636
484# - Steam overlay multi-VkDevice crash on Linux (ValveSoftware/steam-for-linux#9120):
485# https://github.com/ValveSoftware/steam-for-linux/issues/9120
486# - Mesa RADV pipeline-creation heap corruption (ggml-org/llama.cpp#22128):
487# https://github.com/ggml-org/llama.cpp/issues/22128
488# - NVIDIA help article 5182, dual-vendor Vulkan apps on notebooks:
489# https://nvidia.custhelp.com/app/answers/detail/a_id/5182/
490# - Heroic Games Launcher ICD-selection issue (same mitigation pattern in prod):
491# https://github.com/Heroic-Games-Launcher/HeroicGamesLauncher/issues/3796
492# - Blender Vulkan backend startup failure on dual-vendor hosts:
493# https://projects.blender.org/blender/blender/issues/129917
def disable_conflicting_vulkan_icds() -> str | None:
    """Build the comma-separated manifest globs of ICDs that should be disabled.

    The vendor that keeps its ICD is chosen in the order NVIDIA > AMD >
    Intel; the globs of every other detected vendor are returned.

    Returns:
        The glob list joined with ``","``, or ``None`` to leave the
        loader's default alone. ``None`` is returned when the platform
        has no documented dual-vendor crash class, when the user already
        selected a GPU through any Vulkan ICD env var or
        ``cfg.gpu_devices``, or when fewer than two known vendors are
        installed.

    Vendors are discovered from installed manifests (registry on
    Windows, XDG hierarchy on Linux) rather than by calling
    ``vkCreateInstance``: enumerating through the loader would load
    every vendor's ICD before the disable list could take effect.
    """
    from lilbee.core.config import cfg

    # Checked in this order so cheaper / more decisive opt-outs go first.
    if (
        not _platform_supports_icd_pin()
        or any(os.environ.get(name) for name in VulkanIcdEnvVar)
        or cfg.gpu_devices
    ):
        return None

    present = _vulkan_vendors_present()
    if len(present) < _MIN_VENDORS_FOR_CONFLICT:
        return None

    preferred = _select_best_vendor(present)
    if preferred is None:  # pragma: no cover - invariant: vendors is non-empty here
        return None
    return ",".join(_icds_to_disable(preferred, present))