Coverage for src/lilbee/vision.py: 100%

1"""Helpers for PDF rasterisation and vision-model OCR.

3Multi-page vision OCR runs through ``LlamaCppProvider.pdf_ocr`` and the

4persistent worker pool; this module hosts the small helpers (page count,

5rasterisation, prompt + chat-message construction, and the shared

6:class:`PageText` / :class:`PdfOcrChunk` types) that both the worker and

7the parent need.

8"""

10import logging

11from collections.abc import Iterator

12from pathlib import Path

13from typing import NamedTuple

15log = logging.getLogger(__name__)

class PageText(NamedTuple):
    """Extracted text for a single PDF page."""

    # Page number this text belongs to (NOTE(review): 0- vs 1-based is not
    # established in this module; confirm against the producer).
    page: int
    # Text extracted for that page.
    text: str

class PdfOcrChunk(NamedTuple):
    """One streaming PDF-OCR worker frame: page index, total pages, page text."""

    # Index of the page this frame carries.
    page: int
    # Total number of pages in the document being processed.
    total: int
    # Text produced for the page.
    text: str

33OCR_PROMPT = (

34 "Extract ALL text from this page as clean markdown. "

35 "Preserve table structure using markdown table syntax. "

36 "Include all rows, columns, headers, and page text exactly as shown."

37)

39_RASTER_DPI = 150

def pdf_page_count(path: Path) -> int:
    """Return the number of pages in a PDF without rasterizing.

    The iterator is used as a context manager, the same way ``rasterize_pdf``
    uses it, so it is closed as soon as the length has been read instead of
    being left for the garbage collector.
    """
    from kreuzberg import PdfPageIterator  # lazy: heavy dependency

    pages = PdfPageIterator(str(path), dpi=_RASTER_DPI)
    with pages:
        return len(pages)

def rasterize_pdf(path: Path) -> Iterator[tuple[int, bytes]]:
    """Yield (0-based index, PNG bytes) for each page of a PDF.

    This is a generator: the ``PdfPageIterator`` context stays open while
    pages are being consumed and is exited when the generator finishes or
    is closed.
    """
    from kreuzberg import PdfPageIterator  # lazy: heavy dependency

    with PdfPageIterator(str(path), dpi=_RASTER_DPI) as pages:
        yield from pages

58def _png_to_data_url(png_bytes: bytes) -> str:

59 """Convert raw PNG bytes to a base64 data URL for OpenAI-compatible messages."""

60 import base64

62 b64 = base64.b64encode(png_bytes).decode("ascii")

63 return f"data:image/png;base64,{b64}"

def build_vision_messages(prompt: str, png_bytes: bytes) -> list[dict]:
    """Build OpenAI-compatible messages with image content for vision models.

    Uses the multipart content format expected by llama.cpp's mtmd pipeline.

    Returns a one-element list holding a single ``user`` message whose
    content is the page image (as a base64 data URL) followed by the text
    prompt.
    """
    image_part = {
        "type": "image_url",
        "image_url": {"url": _png_to_data_url(png_bytes)},
    }
    text_part = {"type": "text", "text": prompt}
    # Image first, then prompt: this is the order the original message used.
    return [{"role": "user", "content": [image_part, text_part]}]

Coverage for src / lilbee / vision.py: 100%

28 statements