Coverage for src / lilbee / app / ingest.py: 100%
40 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Copy files into the documents directory and OCR config helpers."""
3from __future__ import annotations
5import shutil
6from collections.abc import Generator
7from contextlib import contextmanager
8from dataclasses import dataclass, field
9from pathlib import Path
11from lilbee.core.config import cfg
12from lilbee.core.security import validate_path_within
13from lilbee.core.system import is_ignored_dir
@dataclass
class CopyResult:
    """Result of copying files into the documents directory.

    Both lists hold the final path component (``Path.name``) of each source
    that was processed, in the order the sources were handled.
    """

    # Names of sources that were copied into the documents directory.
    copied: list[str] = field(default_factory=list)
    # Names of sources left untouched because the destination already existed.
    skipped: list[str] = field(default_factory=list)
def _copytree_ignore(directory: str, contents: list[str]) -> set[str]:
    """Ignore callback for shutil.copytree that filters ignored directories.

    Args:
        directory: Directory currently being visited by ``shutil.copytree``.
        contents: Names of the entries found directly inside ``directory``.

    Returns:
        The subset of ``contents`` that are directories and that
        ``is_ignored_dir`` reports as ignored for ``cfg.ignore_dirs``.
        Plain files are never returned, even when their name matches.
    """
    # Both values are invariant for a single callback invocation.
    parent = Path(directory)
    ignore_dirs = cfg.ignore_dirs
    return {
        name
        for name in contents
        if (parent / name).is_dir() and is_ignored_dir(name, ignore_dirs)
    }
def copy_files(paths: list[Path], *, force: bool = False) -> CopyResult:
    """Copy paths into documents dir. Returns structured result (no console output).

    Every source is placed directly under ``cfg.documents_dir`` using only its
    final path component, so two sources with the same name share one
    destination.

    Args:
        paths: Files or directories to copy.
        force: When false, a source whose destination already exists is
            recorded as skipped and left untouched. When true, the copy is
            attempted regardless; directories are merged into an existing
            destination directory.

    Returns:
        A ``CopyResult`` listing the names that were copied and skipped.

    Exceptions raised by ``validate_path_within`` or by ``shutil`` are not
    caught here and propagate to the caller.
    """
    cfg.documents_dir.mkdir(parents=True, exist_ok=True)
    result = CopyResult()
    for p in paths:
        dest = cfg.documents_dir / p.name
        # NOTE(review): presumably rejects a destination that escapes the
        # documents directory -- confirm against lilbee.core.security.
        validate_path_within(dest, cfg.documents_dir)
        if dest.exists() and not force:
            result.skipped.append(p.name)
            continue
        if p.is_dir():
            # symlinks=False copies the content of linked files rather than
            # recreating the links; dirs_exist_ok lets a forced copy merge
            # into a directory that is already present.
            shutil.copytree(p, dest, dirs_exist_ok=True, ignore=_copytree_ignore, symlinks=False)
        else:
            shutil.copy2(p, dest)
        result.copied.append(p.name)
    return result
@contextmanager
def temporary_ocr_config(
    enable_ocr: bool | None = None,
    ocr_timeout: float | None = None,
) -> Generator[None, None, None]:
    """Temporarily override OCR config for the duration of the block.

    Args:
        enable_ocr: Value assigned to ``cfg.enable_ocr`` inside the block;
            ``None`` leaves the current value in place.
        ocr_timeout: Value assigned to ``cfg.ocr_timeout`` inside the block;
            ``None`` leaves the current value in place.

    Yields:
        Nothing; the overrides are applied to the shared ``cfg`` object.

    On exit (normal or via an exception) both settings are reset to the
    values they had on entry, including a setting that was not overridden
    but was changed inside the block.
    """
    # Snapshot before touching anything so the finally clause can always restore.
    previous_ocr = cfg.enable_ocr
    previous_timeout = cfg.ocr_timeout
    try:
        if enable_ocr is not None:
            cfg.enable_ocr = enable_ocr
        if ocr_timeout is not None:
            cfg.ocr_timeout = ocr_timeout
        yield
    finally:
        cfg.enable_ocr = previous_ocr
        cfg.ocr_timeout = previous_timeout