Coverage for src / lilbee / app / ingest.py: 100%

40 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""Copy files into the documents directory and OCR config helpers.""" 

2 

3from __future__ import annotations 

4 

5import shutil 

6from collections.abc import Generator 

7from contextlib import contextmanager 

8from dataclasses import dataclass, field 

9from pathlib import Path 

10 

11from lilbee.core.config import cfg 

12from lilbee.core.security import validate_path_within 

13from lilbee.core.system import is_ignored_dir 

14 

15 

@dataclass
class CopyResult:
    """Outcome of a copy run, split into names that were copied and names that were skipped.

    Both lists start empty and are independent per instance because each one
    is built by its own ``default_factory``.
    """

    # Names that were copied into the documents directory.
    copied: list[str] = field(default_factory=list)
    # Names that were left untouched.
    skipped: list[str] = field(default_factory=list)

22 

23 

def _copytree_ignore(directory: str, contents: list[str]) -> set[str]:
    """Select the entries of *directory* that ``shutil.copytree`` should leave out.

    Args:
        directory: Directory currently being visited by ``copytree``.
        contents: Names of the entries found directly inside *directory*.

    Returns:
        The subset of *contents* that are directories on disk and for which
        ``is_ignored_dir(name, cfg.ignore_dirs)`` is truthy.
    """
    parent = Path(directory)
    excluded: set[str] = set()
    for entry in contents:
        # Only directories are candidates; plain files are never filtered here.
        if not (parent / entry).is_dir():
            continue
        if is_ignored_dir(entry, cfg.ignore_dirs):
            excluded.add(entry)
    return excluded

31 

32 

def copy_files(paths: list[Path], *, force: bool = False) -> CopyResult:
    """Copy each given path into the configured documents directory.

    The documents directory is created first if it does not exist. Nothing is
    printed; the outcome is reported through the returned object.

    Args:
        paths: Files and/or directories to copy. Each lands directly under the
            documents directory, keeping only its final path component.
        force: When false, a source whose destination already exists is
            recorded as skipped instead of being copied.

    Returns:
        A ``CopyResult`` listing the names copied and the names skipped.
    """
    docs_dir = cfg.documents_dir
    docs_dir.mkdir(parents=True, exist_ok=True)

    outcome = CopyResult()
    for source in paths:
        target = docs_dir / source.name
        # Check the destination against the documents directory before touching it.
        validate_path_within(target, docs_dir)

        may_write = not target.exists() or force
        if not may_write:
            outcome.skipped.append(source.name)
            continue

        if not source.is_dir():
            shutil.copy2(source, target)
        else:
            # Merge into an existing tree and drop ignored sub-directories.
            shutil.copytree(
                source,
                target,
                symlinks=False,
                ignore=_copytree_ignore,
                dirs_exist_ok=True,
            )
        outcome.copied.append(source.name)

    return outcome

49 

50 

@contextmanager
def temporary_ocr_config(
    enable_ocr: bool | None = None,
    ocr_timeout: float | None = None,
) -> Generator[None, None, None]:
    """Apply OCR setting overrides on ``cfg`` while the ``with`` block runs.

    Args:
        enable_ocr: Value to assign to ``cfg.enable_ocr``; ``None`` leaves it as is.
        ocr_timeout: Value to assign to ``cfg.ocr_timeout``; ``None`` leaves it as is.

    Yields:
        ``None``. On exit, including when the block raises, both settings are
        reset to the values they held when the context was entered.
    """
    saved_enable = cfg.enable_ocr
    saved_timeout = cfg.ocr_timeout
    try:
        if enable_ocr is not None:
            cfg.enable_ocr = enable_ocr
        if ocr_timeout is not None:
            cfg.ocr_timeout = ocr_timeout
        yield
    finally:
        # Both values are restored unconditionally, even if only one was overridden.
        cfg.enable_ocr, cfg.ocr_timeout = saved_enable, saved_timeout