Coverage for src / lilbee / data / ingest / code.py: 100%

15 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""Code-file ingestion via tree-sitter chunking.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6 

7from lilbee.app.services import get_services 

8from lilbee.data.code_chunker import CodeChunk, chunk_code 

9from lilbee.data.ingest.types import ChunkRecord 

10from lilbee.data.store import CHUNK_TYPE_RAW 

11from lilbee.runtime.progress import DetailedProgressCallback, noop_callback 

12 

13 

def ingest_code_sync(
    path: Path,
    source_name: str,
    on_progress: DetailedProgressCallback = noop_callback,
) -> list[ChunkRecord]:
    """Chunk a code file, embed every chunk, and build store-ready records.

    The file at ``path`` is split by ``chunk_code``; the chunk texts are then
    embedded in a single ``embed_batch`` call on the embedder obtained from
    ``get_services()``.

    Args:
        path: Location of the code file handed to ``chunk_code``.
        source_name: Value stored in each record's ``source`` field and
            forwarded to the embedder as ``source``.
        on_progress: Callback forwarded to ``embed_batch``.

    Returns:
        One ``ChunkRecord`` per chunk, in chunk order. An empty list is
        returned when chunking yields nothing; in that case the embedder is
        never looked up or called.

    Raises:
        ValueError: If the embedder returns a different number of vectors
            than there are chunks (enforced by ``zip(..., strict=True)``).
    """
    pieces: list[CodeChunk] = chunk_code(path)
    if not pieces:
        # Nothing to embed: skip the embedder lookup entirely.
        return []

    bodies = [piece.chunk for piece in pieces]
    embeddings = get_services().embedder.embed_batch(
        bodies,
        source=source_name,
        on_progress=on_progress,
    )

    records: list[ChunkRecord] = []
    # strict=True turns a chunk/vector count mismatch into an error instead
    # of silently dropping the unmatched tail.
    for piece, embedding in zip(pieces, embeddings, strict=True):
        record = ChunkRecord(
            source=source_name,
            content_type="code",
            chunk_type=CHUNK_TYPE_RAW,
            # Page bounds are always written as 0 for code records; position
            # is carried by the line range instead.
            page_start=0,
            page_end=0,
            line_start=piece.line_start,
            line_end=piece.line_end,
            chunk=piece.chunk,
            chunk_index=piece.chunk_index,
            vector=embedding,
        )
        records.append(record)
    return records