Coverage for src / lilbee / data / ingest / code.py: 100%
15 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Code-file ingestion via tree-sitter chunking."""
3from __future__ import annotations
5from pathlib import Path
7from lilbee.app.services import get_services
8from lilbee.data.code_chunker import CodeChunk, chunk_code
9from lilbee.data.ingest.types import ChunkRecord
10from lilbee.data.store import CHUNK_TYPE_RAW
11from lilbee.runtime.progress import DetailedProgressCallback, noop_callback
def ingest_code_sync(
    path: Path,
    source_name: str,
    on_progress: DetailedProgressCallback = noop_callback,
) -> list[ChunkRecord]:
    """Chunk a code file, embed each chunk, and build store-ready records.

    Args:
        path: Code file handed to ``chunk_code`` for tree-sitter chunking.
        source_name: Recorded as ``source`` on every record and passed to
            the embedder as its ``source`` argument.
        on_progress: Progress callback forwarded to ``embed_batch``;
            defaults to ``noop_callback``.

    Returns:
        One ``ChunkRecord`` per chunk, in the order the chunker produced
        them. An empty list when the chunker yields no chunks.

    Raises:
        ValueError: If the embedder returns a different number of vectors
            than there are chunks (enforced by ``zip(..., strict=True)``).
    """
    pieces: list[CodeChunk] = chunk_code(path)
    if not pieces:
        # Nothing to embed, so the embedder service is never requested.
        return []

    payloads = [piece.chunk for piece in pieces]
    embeddings = get_services().embedder.embed_batch(
        payloads,
        source=source_name,
        on_progress=on_progress,
    )

    records: list[ChunkRecord] = []
    # strict=True turns a chunk/vector count mismatch into an error instead
    # of silently dropping the unmatched tail.
    for piece, embedding in zip(pieces, embeddings, strict=True):
        records.append(
            ChunkRecord(
                source=source_name,
                content_type="code",
                chunk_type=CHUNK_TYPE_RAW,
                # Page fields are always 0 here; position is carried by the
                # line range instead (presumably because code has no pages).
                page_start=0,
                page_end=0,
                line_start=piece.line_start,
                line_end=piece.line_end,
                chunk=piece.chunk,
                chunk_index=piece.chunk_index,
                vector=embedding,
            )
        )
    return records