Coverage for src / lilbee / data / store / schema.py: 100%

8 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""PyArrow schemas for the LanceDB tables managed by the store.""" 

2 

3from __future__ import annotations 

4 

5import pyarrow as pa 

6 

7 

def _meta_schema() -> pa.Schema:
    """Build the PyArrow schema holding the store's metadata columns.

    Columns, in order: ``embedding_model`` (utf8), ``embedding_dim`` (int32),
    ``schema_version`` (int32) and ``updated_at`` (utf8).

    Returns:
        A ``pa.Schema`` with the four fields listed above.
    """
    text, integer = pa.utf8(), pa.int32()
    # (column name, Arrow type) pairs; list order is the schema's field order.
    columns = [
        ("embedding_model", text),
        ("embedding_dim", integer),
        ("schema_version", integer),
        ("updated_at", text),  # stored as a string, not an Arrow timestamp
    ]
    return pa.schema([pa.field(name, dtype) for name, dtype in columns])

17 

18 

def _sources_schema() -> pa.Schema:
    """Build the PyArrow schema describing one row per source file.

    Columns, in order: ``filename`` (utf8), ``file_hash`` (utf8),
    ``ingested_at`` (utf8), ``chunk_count`` (int32) and ``source_type`` (utf8).

    Returns:
        A ``pa.Schema`` with the five fields listed above.
    """
    text, integer = pa.utf8(), pa.int32()
    # (column name, Arrow type) pairs; list order is the schema's field order.
    columns = [
        ("filename", text),
        ("file_hash", text),
        ("ingested_at", text),  # stored as a string, not an Arrow timestamp
        ("chunk_count", integer),
        ("source_type", text),
    ]
    return pa.schema([pa.field(name, dtype) for name, dtype in columns])

29 

30 

def _citations_schema() -> pa.Schema:
    """Build the PyArrow schema describing citation records.

    String (utf8) columns: ``wiki_source``, ``citation_key``, ``claim_type``,
    ``source_filename``, ``source_hash``, ``excerpt`` and ``created_at``.
    Integer (int32) columns: ``wiki_chunk_index``, ``page_start``,
    ``page_end``, ``line_start`` and ``line_end``.

    Returns:
        A ``pa.Schema`` with the twelve fields in the order declared below.
    """
    text, integer = pa.utf8(), pa.int32()
    # (column name, Arrow type) pairs; list order is the schema's field order.
    columns = [
        ("wiki_source", text),
        ("wiki_chunk_index", integer),
        ("citation_key", text),
        ("claim_type", text),
        ("source_filename", text),
        ("source_hash", text),
        ("page_start", integer),
        ("page_end", integer),
        ("line_start", integer),
        ("line_end", integer),
        ("excerpt", text),
        ("created_at", text),  # stored as a string, not an Arrow timestamp
    ]
    return pa.schema([pa.field(name, dtype) for name, dtype in columns])