Coverage for src / lilbee / data / store / schema.py: 100%
8 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""PyArrow schemas for the LanceDB tables managed by the store."""
3from __future__ import annotations
5import pyarrow as pa
def _meta_schema() -> pa.Schema:
    """Return the PyArrow schema made of the metadata columns.

    Columns, in order:

    * ``embedding_model`` -- utf8
    * ``embedding_dim`` -- int32
    * ``schema_version`` -- int32
    * ``updated_at`` -- utf8 (stored as text; the exact timestamp format is
      decided by the writer, not by this schema)

    A new ``pa.Schema`` object is built on every call.
    """
    return pa.schema(
        [
            pa.field("embedding_model", pa.utf8()),
            pa.field("embedding_dim", pa.int32()),
            pa.field("schema_version", pa.int32()),
            pa.field("updated_at", pa.utf8()),
        ]
    )
def _sources_schema() -> pa.Schema:
    """Return the PyArrow schema made of the per-source columns.

    Columns, in order:

    * ``filename`` -- utf8
    * ``file_hash`` -- utf8
    * ``ingested_at`` -- utf8 (stored as text; the exact timestamp format is
      decided by the writer, not by this schema)
    * ``chunk_count`` -- int32
    * ``source_type`` -- utf8

    A new ``pa.Schema`` object is built on every call.
    """
    return pa.schema(
        [
            pa.field("filename", pa.utf8()),
            pa.field("file_hash", pa.utf8()),
            pa.field("ingested_at", pa.utf8()),
            pa.field("chunk_count", pa.int32()),
            pa.field("source_type", pa.utf8()),
        ]
    )
def _citations_schema() -> pa.Schema:
    """Return the PyArrow schema made of the citation columns.

    Columns, in order:

    * ``wiki_source`` -- utf8
    * ``wiki_chunk_index`` -- int32
    * ``citation_key`` -- utf8
    * ``claim_type`` -- utf8
    * ``source_filename`` -- utf8
    * ``source_hash`` -- utf8
    * ``page_start`` / ``page_end`` -- int32
    * ``line_start`` / ``line_end`` -- int32
    * ``excerpt`` -- utf8
    * ``created_at`` -- utf8 (stored as text; the exact timestamp format is
      decided by the writer, not by this schema)

    A new ``pa.Schema`` object is built on every call.
    """
    return pa.schema(
        [
            pa.field("wiki_source", pa.utf8()),
            pa.field("wiki_chunk_index", pa.int32()),
            pa.field("citation_key", pa.utf8()),
            pa.field("claim_type", pa.utf8()),
            pa.field("source_filename", pa.utf8()),
            pa.field("source_hash", pa.utf8()),
            pa.field("page_start", pa.int32()),
            pa.field("page_end", pa.int32()),
            pa.field("line_start", pa.int32()),
            pa.field("line_end", pa.int32()),
            pa.field("excerpt", pa.utf8()),
            pa.field("created_at", pa.utf8()),
        ]
    )