Coverage for src / lilbee / data / store / schema.py: 100%
10 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
1"""PyArrow schemas for the LanceDB tables managed by the store."""
3from __future__ import annotations
5import pyarrow as pa
def _meta_schema() -> pa.Schema:
    """Return the PyArrow schema describing embedding/schema metadata.

    Columns, in order: ``embedding_model`` (utf8), ``embedding_dim`` (int32),
    ``schema_version`` (int32) and ``updated_at`` (utf8).
    """
    text, int32 = pa.utf8(), pa.int32()
    # Column order is part of the schema, so keep it explicit in one table.
    columns = (
        ("embedding_model", text),
        ("embedding_dim", int32),
        ("schema_version", int32),
        ("updated_at", text),
    )
    return pa.schema([pa.field(name, dtype) for name, dtype in columns])
def _sources_schema() -> pa.Schema:
    """Return the PyArrow schema describing ingested source files.

    Columns, in order: ``filename``, ``file_hash`` and ``ingested_at`` (utf8),
    ``chunk_count`` (int32) and ``source_type`` (utf8).
    """
    text, int32 = pa.utf8(), pa.int32()
    # Column order is part of the schema, so keep it explicit in one table.
    columns = (
        ("filename", text),
        ("file_hash", text),
        ("ingested_at", text),
        ("chunk_count", int32),
        ("source_type", text),
    )
    return pa.schema([pa.field(name, dtype) for name, dtype in columns])
def _page_texts_schema() -> pa.Schema:
    """Return the PyArrow schema describing per-page text records.

    Columns, in order: ``source`` (utf8), ``page`` (int32), ``text`` (utf8)
    and ``content_type`` (utf8).
    """
    text, int32 = pa.utf8(), pa.int32()
    # Column order is part of the schema, so keep it explicit in one table.
    columns = (
        ("source", text),
        ("page", int32),
        ("text", text),
        ("content_type", text),
    )
    return pa.schema([pa.field(name, dtype) for name, dtype in columns])
def _citations_schema() -> pa.Schema:
    """Return the PyArrow schema describing citation records.

    String (utf8) columns: ``wiki_source``, ``citation_key``, ``claim_type``,
    ``source_filename``, ``source_hash``, ``excerpt`` and ``created_at``.
    Integer (int32) columns: ``wiki_chunk_index``, ``page_start``,
    ``page_end``, ``line_start`` and ``line_end``.
    """
    text, int32 = pa.utf8(), pa.int32()
    # Column order is part of the schema, so keep it explicit in one table.
    columns = (
        ("wiki_source", text),
        ("wiki_chunk_index", int32),
        ("citation_key", text),
        ("claim_type", text),
        ("source_filename", text),
        ("source_hash", text),
        ("page_start", int32),
        ("page_end", int32),
        ("line_start", int32),
        ("line_end", int32),
        ("excerpt", text),
        ("created_at", text),
    )
    return pa.schema([pa.field(name, dtype) for name, dtype in columns])