Coverage for src / lilbee / api.py: 100%
96 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-06-28 01:01 +0000
1"""Programmatic access to lilbee's retrieval pipeline.
3Retrieval only -- no LLM chat. Search your indexed documents from Python.
4Optional features (concept graph, reranker) activate automatically when
5their dependencies are installed.
7Usage::
9 from lilbee import Lilbee
11 bee = Lilbee("./docs")
12 bee.sync()
13 results = bee.search("authentication")
14"""
16from __future__ import annotations
18import asyncio
19from collections.abc import Iterator
20from contextlib import contextmanager
21from pathlib import Path
22from typing import TYPE_CHECKING
24# app.ingest stays at module top: it is a thin wrapper over shutil + the
25# config singleton (~50ms cumulative beyond core.config). data.ingest is
26# deferred at each callsite below because it transitively imports spaCy via
27# the wiki package and adds ~3s on first touch.
28from lilbee.app.ingest import copy_files
29from lilbee.app.services import reset_services
30from lilbee.core.config import Config, cfg
31from lilbee.data.store import MemoryKind, MemoryRow, Store
32from lilbee.providers.factory import create_provider
33from lilbee.retrieval.concepts import ConceptGraph
34from lilbee.retrieval.embedder import Embedder
35from lilbee.retrieval.query import Searcher
36from lilbee.retrieval.reranker import Reranker
38if TYPE_CHECKING:
39 from lilbee.data.ingest import SyncResult
40 from lilbee.data.store import SearchChunk
41 from lilbee.providers.base import LLMProvider
@contextmanager
def _swap_config(target: Config) -> Iterator[None]:
    """Temporarily overwrite the global ``cfg`` fields with *target*'s values.

    On entry, every field declared on ``type(target)`` is copied onto the
    global ``cfg`` and ``reset_services()`` is called. On exit,
    ``reset_services()`` is called again and the field values captured from
    ``cfg`` at entry are written back.

    The swap runs inside the ``try`` block so that a failure part-way through
    (a ``setattr`` or the first ``reset_services()`` raising) still restores
    the snapshot instead of leaving ``cfg`` half-overwritten.

    Not thread-safe -- sequential use only.

    Args:
        target: Config whose field values should be active inside the
            ``with`` body.

    Yields:
        None. The body runs while ``cfg`` mirrors *target*.
    """
    # Capture the current values first so they can always be put back.
    snapshot = {name: getattr(cfg, name) for name in type(cfg).model_fields}
    try:
        for name in type(target).model_fields:
            setattr(cfg, name, getattr(target, name))
        reset_services()
        yield
    finally:
        try:
            reset_services()
        finally:
            # Restore even when the teardown reset itself raised.
            for name, val in snapshot.items():
                setattr(cfg, name, val)
class Lilbee:
    """Programmatic access to lilbee's retrieval pipeline.

    Each instance owns its own ``Config`` plus the components built from it
    (provider, store, embedder, reranker, concept graph, searcher). Every
    public operation runs inside ``_swap_config(self._config)``, so the
    global ``cfg`` mirrors this instance's config for the duration of the
    call and is restored afterwards. Because of that swap, instances are
    meant for sequential use, not concurrent use.

    Usage::

        from lilbee import Lilbee

        bee = Lilbee("./docs")
        bee.sync()
        results = bee.search("authentication")
    """

    def __init__(
        self,
        documents_dir: str | Path | None = None,
        *,
        config: Config | None = None,
        provider: LLMProvider | None = None,
    ) -> None:
        """Create a lilbee instance.

        Args:
            documents_dir: Root directory for this instance. The path is
                resolved and used as ``data_root``; documents live in
                ``root / "documents"``, index data in ``root / "data"`` and
                LanceDB in ``root / "data" / "lancedb"``. All other settings
                are copied from the global ``cfg``.
            config: Full Config instance for complete control.
            provider: LLM provider instance. If ``None``, one is created
                from the resolved config.

        Pass documents_dir or config, not both. If neither is given, uses
        ``Config()`` (same defaults as the CLI).

        The configured ``documents_dir`` and ``data_dir`` are created if
        they do not exist yet.

        Raises:
            ValueError: If both *documents_dir* and *config* are given.
        """
        if documents_dir is not None and config is not None:
            raise ValueError("Pass documents_dir or config, not both")

        if config is not None:
            self._config = config
        elif documents_dir is not None:
            root = Path(documents_dir).resolve()
            self._config = cfg.model_copy(
                update={
                    "data_root": root,
                    "documents_dir": root / "documents",
                    "data_dir": root / "data",
                    "lancedb_dir": root / "data" / "lancedb",
                },
            )
        else:
            self._config = Config()

        self._config.documents_dir.mkdir(parents=True, exist_ok=True)
        self._config.data_dir.mkdir(parents=True, exist_ok=True)

        # Compare against None rather than relying on truthiness, so an
        # explicitly supplied provider is never replaced just because the
        # object happens to evaluate as falsy.
        if provider is None:
            provider = create_provider(self._config)
        self._provider = provider
        self._store = Store(self._config)
        self._embedder = Embedder(self._config, self._provider)
        self._reranker = Reranker(self._config)
        self._concepts = ConceptGraph(self._config, self._store)
        self._searcher = Searcher(
            self._config,
            self._provider,
            self._store,
            self._embedder,
            self._reranker,
            self._concepts,
        )

    @property
    def config(self) -> Config:
        """The Config instance backing this Lilbee."""
        return self._config

    @property
    def store(self) -> Store:
        """The Store component."""
        return self._store

    @property
    def embedder(self) -> Embedder:
        """The Embedder component."""
        return self._embedder

    @property
    def searcher(self) -> Searcher:
        """The Searcher component."""
        return self._searcher

    def sync(self, *, quiet: bool = True) -> SyncResult:
        """Sync documents to the vector store. Returns what changed.

        Runs the async ingest ``sync`` to completion via ``asyncio.run``.

        Args:
            quiet: Forwarded unchanged to the ingest ``sync`` coroutine.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(quiet=quiet))

    def search(self, query: str, *, top_k: int = 0) -> list[SearchChunk]:
        """Search indexed documents. Returns ranked chunks.

        Args:
            query: Search text.
            top_k: Forwarded unchanged to ``Searcher.search``.
                NOTE(review): 0 presumably means "use the configured
                default" -- confirm against ``Searcher.search``.
        """
        with _swap_config(self._config):
            return self._searcher.search(query, top_k=top_k)

    def add(self, paths: list[str | Path]) -> SyncResult:
        """Add files to the knowledge base and sync.

        Resolves each path, copies the files with ``copy_files(force=True)``
        while this instance's config is active, then runs a quiet sync.

        Args:
            paths: Files to add.

        Returns:
            The result of the sync that follows the copy.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        resolved = [Path(p).resolve() for p in paths]
        with _swap_config(self._config):
            copy_files(resolved, force=True)
            return asyncio.run(_sync(quiet=True))

    def remove(self, name: str) -> None:
        """Remove a document from the index by source name.

        The store is called with ``delete_files=True``.
        """
        with _swap_config(self._config):
            self._store.remove_documents([name], delete_files=True)

    def status(self) -> dict[str, object]:
        """Return index stats.

        Returns:
            A dict with the keys ``documents_dir`` and ``data_dir`` (paths
            as strings), ``document_count`` (number of sources reported by
            the store) and ``sources`` (the ``filename`` of each source).
        """
        with _swap_config(self._config):
            sources = self._store.get_sources()
            return {
                "documents_dir": str(self._config.documents_dir),
                "data_dir": str(self._config.data_dir),
                "document_count": len(sources),
                "sources": [s["filename"] for s in sources],
            }

    def rebuild(self) -> SyncResult:
        """Rebuild the entire index from scratch.

        Runs the ingest ``sync`` with ``force_rebuild=True`` and
        ``quiet=True``.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(force_rebuild=True, quiet=True))

    def remember(
        self,
        text: str,
        *,
        kind: MemoryKind = MemoryKind.FACT,
        shared: bool = False,
    ) -> str:
        """Store a fact or preference in long-term memory; returns its id.

        This library primitive does not consult ``memory_enabled``: that flag
        gates the interactive surfaces (TUI/CLI/MCP/REST) and the chat-prompt
        injection, not direct programmatic access. ``remember`` and ``recall``
        operate as a pair regardless of the flag.

        Args:
            text: The content to remember.
            kind: Memory category; defaults to ``MemoryKind.FACT``.
            shared: Forwarded unchanged to ``make_memory_row``.

        Returns:
            The value returned by ``Store.add_memory`` for the new row.
        """
        from lilbee.app.memory import make_memory_row

        with _swap_config(self._config):
            record = make_memory_row(text, self._embedder.embed, kind=kind, shared=shared)
            return self._store.add_memory(record)

    def recall(self, query: str, *, top_k: int | None = None) -> list[MemoryRow]:
        """Recall facts relevant to *query* from long-term memory.

        Args:
            query: Text that is embedded and used for the memory search.
            top_k: Maximum number of results; ``None`` falls back to the
                config's ``memory_top_k``.

        The search also applies the config's ``memory_max_distance`` and the
        predicate returned by ``local_owner_predicate()``.
        """
        from lilbee.data.store import local_owner_predicate

        with _swap_config(self._config):
            return self._store.search_memories(
                self._embedder.embed(query),
                owner_predicate=local_owner_predicate(),
                top_k=self._config.memory_top_k if top_k is None else top_k,
                max_distance=self._config.memory_max_distance,
            )

    def memories(self) -> list[MemoryRow]:
        """List all stored memories, newest first.

        Results are filtered by the predicate from ``local_owner_predicate()``.
        """
        from lilbee.data.store import local_owner_predicate

        with _swap_config(self._config):
            return self._store.get_memories(owner_predicate=local_owner_predicate())

    def forget(self, memory_id: str) -> None:
        """Delete a memory by id."""
        with _swap_config(self._config):
            self._store.delete_memory(memory_id)