Coverage for src / lilbee / api.py: 100%
88 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-05-15 20:55 +0000
1"""Programmatic access to lilbee's retrieval pipeline.
3Retrieval only -- no LLM chat. Search your indexed documents from Python.
4Optional features (concept graph, reranker) activate automatically when
5their dependencies are installed.
7Usage::
9 from lilbee import Lilbee
11 bee = Lilbee("./docs")
12 bee.sync()
13 results = bee.search("authentication")
14"""
16from __future__ import annotations
18import asyncio
19from collections.abc import Iterator
20from contextlib import contextmanager
21from pathlib import Path
22from typing import TYPE_CHECKING
24# app.ingest stays at module top: it is a thin wrapper over shutil + the
25# config singleton (~50ms cumulative beyond core.config). data.ingest is
26# deferred at each callsite below because it transitively imports spaCy via
27# the wiki package and adds ~3s on first touch.
28from lilbee.app.ingest import copy_files
29from lilbee.app.services import reset_services
30from lilbee.core.config import Config, cfg
31from lilbee.core.security import validate_path_within
32from lilbee.data.store import Store
33from lilbee.providers.factory import create_provider
34from lilbee.retrieval.concepts import ConceptGraph
35from lilbee.retrieval.embedder import Embedder
36from lilbee.retrieval.query import Searcher
37from lilbee.retrieval.reranker import Reranker
39if TYPE_CHECKING:
40 from lilbee.data.ingest import SyncResult
41 from lilbee.data.store import SearchChunk
42 from lilbee.providers.base import LLMProvider
@contextmanager
def _swap_config(target: Config) -> Iterator[None]:
    """Temporarily overwrite the global ``cfg`` fields with *target*'s values.

    Every field of ``cfg`` is snapshotted first. The fields declared on
    *target*'s model are then assigned onto ``cfg`` and ``reset_services()``
    is called before the ``with`` body runs. On exit -- normal or through an
    exception -- ``reset_services()`` is called again and the snapshot is
    written back onto ``cfg``.

    The swap itself happens inside the guarded region, so an error raised
    while assigning a field or while resetting services cannot leave ``cfg``
    half-swapped: the snapshot is always restored.

    Not thread-safe -- sequential use only.

    Args:
        target: Config whose field values should be active inside the block.
    """
    snapshot = {name: getattr(cfg, name) for name in type(cfg).model_fields}
    try:
        for name in type(target).model_fields:
            setattr(cfg, name, getattr(target, name))
        reset_services()
        yield
    finally:
        try:
            reset_services()
        finally:
            # Restore the original values even if the service reset raised.
            for name, value in snapshot.items():
                setattr(cfg, name, value)
class Lilbee:
    """Programmatic access to lilbee's retrieval pipeline.

    Each instance owns its own Config plus the store, embedder, reranker,
    concept graph and searcher built from it. Operations temporarily swap
    the instance's Config into the global ``cfg`` (see ``_swap_config``),
    so instances must be used sequentially, not concurrently.

    Usage::

        from lilbee import Lilbee

        bee = Lilbee("./docs")
        bee.sync()
        results = bee.search("authentication")
    """

    def __init__(
        self,
        documents_dir: str | Path | None = None,
        *,
        config: Config | None = None,
        provider: LLMProvider | None = None,
    ) -> None:
        """Create a lilbee instance.

        Args:
            documents_dir: Root directory for this instance. The path is
                resolved and a copy of the global ``cfg`` is made with
                ``data_root`` set to it, documents under
                ``<root>/documents``, data under ``<root>/data`` and the
                LanceDB directory under ``<root>/data/lancedb``.
            config: Full Config instance for complete control.
            provider: LLM provider instance. If ``None``, one is created
                from the resolved config.

        Pass documents_dir or config, not both. If neither is given, uses
        ``Config()`` (same defaults as the CLI).

        Raises:
            ValueError: If both ``documents_dir`` and ``config`` are given.
        """
        if documents_dir is not None and config is not None:
            raise ValueError("Pass documents_dir or config, not both")

        if config is not None:
            self._config = config
        elif documents_dir is not None:
            root = Path(documents_dir).resolve()
            self._config = cfg.model_copy(
                update={
                    "data_root": root,
                    "documents_dir": root / "documents",
                    "data_dir": root / "data",
                    "lancedb_dir": root / "data" / "lancedb",
                },
            )
        else:
            self._config = Config()

        self._config.documents_dir.mkdir(parents=True, exist_ok=True)
        self._config.data_dir.mkdir(parents=True, exist_ok=True)

        # Compare against None explicitly: a caller-supplied provider must be
        # honoured even if the object happens to evaluate as falsy.
        if provider is None:
            provider = create_provider(self._config)
        self._provider = provider
        self._store = Store(self._config)
        self._embedder = Embedder(self._config, self._provider)
        self._reranker = Reranker(self._config)
        self._concepts = ConceptGraph(self._config, self._store)
        self._searcher = Searcher(
            self._config,
            self._provider,
            self._store,
            self._embedder,
            self._reranker,
            self._concepts,
        )

    @property
    def config(self) -> Config:
        """The Config instance backing this Lilbee."""
        return self._config

    @property
    def store(self) -> Store:
        """The Store component."""
        return self._store

    @property
    def embedder(self) -> Embedder:
        """The Embedder component."""
        return self._embedder

    @property
    def searcher(self) -> Searcher:
        """The Searcher component."""
        return self._searcher

    def sync(self, *, quiet: bool = True) -> SyncResult:
        """Sync documents to the vector store. Returns what changed.

        Args:
            quiet: Forwarded unchanged to the ingest ``sync`` coroutine.

        Runs the coroutine with ``asyncio.run``, so it must not be called
        from inside an already-running event loop.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(quiet=quiet))

    def search(self, query: str, *, top_k: int = 0) -> list[SearchChunk]:
        """Search indexed documents. Returns ranked chunks.

        Args:
            query: Query text handed to the searcher.
            top_k: Forwarded unchanged to ``Searcher.search``.
                NOTE(review): 0 presumably selects the configured default
                -- confirm against ``Searcher.search``.
        """
        with _swap_config(self._config):
            return self._searcher.search(query, top_k=top_k)

    def add(self, paths: list[str | Path]) -> SyncResult:
        """Add files to the knowledge base and sync.

        Each path is resolved to an absolute path, handed to ``copy_files``
        with ``force=True`` while this instance's config is active, and then
        a quiet sync is run.

        Args:
            paths: Files to add.

        Returns:
            The result of the sync that follows the copy.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        resolved = [Path(p).resolve() for p in paths]
        with _swap_config(self._config):
            copy_files(resolved, force=True)
            return asyncio.run(_sync(quiet=True))

    def remove(self, name: str) -> None:
        """Remove a document from the index by source name.

        The store entries for *name* are deleted first. The matching file
        under the documents directory is then deleted if it exists. When
        ``validate_path_within`` rejects the path with ``ValueError`` the
        method returns without touching the filesystem.

        Args:
            name: Source name, also used as the path relative to the
                documents directory.
        """
        with _swap_config(self._config):
            self._store.delete_by_source(name)
            self._store.delete_source(name)
            try:
                doc_path = validate_path_within(
                    self._config.documents_dir / name, self._config.documents_dir
                )
            except ValueError:
                # Path rejected by validation: index entries are gone, leave
                # the filesystem alone.
                return
            if doc_path.exists():
                doc_path.unlink()

    def status(self) -> dict[str, object]:
        """Return index stats (document count, data directory, etc.).

        Returns:
            A dict with ``documents_dir`` and ``data_dir`` as strings,
            ``document_count`` as the number of sources reported by the
            store, and ``sources`` as the list of their ``filename`` values.
        """
        with _swap_config(self._config):
            sources = self._store.get_sources()
            return {
                "documents_dir": str(self._config.documents_dir),
                "data_dir": str(self._config.data_dir),
                "document_count": len(sources),
                "sources": [s["filename"] for s in sources],
            }

    def rebuild(self) -> SyncResult:
        """Rebuild the entire index from scratch.

        Runs the ingest ``sync`` coroutine with ``force_rebuild=True`` and
        ``quiet=True`` while this instance's config is active.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(force_rebuild=True, quiet=True))