Coverage for src / lilbee / api.py: 100%

96 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-06-28 01:01 +0000

1"""Programmatic access to lilbee's retrieval pipeline. 

2 

3Retrieval only -- no LLM chat. Search your indexed documents from Python. 

4Optional features (concept graph, reranker) activate automatically when 

5their dependencies are installed. 

6 

7Usage:: 

8 

9 from lilbee import Lilbee 

10 

11 bee = Lilbee("./docs") 

12 bee.sync() 

13 results = bee.search("authentication") 

14""" 

15 

16from __future__ import annotations 

17 

18import asyncio 

19from collections.abc import Iterator 

20from contextlib import contextmanager 

21from pathlib import Path 

22from typing import TYPE_CHECKING 

23 

24# app.ingest stays at module top: it is a thin wrapper over shutil + the 

25# config singleton (~50ms cumulative beyond core.config). data.ingest is 

26# deferred at each callsite below because it transitively imports spaCy via 

27# the wiki package and adds ~3s on first touch. 

28from lilbee.app.ingest import copy_files 

29from lilbee.app.services import reset_services 

30from lilbee.core.config import Config, cfg 

31from lilbee.data.store import MemoryKind, MemoryRow, Store 

32from lilbee.providers.factory import create_provider 

33from lilbee.retrieval.concepts import ConceptGraph 

34from lilbee.retrieval.embedder import Embedder 

35from lilbee.retrieval.query import Searcher 

36from lilbee.retrieval.reranker import Reranker 

37 

38if TYPE_CHECKING: 

39 from lilbee.data.ingest import SyncResult 

40 from lilbee.data.store import SearchChunk 

41 from lilbee.providers.base import LLMProvider 

42 

43 

@contextmanager
def _swap_config(target: Config) -> Iterator[None]:
    """Temporarily replace the global ``cfg`` fields with *target*'s values.

    Every model field of the global ``cfg`` is snapshotted, then each field
    declared on *target*'s model is copied onto ``cfg``. ``reset_services()``
    is called once after the swap and once more on exit, before the snapshot
    is written back (presumably so cached services are never reused across
    configurations -- confirm against ``lilbee.app.services``).

    The snapshot is restored on every exit path, including a failure while
    the target values are being applied or while services are being reset.

    Not thread-safe -- sequential use only.

    Args:
        target: Config whose field values should be visible through the
            global ``cfg`` while the ``with`` body runs.

    Yields:
        None. The swap is in effect for the duration of the ``with`` body.
    """
    snapshot = {name: getattr(cfg, name) for name in type(cfg).model_fields}
    try:
        # Applying the target happens inside the try so that a failure
        # part-way through (e.g. an assignment rejected by the model) rolls
        # the global cfg back instead of leaving it half-swapped.
        for name in type(target).model_fields:
            setattr(cfg, name, getattr(target, name))
        reset_services()
        yield
    finally:
        try:
            reset_services()
        finally:
            # Restore even if the service reset itself raised.
            for name, val in snapshot.items():
                setattr(cfg, name, val)

59 

60 

class Lilbee:
    """Programmatic access to lilbee's retrieval pipeline.

    Each instance holds its own ``Config``, provider, ``Store``, ``Embedder``,
    ``Reranker``, ``ConceptGraph`` and ``Searcher``. Every public operation
    runs inside ``_swap_config(self._config)``, so the global ``cfg`` carries
    this instance's values for the duration of the call. That swap is not
    thread-safe; use an instance sequentially.

    Usage::

        from lilbee import Lilbee

        bee = Lilbee("./docs")
        bee.sync()
        results = bee.search("authentication")
    """

    def __init__(
        self,
        documents_dir: str | Path | None = None,
        *,
        config: Config | None = None,
        provider: LLMProvider | None = None,
    ) -> None:
        """Create a lilbee instance.

        Args:
            documents_dir: Root directory for this instance. It is resolved to
                an absolute path and used as ``data_root``; documents live in
                ``<root>/documents``, data in ``<root>/data`` and the LanceDB
                files in ``<root>/data/lancedb``. Every other setting is
                copied from the global ``cfg``.
            config: Full Config instance for complete control.
            provider: LLM provider instance. If not given, creates one from config.

        Pass documents_dir or config, not both. If neither is given, uses
        ``Config()`` (same defaults as the CLI).

        Raises:
            ValueError: If both *documents_dir* and *config* are given.
        """
        if documents_dir is not None and config is not None:
            raise ValueError("Pass documents_dir or config, not both")

        if config is not None:
            self._config = config
        elif documents_dir is not None:
            root = Path(documents_dir).resolve()
            self._config = cfg.model_copy(
                update={
                    "data_root": root,
                    "documents_dir": root / "documents",
                    "data_dir": root / "data",
                    "lancedb_dir": root / "data" / "lancedb",
                },
            )
        else:
            self._config = Config()

        # Make sure the on-disk layout exists before any component touches it.
        self._config.documents_dir.mkdir(parents=True, exist_ok=True)
        self._config.data_dir.mkdir(parents=True, exist_ok=True)

        # Explicit None check: a caller-supplied provider is always honoured,
        # even if the object happens to evaluate as falsy.
        self._provider = provider if provider is not None else create_provider(self._config)
        self._store = Store(self._config)
        self._embedder = Embedder(self._config, self._provider)
        self._reranker = Reranker(self._config)
        self._concepts = ConceptGraph(self._config, self._store)
        self._searcher = Searcher(
            self._config,
            self._provider,
            self._store,
            self._embedder,
            self._reranker,
            self._concepts,
        )

    @property
    def config(self) -> Config:
        """The Config instance backing this Lilbee."""
        return self._config

    @property
    def store(self) -> Store:
        """The Store component."""
        return self._store

    @property
    def embedder(self) -> Embedder:
        """The Embedder component."""
        return self._embedder

    @property
    def searcher(self) -> Searcher:
        """The Searcher component."""
        return self._searcher

    def sync(self, *, quiet: bool = True) -> SyncResult:
        """Sync documents to the vector store. Returns what changed.

        Drives the async ``lilbee.data.ingest.sync`` coroutine to completion
        with ``asyncio.run``, so it must not be called from a thread that is
        already running an event loop.

        Args:
            quiet: Forwarded unchanged to the ingest ``sync`` coroutine.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(quiet=quiet))

    def search(self, query: str, *, top_k: int = 0) -> list[SearchChunk]:
        """Search indexed documents. Returns ranked chunks.

        Args:
            query: Free-text query.
            top_k: Forwarded unchanged to ``Searcher.search``.
        """
        with _swap_config(self._config):
            return self._searcher.search(query, top_k=top_k)

    def add(self, paths: list[str | Path]) -> SyncResult:
        """Add files to the knowledge base and sync.

        Copies each path into the documents directory, then syncs.

        Args:
            paths: Files to add. Each entry is resolved to an absolute path
                before the config swap and then handed to ``copy_files`` with
                ``force=True``.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        resolved = [Path(p).resolve() for p in paths]
        with _swap_config(self._config):
            copy_files(resolved, force=True)
            return asyncio.run(_sync(quiet=True))

    def remove(self, name: str) -> None:
        """Remove a document from the index by source name.

        The store is called with ``delete_files=True``.
        """
        with _swap_config(self._config):
            self._store.remove_documents([name], delete_files=True)

    def status(self) -> dict[str, object]:
        """Return index stats (document count, data directory, etc.).

        Returns:
            A dict with ``documents_dir`` and ``data_dir`` as strings,
            ``document_count`` (number of sources reported by the store) and
            ``sources`` (the ``filename`` of each source).
        """
        with _swap_config(self._config):
            sources = self._store.get_sources()
            return {
                "documents_dir": str(self._config.documents_dir),
                "data_dir": str(self._config.data_dir),
                "document_count": len(sources),
                "sources": [s["filename"] for s in sources],
            }

    def rebuild(self) -> SyncResult:
        """Rebuild the entire index from scratch.

        Runs the ingest ``sync`` coroutine with ``force_rebuild=True`` and
        ``quiet=True``.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(force_rebuild=True, quiet=True))

    def remember(
        self,
        text: str,
        *,
        kind: MemoryKind = MemoryKind.FACT,
        shared: bool = False,
    ) -> str:
        """Store a fact or preference in long-term memory; returns its id.

        This library primitive does not consult ``memory_enabled``: that flag
        gates the interactive surfaces (TUI/CLI/MCP/REST) and the chat-prompt
        injection, not direct programmatic access. ``remember`` and ``recall``
        operate as a pair regardless of the flag.

        Args:
            text: The content to remember.
            kind: Memory category; forwarded to ``make_memory_row``.
            shared: Forwarded to ``make_memory_row``.
        """
        from lilbee.app.memory import make_memory_row

        with _swap_config(self._config):
            # The row is built with this instance's embedder, then persisted.
            record = make_memory_row(text, self._embedder.embed, kind=kind, shared=shared)
            return self._store.add_memory(record)

    def recall(self, query: str, *, top_k: int | None = None) -> list[MemoryRow]:
        """Recall facts relevant to *query* from long-term memory.

        Args:
            query: Text to embed and match against stored memories.
            top_k: Maximum number of results; ``None`` falls back to the
                instance config's ``memory_top_k``.

        The search is filtered with ``local_owner_predicate()`` and bounded by
        the config's ``memory_max_distance``.
        """
        from lilbee.data.store import local_owner_predicate

        with _swap_config(self._config):
            return self._store.search_memories(
                self._embedder.embed(query),
                owner_predicate=local_owner_predicate(),
                top_k=self._config.memory_top_k if top_k is None else top_k,
                max_distance=self._config.memory_max_distance,
            )

    def memories(self) -> list[MemoryRow]:
        """List all stored memories, newest first.

        Results are filtered with ``local_owner_predicate()``; ordering is
        whatever ``Store.get_memories`` returns.
        """
        from lilbee.data.store import local_owner_predicate

        with _swap_config(self._config):
            return self._store.get_memories(owner_predicate=local_owner_predicate())

    def forget(self, memory_id: str) -> None:
        """Delete a memory by id."""
        with _swap_config(self._config):
            self._store.delete_memory(memory_id)