Coverage for src / lilbee / api.py: 100%

88 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-05-15 20:55 +0000

1"""Programmatic access to lilbee's retrieval pipeline. 

2 

3Retrieval only -- no LLM chat. Search your indexed documents from Python. 

4Optional features (concept graph, reranker) activate automatically when 

5their dependencies are installed. 

6 

7Usage:: 

8 

9 from lilbee import Lilbee 

10 

11 bee = Lilbee("./docs") 

12 bee.sync() 

13 results = bee.search("authentication") 

14""" 

15 

16from __future__ import annotations 

17 

18import asyncio 

19from collections.abc import Iterator 

20from contextlib import contextmanager 

21from pathlib import Path 

22from typing import TYPE_CHECKING 

23 

24# app.ingest stays at module top: it is a thin wrapper over shutil + the 

25# config singleton (~50ms cumulative beyond core.config). data.ingest is 

26# deferred at each callsite below because it transitively imports spaCy via 

27# the wiki package and adds ~3s on first touch. 

28from lilbee.app.ingest import copy_files 

29from lilbee.app.services import reset_services 

30from lilbee.core.config import Config, cfg 

31from lilbee.core.security import validate_path_within 

32from lilbee.data.store import Store 

33from lilbee.providers.factory import create_provider 

34from lilbee.retrieval.concepts import ConceptGraph 

35from lilbee.retrieval.embedder import Embedder 

36from lilbee.retrieval.query import Searcher 

37from lilbee.retrieval.reranker import Reranker 

38 

39if TYPE_CHECKING: 

40 from lilbee.data.ingest import SyncResult 

41 from lilbee.data.store import SearchChunk 

42 from lilbee.providers.base import LLMProvider 

43 

44 

@contextmanager
def _swap_config(target: Config) -> Iterator[None]:
    """Temporarily replace the global ``cfg`` fields with *target*'s values.

    Every model field of the global ``cfg`` is snapshotted first. Each model
    field of *target* is then copied onto ``cfg`` and ``reset_services()`` is
    called (presumably so cached services pick up the swapped values --
    confirm against ``lilbee.app.services``). When the ``with`` block exits,
    normally or through an exception, ``reset_services()`` runs again and the
    snapshot is written back.

    Not thread-safe -- sequential use only.

    Args:
        target: Config whose field values should be active inside the block.
    """
    snapshot = {name: getattr(cfg, name) for name in type(cfg).model_fields}
    try:
        # Swap inside the ``try``: if an assignment or the reset raises
        # part-way through, the ``finally`` below still restores the snapshot
        # instead of leaving the global config half-swapped.
        for name in type(target).model_fields:
            setattr(cfg, name, getattr(target, name))
        reset_services()
        yield
    finally:
        try:
            reset_services()
        finally:
            # Restore even when the reset itself fails.
            for name, val in snapshot.items():
                setattr(cfg, name, val)

60 

61 

class Lilbee:
    """Programmatic access to lilbee's retrieval pipeline.

    Wires together a provider, Store, Embedder, Reranker, ConceptGraph and
    Searcher around a single Config. Every operation runs with that Config
    temporarily swapped into the global ``cfg`` (see ``_swap_config``), so
    instances must be used sequentially, not from several threads at once.

    Usage::

        from lilbee import Lilbee

        bee = Lilbee("./docs")
        bee.sync()
        results = bee.search("authentication")
    """

    def __init__(
        self,
        documents_dir: str | Path | None = None,
        *,
        config: Config | None = None,
        provider: LLMProvider | None = None,
    ) -> None:
        """Create a lilbee instance.

        Args:
            documents_dir: Root folder for this instance. The path is
                resolved and a copy of the global ``cfg`` is made with
                ``data_root`` set to it, documents under ``<root>/documents``,
                data under ``<root>/data`` and the LanceDB directory under
                ``<root>/data/lancedb``.
            config: Full Config instance for complete control.
            provider: LLM provider instance. If not given, creates one from
                config.

        Pass documents_dir or config, not both. If neither is given, uses
        ``Config()`` (same defaults as the CLI).

        Raises:
            ValueError: If both ``documents_dir`` and ``config`` are given.
        """
        if documents_dir is not None and config is not None:
            raise ValueError("Pass documents_dir or config, not both")

        if config is not None:
            self._config = config
        elif documents_dir is not None:
            root = Path(documents_dir).resolve()
            self._config = cfg.model_copy(
                update={
                    "data_root": root,
                    "documents_dir": root / "documents",
                    "data_dir": root / "data",
                    "lancedb_dir": root / "data" / "lancedb",
                },
            )
        else:
            self._config = Config()

        # Make sure the working directories exist before any component
        # is constructed against them.
        self._config.documents_dir.mkdir(parents=True, exist_ok=True)
        self._config.data_dir.mkdir(parents=True, exist_ok=True)

        # Explicit ``is not None`` check: a caller-supplied provider must be
        # honoured even if the object happens to evaluate as falsy.
        if provider is not None:
            self._provider = provider
        else:
            self._provider = create_provider(self._config)
        self._store = Store(self._config)
        self._embedder = Embedder(self._config, self._provider)
        self._reranker = Reranker(self._config)
        self._concepts = ConceptGraph(self._config, self._store)
        self._searcher = Searcher(
            self._config,
            self._provider,
            self._store,
            self._embedder,
            self._reranker,
            self._concepts,
        )

    @property
    def config(self) -> Config:
        """The Config instance backing this Lilbee."""
        return self._config

    @property
    def store(self) -> Store:
        """The Store component."""
        return self._store

    @property
    def embedder(self) -> Embedder:
        """The Embedder component."""
        return self._embedder

    @property
    def searcher(self) -> Searcher:
        """The Searcher component."""
        return self._searcher

    def sync(self, *, quiet: bool = True) -> SyncResult:
        """Sync documents to the vector store. Returns what changed.

        Runs the async sync routine to completion with ``asyncio.run``, so it
        cannot be called from a thread that already has a running event loop.

        Args:
            quiet: Forwarded to the underlying sync routine.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(quiet=quiet))

    def search(self, query: str, *, top_k: int = 0) -> list[SearchChunk]:
        """Search indexed documents. Returns ranked chunks.

        Args:
            query: Search text.
            top_k: Forwarded unchanged to ``Searcher.search``; the meaning of
                the default ``0`` is defined there (presumably "use the
                configured default" -- confirm).
        """
        with _swap_config(self._config):
            return self._searcher.search(query, top_k=top_k)

    def add(self, paths: list[str | Path]) -> SyncResult:
        """Add files to the knowledge base and sync.

        Resolves each path, copies the files with ``copy_files(force=True)``
        while this instance's config is active, then runs a quiet sync.

        Args:
            paths: Files to add.

        Returns:
            The result of the sync that follows the copy.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        resolved = [Path(p).resolve() for p in paths]
        with _swap_config(self._config):
            copy_files(resolved, force=True)
            return asyncio.run(_sync(quiet=True))

    def remove(self, name: str) -> None:
        """Remove a document from the index by source name.

        The index entries are deleted first; the matching file under the
        documents directory is then deleted if it exists.

        Args:
            name: Source name, interpreted relative to the documents
                directory when locating the file.
        """
        with _swap_config(self._config):
            self._store.delete_by_source(name)
            self._store.delete_source(name)
            try:
                doc_path = validate_path_within(
                    self._config.documents_dir / name, self._config.documents_dir
                )
            except ValueError:
                # Validation rejected the path (presumably it escapes the
                # documents directory); the index entries are already gone,
                # so just skip touching the filesystem.
                return
            # EAFP: avoids the exists()/unlink() race of checking first.
            doc_path.unlink(missing_ok=True)

    def status(self) -> dict[str, object]:
        """Return index stats (document count, data directory, etc.).

        Returns:
            A dict with ``documents_dir`` and ``data_dir`` as strings,
            ``document_count`` and ``sources`` (the ``filename`` of every
            source the store reports).
        """
        with _swap_config(self._config):
            sources = self._store.get_sources()
            return {
                "documents_dir": str(self._config.documents_dir),
                "data_dir": str(self._config.data_dir),
                "document_count": len(sources),
                "sources": [s["filename"] for s in sources],
            }

    def rebuild(self) -> SyncResult:
        """Rebuild the entire index from scratch.

        Runs the sync routine with ``force_rebuild=True`` via ``asyncio.run``,
        so it cannot be called from a thread with a running event loop.
        """
        # heavy: data.ingest transitively imports spaCy via wiki
        from lilbee.data.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(force_rebuild=True, quiet=True))