Coverage for src / lilbee / retrieval / query / memory_extract.py: 100%

56 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-06-28 01:01 +0000

1"""Auto-extraction of durable memories from a chat turn. 

2 

3A small LLM pass over the user's message and the assistant's answer that 

4proposes durable facts/preferences worth remembering. The model is asked for 

5a strict JSON array; parsing is defensive (a non-conforming reply yields no 

6memories rather than an error). Callers store the results, which the user can 

7review and remove in ``/memories``. 

8""" 

9 

10from __future__ import annotations 

11 

12import json 

13import logging 

14import re 

15from collections.abc import Callable 

16from dataclasses import dataclass 

17 

18from lilbee.data.store import MemoryKind 

19 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Signature of the chat callable handed to ``extract_memories``. That function
# calls it with the message list and ``stream=False`` and treats the return
# value as the model's reply text.
ChatFn = Callable[..., str]

# Bounds mirror the prior-art auto-capture filter: too-short strings carry no
# durable signal, too-long ones are usually the model restating the answer.
# Both limits are inclusive and apply to the stripped memory text.
_MIN_MEMORY_CHARS = 10
_MAX_MEMORY_CHARS = 500

# System prompt for the extraction pass. It fixes the reply contract the parser
# relies on: a JSON array of {"text", "kind"} objects, or [] for nothing.
_EXTRACT_SYSTEM_PROMPT = (
    "You extract durable, long-term memories about the user from a single chat turn. "
    "A memory is a stable fact about the user or their project (not a one-off question) "
    "or a standing preference for how they want help. "
    "Ignore transient details, the assistant's own content, and anything specific to "
    "just this question. "
    "Respond with ONLY a JSON array (no prose). Each element is an object "
    '{"text": "<the memory, third person>", "kind": "fact" | "preference"}. '
    "Return [] when nothing is worth remembering."
)

# User-message template; ``{question}`` and ``{answer}`` are filled with the
# two sides of the chat turn by ``build_extract_messages``.
_EXTRACT_USER_TEMPLATE = "User said:\n{question}\n\nAssistant replied:\n{answer}"

# Greedy match with DOTALL: spans from the first "[" to the LAST "]" in the
# text, across newlines. Stray brackets in surrounding prose therefore end up
# inside the matched span.
_JSON_ARRAY_RE = re.compile(r"\[.*\]", re.DOTALL)

43 

44 

@dataclass(frozen=True)
class ExtractedMemory:
    """One candidate memory produced by the extraction pass.

    Instances are immutable value objects: frozen, comparable by value and
    hashable.
    """

    # The memory text, as kept by the parser (stripped of outer whitespace).
    text: str
    # Category of the memory, decoded from the model's "kind" field.
    kind: MemoryKind

51 

52 

def build_extract_messages(question: str, answer: str) -> list[dict[str, str]]:
    """Return the two-message conversation that drives the extraction pass.

    The first message is the fixed system prompt; the second is a user message
    that embeds *question* and *answer* via the module's user template.
    """
    system_message = {"role": "system", "content": _EXTRACT_SYSTEM_PROMPT}
    turn_text = _EXTRACT_USER_TEMPLATE.format(question=question, answer=answer)
    user_message = {"role": "user", "content": turn_text}
    return [system_message, user_message]

62 

63 

def _coerce_kind(value: object) -> MemoryKind:
    """Decode a model-supplied kind, defaulting to FACT when unrecognized.

    The exact string is tried first, so every spelling ``MemoryKind`` already
    accepts decodes exactly as before. If that fails, a trimmed, lower-cased
    form is tried: the extraction prompt asks for lower-case ``"fact"`` /
    ``"preference"``, and models frequently answer ``"Preference"`` or pad the
    value, which would otherwise be silently filed as a fact.

    Returns ``MemoryKind.FACT`` for non-strings and for strings that match no
    member in either form.
    """
    if not isinstance(value, str):
        return MemoryKind.FACT
    # dict.fromkeys de-duplicates while keeping order, so an already-normalized
    # value is only looked up once.
    for candidate in dict.fromkeys((value, value.strip().lower())):
        try:
            return MemoryKind(candidate)
        except ValueError:
            continue
    return MemoryKind.FACT

72 

73 

def _memories_from_items(items: list[object]) -> list[ExtractedMemory]:
    """Keep the well-formed entries of one decoded JSON array."""
    memories: list[ExtractedMemory] = []
    for item in items:
        if not isinstance(item, dict):
            continue
        text = item.get("text")
        if not isinstance(text, str):
            continue
        text = text.strip()
        # Length bounds are inclusive and apply to the stripped text.
        if _MIN_MEMORY_CHARS <= len(text) <= _MAX_MEMORY_CHARS:
            kind = _coerce_kind(item.get("kind"))
            memories.append(ExtractedMemory(text=text, kind=kind))
    return memories


def parse_extraction(raw: str) -> list[ExtractedMemory]:
    """Parse the model's reply into memories; tolerate a non-conforming reply.

    Scans *raw* for JSON arrays (models often wrap the array in prose or a
    code fence) and returns the memories of the first array that yields any.
    Within an array only objects with a string ``text`` of usable length are
    kept, and ``kind`` is decoded leniently.

    Returns an empty list when *raw* is not a string, holds no decodable
    array, or no element qualifies. Malformed input never raises.
    """
    if not isinstance(raw, str):
        return []

    decoder = json.JSONDecoder()
    pos = raw.find("[")
    while pos != -1:
        try:
            # raw_decode stops at the end of the first complete JSON value, so
            # brackets elsewhere in the reply ("see [1]", a trailing "]")
            # cannot corrupt the span the way a first-"["-to-last-"]" match
            # does. Decoding starts on "[", so success always yields a list.
            items, consumed = decoder.raw_decode(raw[pos:])
        except (json.JSONDecodeError, RecursionError):
            # Not valid JSON from this bracket (or absurdly nested); retry
            # from the next opening bracket.
            pos = raw.find("[", pos + 1)
            continue
        memories = _memories_from_items(items)
        if memories:
            return memories
        # A valid array without usable entries (e.g. a "[1]" citation): skip
        # past it and keep looking.
        pos = raw.find("[", pos + consumed)
    return []

103 

104 

def extract_memories(question: str, answer: str, chat: ChatFn) -> list[ExtractedMemory]:
    """Run the extraction pass for one turn; never raises.

    *chat* is the provider's non-streaming chat callable; it is invoked with
    the extraction messages and ``stream=False``. A blank *question* or
    *answer* short-circuits to an empty list without calling the model. A
    model or transport failure, or a reply that is not text, is logged at
    debug level and yields no memories so a bad extraction never disrupts the
    chat session.
    """
    if not question.strip() or not answer.strip():
        return []
    try:
        raw = chat(build_extract_messages(question, answer), stream=False)
    except Exception:
        # Deliberate best-effort boundary: extraction is optional, so any
        # provider error is recorded and swallowed rather than propagated.
        log.debug("Memory extraction call failed", exc_info=True)
        return []
    if not isinstance(raw, str):
        # The alias promises text, but a provider can still hand back None or
        # a response object; the parser expects a string, so stop here to
        # keep the never-raises contract.
        log.debug("Memory extraction reply was %s, not text", type(raw).__name__)
        return []
    return parse_extraction(raw)