Coverage for src/lilbee/retrieval/query/memory_extract.py

1"""Auto-extraction of durable memories from a chat turn.

3A small LLM pass over the user's message and the assistant's answer that

4proposes durable facts/preferences worth remembering. The model is asked for

5a strict JSON array; parsing is defensive (a non-conforming reply yields no

6memories rather than an error). Callers store the results, which the user can

7review and remove in ``/memories``.

8"""

10from __future__ import annotations

12import json

13import logging

14import re

15from collections.abc import Callable

16from dataclasses import dataclass

18from lilbee.data.store import MemoryKind

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Shape of the chat callable handed to the extraction pass: arbitrary
# arguments in, the reply text out.
ChatFn = Callable[..., str]

# Accepted length range (in characters) for a memory's text. The bounds mirror
# the prior-art auto-capture filter: very short strings carry no durable
# signal, very long ones are usually the model restating the answer.
_MIN_MEMORY_CHARS = 10
_MAX_MEMORY_CHARS = 500

# System instructions for the extraction pass: what counts as a memory, what
# to ignore, and the strict JSON-array reply format.
_EXTRACT_SYSTEM_PROMPT = (
    "You extract durable, long-term memories about the user from a single chat turn. "
    "A memory is a stable fact about the user or their project (not a one-off question) "
    "or a standing preference for how they want help. "
    "Ignore transient details, the assistant's own content, and anything specific to "
    "just this question. "
    "Respond with ONLY a JSON array (no prose). Each element is an object "
    '{"text": "<the memory, third person>", "kind": "fact" | "preference"}. '
    "Return [] when nothing is worth remembering."
)

# User-message template; filled with the turn's question and answer.
_EXTRACT_USER_TEMPLATE = "User said:\n{question}\n\nAssistant replied:\n{answer}"

# Greedy, newline-spanning match running from the first "[" to the last "]".
_JSON_ARRAY_RE = re.compile(r"\[.*\]", re.DOTALL)

@dataclass(frozen=True)
class ExtractedMemory:
    """One candidate memory produced by the extraction pass (immutable)."""

    # The memory's wording.
    text: str
    # Category decoded from the model's "kind" field.
    kind: MemoryKind

def build_extract_messages(question: str, answer: str) -> list[dict[str, str]]:
    """Return the two chat messages (system, then user) for one extraction call.

    The system message carries the extraction instructions; the user message
    is the turn template filled with *question* and *answer*.
    """
    system_message = {"role": "system", "content": _EXTRACT_SYSTEM_PROMPT}
    turn_text = _EXTRACT_USER_TEMPLATE.format(question=question, answer=answer)
    return [system_message, {"role": "user", "content": turn_text}]

def _coerce_kind(value: object) -> MemoryKind:
    """Map a raw ``kind`` value onto MemoryKind, falling back to FACT.

    Non-string values and strings that MemoryKind rejects with ValueError
    both resolve to ``MemoryKind.FACT``.
    """
    if isinstance(value, str):
        try:
            return MemoryKind(value)
        except ValueError:
            # Unrecognized label: use the fallback below.
            pass
    return MemoryKind.FACT

def parse_extraction(raw: str) -> list[ExtractedMemory]:
    """Parse the model's reply into memories; tolerate a non-conforming reply.

    Decodes the first JSON array in *raw* (models often wrap it in prose or a
    code fence, and may follow it with further bracketed text), keeps only
    objects whose stripped ``text`` is a string of usable length, and decodes
    the kind. Any parse failure yields an empty list.
    """
    decoder = json.JSONDecoder()
    items = None
    # Attempt a decode at each "[" in turn. Taking the span from the first "["
    # to the last "]" instead would discard an otherwise valid array as soon
    # as the reply contains a stray bracket after it, e.g. "[...] (see [docs])".
    start = raw.find("[")
    while start != -1:
        try:
            # A successful decode that starts at "[" is always a list.
            items, _ = decoder.raw_decode(raw, start)
        except json.JSONDecodeError:
            start = raw.find("[", start + 1)
            continue
        except RecursionError:
            # Pathologically deep nesting: treat like any other parse failure.
            return []
        break
    if items is None:
        return []

    memories: list[ExtractedMemory] = []
    for item in items:
        if not isinstance(item, dict):
            continue
        text = item.get("text")
        if not isinstance(text, str):
            continue
        text = text.strip()
        # Length is checked after stripping so padding cannot rescue a
        # too-short memory or sink a valid one.
        if not _MIN_MEMORY_CHARS <= len(text) <= _MAX_MEMORY_CHARS:
            continue
        memories.append(ExtractedMemory(text=text, kind=_coerce_kind(item.get("kind"))))
    return memories

103

104

def extract_memories(question: str, answer: str, chat: ChatFn) -> list[ExtractedMemory]:
    """Run the extraction pass for one turn; never raises.

    *chat* is the provider's non-streaming chat callable; it is invoked with
    the extraction messages and ``stream=False``. A blank *question* or
    *answer* skips the call entirely. A model or transport failure, or a
    reply that is not a string, logs at debug level and yields no memories so
    a bad extraction never disrupts the chat session.
    """
    if not question.strip() or not answer.strip():
        return []
    try:
        raw = chat(build_extract_messages(question, answer), stream=False)
    except Exception:
        log.debug("Memory extraction call failed", exc_info=True)
        return []
    if not isinstance(raw, str):
        # ChatFn promises a str, but a misbehaving provider could hand back
        # None or a stream object; parsing that would raise and break the
        # "never raises" contract.
        log.debug("Memory extraction returned a non-string reply: %s", type(raw).__name__)
        return []
    return parse_extraction(raw)

Coverage for src / lilbee / retrieval / query / memory_extract.py: 100%

56 statements