Coverage for src/lilbee/retrieval/query/history_window.py

1"""Token-budget history windowing for chat conversations."""

3from __future__ import annotations

5from collections.abc import Callable

6from typing import TYPE_CHECKING

8if TYPE_CHECKING:

9 from lilbee.retrieval.query.searcher import ChatMessage

# Conservative char->token estimator. Matches OpenAI's "4 chars ~= 1 token"
# rule of thumb for English; under-counts non-ASCII slightly but the
# budget already leaves headroom for that.
# Used as the integer divisor applied to ``len(text)`` in
# ``estimate_text_tokens``.
_CHARS_PER_TOKEN = 4

def estimate_text_tokens(text: str) -> int:
    """Approximate the token count of *text* from its character length.

    The length is floor-divided by ``_CHARS_PER_TOKEN`` and the result is
    clamped to a minimum of 1, so an empty or very short string still
    counts as one token.
    """
    approx = len(text) // _CHARS_PER_TOKEN
    # Never report zero: every string costs at least one token.
    return approx if approx >= 1 else 1

def estimate_tokens(message: ChatMessage) -> int:
    """Approximate the token count of a single chat message.

    Only the value stored under the ``"content"`` key is measured; it is
    passed to ``estimate_text_tokens`` unchanged.
    """
    body = message["content"]
    return estimate_text_tokens(body)

def windowed_history(
    messages: list[ChatMessage],
    *,
    max_tokens: int,
    estimator: Callable[[ChatMessage], int] = estimate_tokens,
) -> list[ChatMessage]:
    """Trim the oldest messages until the history fits in *max_tokens*.

    Each message is costed once with *estimator*. While the running total
    is over budget, messages are removed from the front: two at a time
    when the front message has role ``"user"`` (a user/assistant pair),
    otherwise one at a time so a misaligned history re-synchronises.

    Trimming never starts at either of the last two positions, so the
    tail of the conversation is returned even when it alone exceeds the
    budget; handling an over-sized tail is left to the caller.

    A non-positive *max_tokens* or an empty *messages* disables trimming.
    The result is always a new list; *messages* is not modified.
    """
    if max_tokens <= 0 or not messages:
        return list(messages)

    costs = list(map(estimator, messages))
    remaining = sum(costs)
    if remaining <= max_tokens:
        return list(messages)

    # Indexes at or beyond this bound belong to the protected tail: no
    # drop may begin there, which keeps the newest exchange in the window.
    protected_from = len(messages) - 2
    head = 0
    while head < protected_from and remaining > max_tokens:
        # A leading user message is removed together with the message
        # after it; anything else at the front is removed on its own.
        step = 1 if messages[head]["role"] != "user" else 2
        # Slicing clamps at the end of the list, so this never over-runs.
        for cost in costs[head : head + step]:
            remaining -= cost
        head += step
    return list(messages[head:])

Coverage for src / lilbee / retrieval / query / history_window.py: 100%

22 statements