mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-12 09:12:40 +02:00
Add streaming runtime helpers with behavior-focused unit tests.
This commit is contained in:
parent
c0706364d1
commit
4e664652a8
2 changed files with 212 additions and 0 deletions
92
surfsense_backend/app/tasks/chat/streaming/runtime.py
Normal file
92
surfsense_backend/app/tasks/chat/streaming/runtime.py
Normal file
|
|
@ -0,0 +1,92 @@
|
||||||
|
"""Runtime setup helpers for orchestrated chat streaming."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
|
import logging
|
||||||
|
from collections.abc import Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# Upper bounds for the preflight probe request: keep it cheap and fast.
_PREFLIGHT_TIMEOUT_SEC: float = 2.5
_PREFLIGHT_MAX_TOKENS: int = 1


async def preflight_llm(
    llm: Any,
    *,
    is_provider_rate_limited: Callable[[BaseException], bool],
) -> None:
    """Issue a minimal completion probe to catch immediate provider 429s."""
    from litellm import acompletion

    model = getattr(llm, "model", None)
    if not model or model == "auto":
        # No concrete model to probe against — skip the round trip entirely.
        return

    # A single-token, short-timeout, non-streaming request keeps the probe
    # as cheap as the provider allows.
    probe_kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": "ping"}],
        "api_key": getattr(llm, "api_key", None),
        "api_base": getattr(llm, "api_base", None),
        "max_tokens": _PREFLIGHT_MAX_TOKENS,
        "timeout": _PREFLIGHT_TIMEOUT_SEC,
        "stream": False,
        "metadata": {"tags": ["surfsense:internal", "auto-pin-preflight"]},
    }
    try:
        await acompletion(**probe_kwargs)
    except Exception as exc:
        if is_provider_rate_limited(exc):
            # Surfacing rate limits is the whole point of the probe.
            raise
        # Any other failure is non-fatal for a best-effort probe; log quietly.
        logging.getLogger(__name__).debug(
            "auto_pin_preflight non_rate_limit_error model=%s err=%s",
            model,
            exc,
        )
|
||||||
|
|
||||||
|
|
||||||
|
async def build_main_agent_for_thread(
|
||||||
|
agent_factory: Any,
|
||||||
|
*,
|
||||||
|
llm: Any,
|
||||||
|
search_space_id: int,
|
||||||
|
db_session: Any,
|
||||||
|
connector_service: Any,
|
||||||
|
checkpointer: Any,
|
||||||
|
user_id: str | None,
|
||||||
|
thread_id: int | None,
|
||||||
|
agent_config: Any,
|
||||||
|
firecrawl_api_key: str | None,
|
||||||
|
thread_visibility: Any,
|
||||||
|
filesystem_selection: Any,
|
||||||
|
disabled_tools: list[str] | None = None,
|
||||||
|
mentioned_document_ids: list[int] | None = None,
|
||||||
|
) -> Any:
|
||||||
|
"""Run one canonical agent-build call for a single thread."""
|
||||||
|
return await agent_factory(
|
||||||
|
llm=llm,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
db_session=db_session,
|
||||||
|
connector_service=connector_service,
|
||||||
|
checkpointer=checkpointer,
|
||||||
|
user_id=user_id,
|
||||||
|
thread_id=thread_id,
|
||||||
|
agent_config=agent_config,
|
||||||
|
firecrawl_api_key=firecrawl_api_key,
|
||||||
|
thread_visibility=thread_visibility,
|
||||||
|
filesystem_selection=filesystem_selection,
|
||||||
|
disabled_tools=disabled_tools,
|
||||||
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def settle_speculative_agent_build(task: Any) -> None:
    """Wait for a discarded speculative build and swallow its outcome."""
    try:
        await task
    except BaseException:  # noqa: BLE001 - outcome is deliberately discarded
        # The build was abandoned; even cancellation or failure is irrelevant
        # here — we only need the task to be fully settled.
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# Public surface of this module, kept alphabetical.
__all__ = [
    "build_main_agent_for_thread",
    "preflight_llm",
    "settle_speculative_agent_build",
]
|
||||||
|
|
@ -0,0 +1,120 @@
|
||||||
|
"""Behavior tests for streaming runtime helpers."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import types
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.tasks.chat.streaming import runtime
|
||||||
|
|
||||||
|
# Mark every test in this module for the fast unit-test suite.
pytestmark = pytest.mark.unit
|
||||||
|
|
||||||
|
|
||||||
|
async def test_preflight_llm_calls_litellm_when_model_present(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The probe forwards model, credentials, and limits to litellm."""
    recorded: dict[str, Any] = {}

    async def _capture_acompletion(**kwargs: Any):
        recorded.update(kwargs)
        return {"ok": True}

    fake_litellm = types.SimpleNamespace(acompletion=_capture_acompletion)
    monkeypatch.setitem(sys.modules, "litellm", fake_litellm)

    llm = types.SimpleNamespace(model="openai/test", api_key="k", api_base="b")
    await runtime.preflight_llm(llm, is_provider_rate_limited=lambda _: False)

    assert recorded["model"] == "openai/test"
    assert recorded["max_tokens"] == 1
    assert recorded["timeout"] == 2.5
    assert recorded["stream"] is False
|
||||||
|
|
||||||
|
|
||||||
|
async def test_preflight_llm_rethrows_rate_limited(monkeypatch: pytest.MonkeyPatch) -> None:
    """Failures the predicate classifies as rate limits must propagate."""

    class _RateLimitedError(Exception):
        pass

    async def _always_rate_limited(**kwargs: Any):
        del kwargs
        raise _RateLimitedError("rl")

    fake_litellm = types.SimpleNamespace(acompletion=_always_rate_limited)
    monkeypatch.setitem(sys.modules, "litellm", fake_litellm)

    llm = types.SimpleNamespace(model="openai/test")
    with pytest.raises(_RateLimitedError):
        await runtime.preflight_llm(
            llm,
            is_provider_rate_limited=lambda exc: isinstance(exc, _RateLimitedError),
        )
|
||||||
|
|
||||||
|
|
||||||
|
async def test_preflight_llm_skips_probe_for_auto_model(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The "auto" sentinel model must short-circuit before any provider call."""
    probe_count = 0

    async def _counting_acompletion(**kwargs: Any):
        nonlocal probe_count
        del kwargs
        probe_count += 1
        return {"ok": True}

    monkeypatch.setitem(
        sys.modules,
        "litellm",
        types.SimpleNamespace(acompletion=_counting_acompletion),
    )

    await runtime.preflight_llm(
        types.SimpleNamespace(model="auto"),
        is_provider_rate_limited=lambda _: False,
    )

    assert probe_count == 0
|
||||||
|
|
||||||
|
|
||||||
|
async def test_build_main_agent_for_thread_forwards_arguments() -> None:
    """Every keyword handed to the helper reaches the factory unchanged."""
    captured: dict[str, Any] = {}

    async def _recording_factory(**kwargs: Any):
        captured.update(kwargs)
        return "agent"

    forwarded = {
        "llm": "llm",
        "search_space_id": 1,
        "db_session": "db",
        "connector_service": "connector",
        "checkpointer": "cp",
        "user_id": "u",
        "thread_id": 10,
        "agent_config": "cfg",
        "firecrawl_api_key": "key",
        "thread_visibility": "vis",
        "filesystem_selection": "fs",
        "disabled_tools": ["a"],
        "mentioned_document_ids": [5],
    }
    result = await runtime.build_main_agent_for_thread(_recording_factory, **forwarded)

    assert result == "agent"
    assert captured["thread_id"] == 10
    assert captured["mentioned_document_ids"] == [5]
|
||||||
|
|
||||||
|
|
||||||
|
async def test_settle_speculative_agent_build_swallows_exceptions() -> None:
    """Settling a failed speculative task must not re-raise its error."""
    import asyncio

    async def _failing_build() -> None:
        raise RuntimeError("ignore")

    speculative = asyncio.create_task(_failing_build())
    await runtime.settle_speculative_agent_build(speculative)

    assert speculative.done()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue