diff --git a/.github/workflows/backend-tests.yml b/.github/workflows/backend-tests.yml index 6e0a6db6d..7916dcf50 100644 --- a/.github/workflows/backend-tests.yml +++ b/.github/workflows/backend-tests.yml @@ -4,6 +4,9 @@ on: pull_request: branches: [main, dev] types: [opened, synchronize, reopened, ready_for_review] + paths: + - 'surfsense_backend/**' + - '.github/workflows/backend-tests.yml' concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -21,26 +24,15 @@ jobs: - name: Checkout code uses: actions/checkout@v6 - - name: Check if backend files changed - id: backend-changes - uses: dorny/paths-filter@v3 - with: - filters: | - backend: - - 'surfsense_backend/**' - - name: Set up Python - if: steps.backend-changes.outputs.backend == 'true' - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install UV - if: steps.backend-changes.outputs.backend == 'true' - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@v8.1.0 - name: Cache dependencies - if: steps.backend-changes.outputs.backend == 'true' uses: actions/cache@v5 with: path: | @@ -51,19 +43,16 @@ jobs: python-deps- - name: Cache HuggingFace models - if: steps.backend-changes.outputs.backend == 'true' uses: actions/cache@v5 with: path: ~/.cache/huggingface key: hf-models-${{ env.EMBEDDING_MODEL }} - name: Install dependencies - if: steps.backend-changes.outputs.backend == 'true' working-directory: surfsense_backend run: uv sync - name: Run unit tests - if: steps.backend-changes.outputs.backend == 'true' working-directory: surfsense_backend run: uv run pytest -m unit @@ -93,26 +82,15 @@ jobs: - name: Checkout code uses: actions/checkout@v6 - - name: Check if backend files changed - id: backend-changes - uses: dorny/paths-filter@v3 - with: - filters: | - backend: - - 'surfsense_backend/**' - - name: Set up Python - if: steps.backend-changes.outputs.backend == 'true' - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install UV - if: steps.backend-changes.outputs.backend == 'true' - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@v8.1.0 - name: Cache dependencies - if: steps.backend-changes.outputs.backend == 'true' uses: actions/cache@v5 with: path: | @@ -123,19 +101,16 @@ jobs: python-deps- - name: Cache HuggingFace models - if: steps.backend-changes.outputs.backend == 'true' uses: actions/cache@v5 with: path: ~/.cache/huggingface key: hf-models-${{ env.EMBEDDING_MODEL }} - name: Install dependencies - if: steps.backend-changes.outputs.backend == 'true' working-directory: surfsense_backend run: uv sync - name: Run integration tests - if: steps.backend-changes.outputs.backend == 'true' working-directory: surfsense_backend env: TEST_DATABASE_URL: postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense_test diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml index 0dd2e1809..27e587794 100644 --- a/.github/workflows/code-quality.yml +++ b/.github/workflows/code-quality.yml @@ -11,13 +11,13 @@ concurrency: jobs: file-quality: - name: File Quality Checks + name: File Quality runs-on: ubuntu-latest if: github.event.pull_request.draft == false steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 @@ -27,7 +27,7 @@ jobs: git fetch origin ${{ github.base_ref }}:${{ github.base_ref }} 2>/dev/null || git fetch origin ${{ github.base_ref }} 2>/dev/null || true - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' @@ -35,7 +35,7 @@ jobs: run: pip install pre-commit - name: Cache pre-commit hooks - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pre-commit key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} @@ -74,7 +74,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 @@ -83,7 +83,7 @@ jobs: git fetch origin ${{ github.base_ref }}:${{ github.base_ref }} 2>/dev/null || git fetch origin ${{ github.base_ref }} 2>/dev/null || true - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' @@ -91,7 +91,7 @@ jobs: run: pip install pre-commit - name: Cache pre-commit hooks - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pre-commit key: pre-commit-security-${{ hashFiles('.pre-commit-config.yaml') }} @@ -125,35 +125,36 @@ jobs: exit ${exit_code:-0} python-backend: - name: Python Backend Quality + name: Backend Quality runs-on: ubuntu-latest if: github.event.pull_request.draft == false steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install UV - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v8.1.0 - name: Check if backend files changed id: backend-changes - uses: dorny/paths-filter@v3 + uses: dorny/paths-filter@v4 with: filters: | backend: - 'surfsense_backend/**' + - '.github/workflows/code-quality.yml' - name: Cache dependencies if: steps.backend-changes.outputs.backend == 'true' - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: | ~/.cache/uv @@ -171,7 +172,7 @@ jobs: - name: Cache pre-commit hooks if: steps.backend-changes.outputs.backend == 'true' - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pre-commit key: pre-commit-backend-${{ hashFiles('.pre-commit-config.yaml') }} @@ -206,13 +207,13 @@ jobs: exit ${exit_code:-0} typescript-frontend: - name: TypeScript/JavaScript Quality + name: Frontend Quality runs-on: ubuntu-latest if: github.event.pull_request.draft == false steps: - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 @@ -221,24 +222,24 @@ jobs: git fetch origin ${{ github.base_ref }}:${{ github.base_ref }} 2>/dev/null || git fetch origin ${{ github.base_ref }} 2>/dev/null || true - name: Setup Node.js - uses: actions/setup-node@v4 + uses: actions/setup-node@v6 with: - node-version: '18' + node-version: '20' - name: Install pnpm - uses: pnpm/action-setup@v4 - with: - version: latest + uses: pnpm/action-setup@v6 - name: Check if frontend files changed id: frontend-changes - uses: dorny/paths-filter@v3 + uses: dorny/paths-filter@v4 with: filters: | web: - 'surfsense_web/**' + - '.github/workflows/code-quality.yml' extension: - 'surfsense_browser_extension/**' + - '.github/workflows/code-quality.yml' - name: Install dependencies for web if: steps.frontend-changes.outputs.web == 'true' @@ -254,7 +255,7 @@ jobs: run: pip install pre-commit - name: Cache pre-commit hooks - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cache/pre-commit key: pre-commit-frontend-${{ hashFiles('.pre-commit-config.yaml') }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c02d2068b..1d814b430 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -67,7 +67,7 @@ repos: # Biome check for surfsense_web - id: biome-check-web name: biome-check-web - entry: bash -c 'cd surfsense_web && npx @biomejs/biome check --diagnostic-level=error .' + entry: bash -c 'cd surfsense_web && npx @biomejs/biome@2.4.6 check --diagnostic-level=error .' language: system files: ^surfsense_web/ pass_filenames: false diff --git a/VERSION b/VERSION index b056f4120..2678ff8d6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.24 +0.0.25 diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md index b200f7a9a..e61a0bffb 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/citations/on.md @@ -1,11 +1,42 @@ -Apply chunk citations only when the runtime injects `` / -`` blocks. +Citations reach the answer through two channels. Use whichever applies — and +never invent ids you didn't see. Citation ids are resolved by exact-match +lookup; a wrong id silently breaks the link, so when in doubt, omit. + +### Channel A — chunk blocks injected this turn +When `search_surfsense_docs` or `web_search` returns `` / +`` blocks in this turn: 1. For each factual statement taken from those chunks, add - `[citation:chunk_id]` using the exact id from ``. -2. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated). -3. Never invent or normalise ids; if unsure, omit. -4. Plain brackets only — no markdown links, no footnote numbering. -5. If no chunk-tagged documents appear this turn, do not fabricate citations. + `[citation:chunk_id]` using the **exact** id from a visible + `` tag. Copy digit-for-digit (or the URL verbatim); + do not retype from memory. +2. `` is the parent doc id, **not** a citation source — + only ids inside `` count. +3. Multiple chunks → `[citation:id1], [citation:id2]` (comma-separated, + each id copied individually). +4. Never invent, normalise, or guess at adjacent ids; if unsure, omit. +5. Plain brackets only — no markdown links, no footnote numbering. + +### Channel B — citations relayed by a `task` specialist +A `task(...)` tool message may contain `[citation:]` markers +the specialist already attached to its prose. The specialist saw the +underlying `` blocks; you didn't. So: + +1. **Preserve those markers verbatim** in your final answer — do not + reformat, renumber, drop, or wrap them in markdown links. When you + paraphrase a specialist sentence, copy the marker character-for- + character; do not regenerate the id from memory (LLMs reliably + corrupt nearby digits). +2. Keep each marker attached to the sentence the specialist attached + it to. +3. Do **not** add new `[citation:…]` markers of your own to a + specialist's prose; if a fact has no marker, the specialist + couldn't tie it to a chunk and neither can you. +4. When a specialist returns JSON, the citation markers live inside + the prose-bearing fields (e.g. a summary or excerpt). Pull them + along with the surrounding sentence when you quote. + +If neither channel surfaces citation markers this turn, do not fabricate +them. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/private.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/private.md index 4dd511014..bcb80f0f4 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/private.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/private.md @@ -6,4 +6,10 @@ standing instructions? If yes, call `update_memory` **alongside** your normal response — don't defer it to a later turn. Skip ephemeral chat noise (one-off Q/A, greetings, session logistics). Stay within the budget shown in ``. + +Memory is heading-based markdown. New entries should be under `##` headings +such as `## Facts`, `## Preferences`, or `## Instructions`, with bullets like +`- YYYY-MM-DD: text`. If existing memory contains legacy +`(YYYY-MM-DD) [fact|pref|instr]` markers, preserve the information but write +new saves in the heading-based format. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/team.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/team.md index decd23c4d..14d9a6793 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/team.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/memory_protocol/team.md @@ -6,4 +6,12 @@ key facts? If yes, call `update_memory` **alongside** your normal response — don't defer it to a later turn. Skip ephemeral chat noise (one-off Q/A, greetings, session logistics). Stay within the budget shown in ``. + +Team memory is heading-based markdown. New entries should be under `##` +headings such as `## Product Decisions`, `## Engineering Conventions`, +`## Project Facts`, or `## Open Questions`, with bullets like +`- YYYY-MM-DD: text`. If existing memory contains legacy `(YYYY-MM-DD) [fact]` +markers, preserve the information but write new saves in the heading-based +format. Do not create personal headings such as `## Preferences` or +`## Instructions`. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/description.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/description.md index e7fa842b1..01169ff60 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/description.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/description.md @@ -9,7 +9,9 @@ - Skip ephemeral chat noise (one-off Q/A, greetings, session logistics). - Args: `updated_memory` — FULL replacement markdown (merge and curate, don't only append). - - Formatting: bullets `- (YYYY-MM-DD) [marker] text` with markers `[fact]`, - `[pref]`, `[instr]` (priority when trimming: `instr > pref > fact`). - Group bullets under short `##` headings; stay under the limit shown in - ``. + - Formatting: heading-based markdown with entries under `##` headings. + Recommended headings are `## Facts`, `## Preferences`, `## Instructions`, + though clearer natural headings are allowed. New bullets should look like + `- YYYY-MM-DD: text`; stay under the limit shown in ``. + - If existing memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers, + preserve the information but write the updated document in the new format. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/example.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/example.md index 2505bdf87..9afadb02c 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/example.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/private/example.md @@ -1,28 +1,28 @@ Alex, is empty. user: "I'm a space enthusiast, explain astrophage to me" -→ update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n") +→ update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n") (Casual durable fact; use first name, neutral heading.) user: "Remember that I prefer concise answers over detailed explanations" -→ update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n\n## Response style\n- (2025-03-15) [pref] Alex prefers concise answers over detailed explanations\n") +→ update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n\n## Preferences\n- 2025-03-15: Alex prefers concise answers over detailed explanations\n") (Durable preference; merge with existing memory.) user: "I actually moved to Tokyo last month" -→ update_memory(updated_memory="...\n\n## Personal context\n- (2025-03-15) [fact] Alex lives in Tokyo (previously London)\n...") +→ update_memory(updated_memory="...\n\n## Facts\n- 2025-03-15: Alex lives in Tokyo (previously London)\n...") (Updated fact; date reflects when recorded.) user: "I'm a freelance photographer working on a nature documentary" -→ update_memory(updated_memory="...\n\n## Current focus\n- (2025-03-15) [fact] Alex is a freelance photographer\n- (2025-03-15) [fact] Alex is working on a nature documentary\n") +→ update_memory(updated_memory="...\n\n## Current Focus\n- 2025-03-15: Alex is a freelance photographer\n- 2025-03-15: Alex is working on a nature documentary\n") user: "Always respond in bullet points" -→ update_memory(updated_memory="...\n\n## Response style\n- (2025-03-15) [instr] Always respond to Alex in bullet points\n") +→ update_memory(updated_memory="...\n\n## Instructions\n- 2025-03-15: Always respond to Alex in bullet points\n") diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/description.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/description.md index 13341a910..8459f9e7a 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/description.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/description.md @@ -9,8 +9,14 @@ - Skip ephemeral chat noise (one-off Q/A, greetings, session logistics). - Args: `updated_memory` — FULL replacement markdown (merge and curate, don't only append). - - Formatting: bullets `- (YYYY-MM-DD) [fact] text`. Team memory uses ONLY - the `[fact]` marker (never `[pref]` or `[instr]`). Group bullets under - short `##` headings (2-3 words each); stay under the limit shown in - ``. When trimming, prioritise: decisions/conventions > key - facts > current priorities. + - Formatting: heading-based markdown with entries under `##` headings. + Recommended headings are `## Product Decisions`, + `## Engineering Conventions`, `## Project Facts`, and `## Open Questions`. + New bullets should look like `- YYYY-MM-DD: text`; stay under the limit + shown in ``. + - If existing memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the + information but write the updated document in the new format. + - Do not create personal headings such as `## Preferences`, + `## Instructions`, `## Personal Notes`, or `## Personal Instructions`. + When trimming, prioritise: decisions/conventions > key facts > current + priorities. diff --git a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/example.md b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/example.md index 8bd8fcfe4..5d06d9a0c 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/example.md +++ b/surfsense_backend/app/agents/multi_agent_chat/main_agent/system_prompt/prompts/tools/update_memory/team/example.md @@ -1,9 +1,9 @@ user: "Let's remember that we decided to do weekly standup meetings on Mondays" -→ update_memory(updated_memory="...\n\n## Team rituals\n- (2025-03-15) [fact] Weekly standup meetings on Mondays\n...") +→ update_memory(updated_memory="...\n\n## Product Decisions\n- 2025-03-15: Weekly standup meetings happen on Mondays\n...") user: "Our office is in downtown Seattle, 5th floor" -→ update_memory(updated_memory="...\n\n## Workspace\n- (2025-03-15) [fact] Office location: downtown Seattle, 5th floor\n...") +→ update_memory(updated_memory="...\n\n## Project Facts\n- 2025-03-15: Office location is downtown Seattle, 5th floor\n...") diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py index 8f51ffed7..0119752c1 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/middleware.py @@ -2,6 +2,7 @@ from __future__ import annotations +import time from typing import Any, cast from deepagents.backends.protocol import BackendFactory, BackendProtocol @@ -15,8 +16,12 @@ from langchain.agents import create_agent from langchain.chat_models import init_chat_model from langgraph.types import Checkpointer +from app.utils.perf import get_perf_logger + from .task_tool import build_task_tool_with_parent_config +_perf_log = get_perf_logger() + class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware): """``SubAgentMiddleware`` variant that compiles each subagent against the parent checkpointer.""" @@ -54,8 +59,11 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware): def _surf_compile_subagent_graphs(self) -> list[dict[str, Any]]: """Mirror of ``SubAgentMiddleware._get_subagents`` that threads the parent checkpointer.""" specs: list[dict[str, Any]] = [] + loop_start = time.perf_counter() + timings: list[tuple[str, float, str]] = [] # (name, elapsed, source) for spec in self._subagents: + spec_start = time.perf_counter() if "runnable" in spec: compiled = cast(CompiledSubAgent, spec) specs.append( @@ -65,6 +73,9 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware): "runnable": compiled["runnable"], } ) + timings.append( + (compiled["name"], time.perf_counter() - spec_start, "precompiled") + ) continue if "model" not in spec: @@ -79,20 +90,44 @@ class SurfSenseCheckpointedSubAgentMiddleware(SubAgentMiddleware): model = init_chat_model(model) middleware: list[Any] = list(spec.get("middleware", [])) + tools_count = len(spec.get("tools") or []) + mw_count = len(middleware) + compile_start = time.perf_counter() + runnable = create_agent( + model, + system_prompt=spec["system_prompt"], + tools=spec["tools"], + middleware=middleware, + name=spec["name"], + checkpointer=self._surf_checkpointer, + ) + compile_elapsed = time.perf_counter() - compile_start specs.append( { "name": spec["name"], "description": spec["description"], - "runnable": create_agent( - model, - system_prompt=spec["system_prompt"], - tools=spec["tools"], - middleware=middleware, - name=spec["name"], - checkpointer=self._surf_checkpointer, - ), + "runnable": runnable, } ) + timings.append( + ( + spec["name"], + compile_elapsed, + f"compiled tools={tools_count} mw={mw_count}", + ) + ) + + total_elapsed = time.perf_counter() - loop_start + per_subagent = ", ".join( + f"{name}={elapsed * 1000:.0f}ms[{source}]" + for name, elapsed, source in timings + ) + _perf_log.info( + "[subagent_compile] total=%.3fs count=%d details=[%s]", + total_elapsed, + len(timings), + per_subagent, + ) return specs diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py index f9b316e23..f6a9ff146 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/checkpointed_subagent_middleware/task_tool.py @@ -9,6 +9,7 @@ re-raises any new pending interrupt back to the parent. from __future__ import annotations import logging +import time from typing import Annotated, Any, NoReturn from deepagents.middleware.subagents import TASK_TOOL_DESCRIPTION @@ -19,6 +20,8 @@ from langchain_core.tools import StructuredTool from langgraph.errors import GraphInterrupt from langgraph.types import Command, Interrupt +from app.utils.perf import get_perf_logger + from .config import ( consume_surfsense_resume, drain_parent_null_resume, @@ -35,6 +38,7 @@ from .resume import ( ) logger = logging.getLogger(__name__) +_perf_log = get_perf_logger() def _reraise_stamped_subagent_interrupt( @@ -209,6 +213,7 @@ def build_task_tool_with_parent_config( ], runtime: ToolRuntime, ) -> str | Command: + atask_start = time.perf_counter() logger.info( "[hitl_route] atask ENTRY: subagent_type=%r tool_call_id=%s", subagent_type, @@ -230,8 +235,10 @@ def build_task_tool_with_parent_config( # Resume bridge — see ``task`` above. pending_id: str | None = None pending_value: Any = None + aget_state_elapsed = 0.0 aget_state = getattr(subagent, "aget_state", None) if callable(aget_state): + aget_state_start = time.perf_counter() try: snapshot = await aget_state(sub_config) pending_id, pending_value = get_first_pending_subagent_interrupt( @@ -248,32 +255,78 @@ def build_task_tool_with_parent_config( "Subagent aget_state failed; falling back to fresh ainvoke", exc_info=True, ) + finally: + aget_state_elapsed = time.perf_counter() - aget_state_start - if pending_value is not None: - resume_value = consume_surfsense_resume(runtime) - if resume_value is None: - raise RuntimeError( - f"Subagent {subagent_type!r} has a pending interrupt but no " - "surfsense_resume_value on config; resume bridge is broken." - ) - expected = hitlrequest_action_count(pending_value) - resume_value = fan_out_decisions_to_match(resume_value, expected) - # Prevent the parent's resume payload from leaking into subagent - # interrupts via langgraph's parent_scratchpad fallback. - drain_parent_null_resume(runtime) - try: - result = await subagent.ainvoke( - build_resume_command(resume_value, pending_id), - config=sub_config, - ) - except GraphInterrupt as gi: - _reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id) - else: - try: - result = await subagent.ainvoke(subagent_state, config=sub_config) - except GraphInterrupt as gi: - _reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id) - return _return_command_with_state_update(result, runtime.tool_call_id) + invoke_path = "resume" if pending_value is not None else "fresh" + ainvoke_start = time.perf_counter() + ainvoke_outcome = "ok" + try: + if pending_value is not None: + resume_value = consume_surfsense_resume(runtime) + if resume_value is None: + raise RuntimeError( + f"Subagent {subagent_type!r} has a pending interrupt but no " + "surfsense_resume_value on config; resume bridge is broken." + ) + expected = hitlrequest_action_count(pending_value) + resume_value = fan_out_decisions_to_match(resume_value, expected) + # Prevent the parent's resume payload from leaking into subagent + # interrupts via langgraph's parent_scratchpad fallback. + drain_parent_null_resume(runtime) + try: + result = await subagent.ainvoke( + build_resume_command(resume_value, pending_id), + config=sub_config, + ) + except GraphInterrupt as gi: + ainvoke_outcome = "interrupted" + _perf_log.info( + "[hitl_route] atask EXIT subagent_type=%r path=%s outcome=%s " + "aget_state=%.3fs ainvoke=%.3fs total=%.3fs", + subagent_type, + invoke_path, + ainvoke_outcome, + aget_state_elapsed, + time.perf_counter() - ainvoke_start, + time.perf_counter() - atask_start, + ) + _reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id) + else: + try: + result = await subagent.ainvoke(subagent_state, config=sub_config) + except GraphInterrupt as gi: + ainvoke_outcome = "interrupted" + _perf_log.info( + "[hitl_route] atask EXIT subagent_type=%r path=%s outcome=%s " + "aget_state=%.3fs ainvoke=%.3fs total=%.3fs", + subagent_type, + invoke_path, + ainvoke_outcome, + aget_state_elapsed, + time.perf_counter() - ainvoke_start, + time.perf_counter() - atask_start, + ) + _reraise_stamped_subagent_interrupt(gi, runtime.tool_call_id) + ainvoke_elapsed = time.perf_counter() - ainvoke_start + except GraphInterrupt: + raise + + merge_start = time.perf_counter() + cmd = _return_command_with_state_update(result, runtime.tool_call_id) + merge_elapsed = time.perf_counter() - merge_start + _perf_log.info( + "[hitl_route] atask EXIT subagent_type=%r path=%s outcome=%s " + "aget_state=%.3fs ainvoke=%.3fs merge=%.3fs total=%.3fs", + subagent_type, + invoke_path, + ainvoke_outcome, + aget_state_elapsed, + ainvoke_elapsed, + merge_elapsed, + time.perf_counter() - atask_start, + ) + return cmd return StructuredTool.from_function( name="task", diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py index fcdb1c61e..27cee8b37 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/main_agent/knowledge_priority.py @@ -6,6 +6,7 @@ from langchain_core.language_models import BaseChatModel from app.agents.new_chat.filesystem_selection import FilesystemMode from app.agents.new_chat.middleware import KnowledgePriorityMiddleware +from app.services.llm_service import get_planner_llm def build_knowledge_priority_mw( @@ -19,6 +20,7 @@ def build_knowledge_priority_mw( ) -> KnowledgePriorityMiddleware: return KnowledgePriorityMiddleware( llm=llm, + planner_llm=get_planner_llm(), search_space_id=search_space_id, filesystem_mode=filesystem_mode, available_connectors=available_connectors, diff --git a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/kb_context_projection.py b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/kb_context_projection.py index 01c77bb84..e8a4c9899 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/kb_context_projection.py +++ b/surfsense_backend/app/agents/multi_agent_chat/middleware/shared/kb_context_projection.py @@ -2,6 +2,7 @@ from __future__ import annotations +import time from typing import Any from langchain.agents.middleware import AgentMiddleware, AgentState @@ -10,6 +11,9 @@ from langgraph.runtime import Runtime from app.agents.new_chat.filesystem_state import SurfSenseFilesystemState from app.agents.new_chat.middleware.knowledge_search import _render_priority_message +from app.utils.perf import get_perf_logger + +_perf_log = get_perf_logger() class KbContextProjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] @@ -30,17 +34,34 @@ class KbContextProjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] runtime: Runtime[Any], ) -> dict[str, Any] | None: del runtime + start = time.perf_counter() tree_text = state.get("workspace_tree_text") priority = state.get("kb_priority") if not tree_text and not priority: + _perf_log.info( + "[kb_context_projection] tree=0 priority=0 elapsed=%.3fs", + time.perf_counter() - start, + ) return None messages = list(state.get("messages") or []) insert_at = max(len(messages) - 1, 0) + tree_chars = 0 if tree_text: + tree_chars = len(tree_text) messages.insert(insert_at, SystemMessage(content=tree_text)) + priority_count = 0 if priority: + priority_count = ( + len(priority) if hasattr(priority, "__len__") else 1 + ) messages.insert(insert_at, _render_priority_message(priority)) + _perf_log.info( + "[kb_context_projection] tree_chars=%d priority_items=%d elapsed=%.3fs", + tree_chars, + priority_count, + time.perf_counter() - start, + ) return {"messages": messages} diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md index d6837ec92..e989e3ee6 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/description_readonly.md @@ -2,4 +2,4 @@ Read-only specialist for the user's workspace (documents and folders). Use to fi Pass your full question as one string. The specialist runs in isolation: it cannot see this thread, so include any path hints, filters, or constraints it needs. -The specialist returns plain prose with absolute paths. +The specialist returns plain prose with absolute paths and `[citation:]` markers when claims came from KB-indexed chunks. Preserve those markers verbatim if you forward the answer. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md index 60cafb30c..2ae21c271 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_cloud.md @@ -35,6 +35,43 @@ Map outcomes to your `status`: You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. Never report values you did not actually see. +## Chunk citations in your prose + +When `read_file` returns a KB-indexed document under `/documents/`, the response includes `` blocks. Whenever a fact in your `action_summary` or `evidence.content_excerpt` came from a specific chunk, append `[citation:]` to the sentence stating that fact, using the **exact** id from the `` tag. The caller relays these markers to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation. + +### Where chunk ids live in `read_file` output + +A KB document's XML has three numeric attributes — only **one** is a citation source: + +``` + + + 42 ← NOT a citation. Parent doc id; ignore for citations. + ... + + + ← Index hint; the same id also appears below. + + + + ← This is the citation source. + + + +``` + +### Rules + +- Use the **exact** id from a `` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory. +- Before emitting `[citation:N]`, confirm the literal substring `` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation. +- Never cite `` — that's the parent doc, not a chunk. +- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick. +- Prefer **fewer accurate citations** over many speculative ones. +- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`. +- Plain square brackets only — no markdown links, no parentheses, no footnote numbers. +- Tool results without `` (write/edit/move confirmations, `ls` / `glob` / `grep` listings, error strings) carry no chunk id and need none. +- Populate `evidence.chunk_ids` with **only** ids you actually emitted in `[citation:…]` markers — same set, same digits. + ## Examples **Example 1 — happy path write (path discovered from existing convention):** @@ -118,5 +155,6 @@ Rules: - `status=success` → `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. +- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output. Infer before you call; map every tool outcome faithfully. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md index 8f64f2eb6..4e5465aaf 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_desktop.md @@ -35,6 +35,10 @@ Map outcomes to your `status`: You construct the structured `evidence` fields from your own knowledge of what you called and what you observed — the tools do not return them. `chunk_ids` apply only to `` hits; for local-file operations leave them `null`. Never report values you did not actually see. +## Chunk citations in your prose + +In desktop mode your filesystem tools read local files only, and local-file tool results do **not** carry `` tags. Do not emit `[citation:…]` markers in `action_summary` or `evidence.content_excerpt`, and leave `evidence.chunk_ids` `null` — the absolute path is the only reference for local-file work. + ## Examples **Example 1 — happy path write (path discovered from existing convention):** @@ -118,5 +122,6 @@ Rules: - `status=success` → `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. +- `evidence.content_excerpt`: max ~500 characters. Surface a short excerpt or a one-sentence summary, not the full file body. The supervisor already sees the tool's raw output. Infer before you call; map every tool outcome faithfully. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md index 3abfcd8b9..c7813e71d 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_cloud.md @@ -27,3 +27,42 @@ Reply in plain prose: - Cite every claim with an absolute path under `/documents/`. - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content. - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop. + +## Chunk citations + +When the evidence for a claim came from a `read_file` response that included `` blocks (i.e. a KB-indexed document under `/documents/`), append `[citation:]` to the sentence stating that claim. The caller passes these markers through to the end user verbatim, and the UI resolves each id by exact match against the database, so a wrong id silently breaks the citation. + +### Where chunk ids live in `read_file` output + +A KB document's XML has three numeric attributes — only **one** is a citation source: + +``` + + + 42 ← NOT a citation. Parent doc id; ignore for citations. + ... + + + ← Index hint; the same id also appears below. + + + + ← This is the citation source. + + + +``` + +### Rules + +- Use the **exact** id from a `` tag whose content you actually quoted or paraphrased. Copy digit-for-digit; do **not** retype from memory. +- Before emitting `[citation:N]`, confirm the literal substring `` (or its index twin `chunk_id="N"`) appears in the tool result you are summarising this turn. If you can't see it, omit the citation. +- Never cite `` — that's the parent doc, not a chunk. +- Never invent, normalise, shorten, or guess at adjacent ids. If unsure between two candidates, omit rather than pick. +- Prefer **fewer accurate citations** over many speculative ones. One correct `[citation:128]` is more useful than a string of wrong ids. +- Multiple chunks supporting the same point → comma-separated and copied individually: `[citation:128], [citation:129]`. +- Plain square brackets only — no markdown links, no parentheses, no footnote numbers. +- If a claim came from a tool result that did **not** carry a chunk id (`ls`, `glob`, `grep` listings, error strings, or files without ``), skip the citation. +- The absolute path under `/documents/` is always required; chunk citations are additive, they do not replace the path reference. + +Example: `The Q2 roadmap lists three milestones (/documents/planning/q2-roadmap.md) [citation:128], [citation:129].` diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md index 1b3d72b64..2ea711e44 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/knowledge_base/system_prompt_readonly_desktop.md @@ -28,3 +28,7 @@ Reply in plain prose: - Cite every claim with an absolute path. - If the workspace does not contain the requested information, say so explicitly. Do not fabricate paths or content. - If the question is genuinely ambiguous after a thorough lookup, list the candidates with their paths and stop. + +## Chunk citations + +In desktop mode your filesystem tools read local files only, and local-file `read_file` responses do **not** carry `` tags. Cite each claim with the absolute local path; do not emit `[citation:…]` markers — your caller has nothing to resolve them against. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md index 32becf233..13f7b68a5 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/system_prompt.md @@ -18,6 +18,10 @@ Persist durable preferences/facts/instructions with `update_memory` while avoidi - Do not store transient chatter. - Do not store secrets unless explicitly instructed. - If memory intent is unclear, return `status=blocked` with the missing intent signal. +- Persisted memory is heading-based markdown. New saved bullets should look like + `- YYYY-MM-DD: text` under `##` headings. If existing memory has legacy + `(YYYY-MM-DD) [fact|pref|instr]` markers, preserve the information but write + the updated document in the heading-based format. @@ -53,4 +57,7 @@ Rules: - `status=success` -> `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` -> `next_step` must be non-null. - `status=blocked` due to missing required inputs -> `missing_fields` must be non-null. +- `evidence.memory_category` is a semantic classification for supervisor logs + only. It is not the persisted storage format and must not force inline + `[fact|preference|instruction]` markers into saved memory. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/tools/update_memory.py b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/tools/update_memory.py index 23375a081..67bcc3e06 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/tools/update_memory.py +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/memory/tools/update_memory.py @@ -1,280 +1,23 @@ -"""Overwrite one markdown memory document per user or team, with size and shrink guards.""" +"""Memory update tools backed by the canonical memory service.""" from __future__ import annotations import logging -import re -from typing import Any, Literal +from typing import Any from uuid import UUID -from langchain_core.messages import HumanMessage from langchain_core.tools import tool -from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from app.db import SearchSpace, User +from app.services.memory import ( + MEMORY_HARD_LIMIT, + MEMORY_SOFT_LIMIT, + MemoryScope, + save_memory, +) logger = logging.getLogger(__name__) -MEMORY_SOFT_LIMIT = 18_000 -MEMORY_HARD_LIMIT = 25_000 - -_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE) -_HEADING_NORMALIZE_RE = re.compile(r"\s+") - -_MARKER_RE = re.compile(r"\[(fact|pref|instr)\]") -_BULLET_FORMAT_RE = re.compile(r"^- \(\d{4}-\d{2}-\d{2}\) \[(fact|pref|instr)\] .+$") -_PERSONAL_ONLY_MARKERS = {"pref", "instr"} - - -# --------------------------------------------------------------------------- -# Diff validation -# --------------------------------------------------------------------------- - - -def _extract_headings(memory: str) -> set[str]: - """Return all ``## …`` heading texts (without the ``## `` prefix).""" - return set(_SECTION_HEADING_RE.findall(memory)) - - -def _normalize_heading(heading: str) -> str: - """Normalize heading text for robust scope checks.""" - return _HEADING_NORMALIZE_RE.sub(" ", heading.strip().lower()) - - -def _validate_memory_scope( - content: str, scope: Literal["user", "team"] -) -> dict[str, Any] | None: - """Reject personal-only markers ([pref], [instr]) in team memory.""" - if scope != "team": - return None - - markers = set(_MARKER_RE.findall(content)) - leaked = sorted(markers & _PERSONAL_ONLY_MARKERS) - if leaked: - tags = ", ".join(f"[{m}]" for m in leaked) - return { - "status": "error", - "message": ( - f"Team memory cannot include personal markers: {tags}. " - "Use [fact] only in team memory." - ), - } - return None - - -def _validate_bullet_format(content: str) -> list[str]: - """Return warnings for bullet lines that don't match the required format. - - Expected: ``- (YYYY-MM-DD) [fact|pref|instr] text`` - """ - warnings: list[str] = [] - for line in content.splitlines(): - stripped = line.strip() - if not stripped.startswith("- "): - continue - if not _BULLET_FORMAT_RE.match(stripped): - short = stripped[:80] + ("..." if len(stripped) > 80 else "") - warnings.append(f"Malformed bullet: {short}") - return warnings - - -def _validate_diff(old_memory: str | None, new_memory: str) -> list[str]: - """Return a list of warning strings about suspicious changes.""" - if not old_memory: - return [] - - warnings: list[str] = [] - old_headings = _extract_headings(old_memory) - new_headings = _extract_headings(new_memory) - dropped = old_headings - new_headings - if dropped: - names = ", ".join(sorted(dropped)) - warnings.append( - f"Sections removed: {names}. " - "If unintentional, the user can restore from the settings page." - ) - - old_len = len(old_memory) - new_len = len(new_memory) - if old_len > 0 and new_len < old_len * 0.4: - warnings.append( - f"Memory shrank significantly ({old_len:,} -> {new_len:,} chars). " - "Possible data loss." - ) - return warnings - - -# --------------------------------------------------------------------------- -# Size validation & soft warning -# --------------------------------------------------------------------------- - - -def _validate_memory_size(content: str) -> dict[str, Any] | None: - """Return an error/warning dict if *content* is too large, else None.""" - length = len(content) - if length > MEMORY_HARD_LIMIT: - return { - "status": "error", - "message": ( - f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit " - f"({length:,} chars). Consolidate by merging related items, " - "removing outdated entries, and shortening descriptions. " - "Then call update_memory again." - ), - } - return None - - -def _soft_warning(content: str) -> str | None: - """Return a warning string if content exceeds the soft limit.""" - length = len(content) - if length > MEMORY_SOFT_LIMIT: - return ( - f"Memory is at {length:,}/{MEMORY_HARD_LIMIT:,} characters. " - "Consolidate by merging related items and removing less important " - "entries on your next update." - ) - return None - - -# --------------------------------------------------------------------------- -# Forced rewrite when memory exceeds the hard limit -# --------------------------------------------------------------------------- - -_FORCED_REWRITE_PROMPT = """\ -You are a memory curator. The following memory document exceeds the character \ -limit and must be shortened. - -RULES: -1. Rewrite the document to be under {target} characters. -2. Preserve existing ## headings. Every entry must remain under a heading. You may merge - or rename headings to consolidate, but keep names personal and descriptive. -3. Priority for keeping content: [instr] > [pref] > [fact]. -4. Merge duplicate entries, remove outdated entries, shorten verbose descriptions. -5. Every bullet MUST have format: - (YYYY-MM-DD) [fact|pref|instr] text -6. Preserve the user's first name in entries — do not replace it with "the user". -7. Output ONLY the consolidated markdown — no explanations, no wrapping. - - -{content} -""" - - -async def _forced_rewrite(content: str, llm: Any) -> str | None: - """Use a focused LLM call to compress *content* under the hard limit. - - Returns the rewritten string, or ``None`` if the call fails. - """ - try: - prompt = _FORCED_REWRITE_PROMPT.format( - target=MEMORY_HARD_LIMIT, content=content - ) - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal"]}, - ) - text = ( - response.content - if isinstance(response.content, str) - else str(response.content) - ) - return text.strip() - except Exception: - logger.exception("Forced rewrite LLM call failed") - return None - - -# --------------------------------------------------------------------------- -# Shared save-and-respond logic -# --------------------------------------------------------------------------- - - -async def _save_memory( - *, - updated_memory: str, - old_memory: str | None, - llm: Any | None, - apply_fn, - commit_fn, - rollback_fn, - label: str, - scope: Literal["user", "team"], -) -> dict[str, Any]: - """Validate, optionally force-rewrite if over the hard limit, save, and - return a response dict. - - Parameters - ---------- - updated_memory : str - The new document the agent submitted. - old_memory : str | None - The previously persisted document (for diff checks). - llm : Any | None - LLM instance for forced rewrite (may be ``None``). - apply_fn : callable(str) -> None - Callback that sets the new memory on the ORM object. - commit_fn : coroutine - ``session.commit``. - rollback_fn : coroutine - ``session.rollback``. - label : str - Human label for log messages (e.g. "user memory", "team memory"). - """ - content = updated_memory - - # --- forced rewrite if over the hard limit --- - if len(content) > MEMORY_HARD_LIMIT and llm is not None: - rewritten = await _forced_rewrite(content, llm) - if rewritten is not None and len(rewritten) < len(content): - content = rewritten - - # --- hard-limit gate (reject if still too large after rewrite) --- - size_err = _validate_memory_size(content) - if size_err: - return size_err - - scope_err = _validate_memory_scope(content, scope) - if scope_err: - return scope_err - - # --- persist --- - try: - apply_fn(content) - await commit_fn() - except Exception as e: - logger.exception("Failed to update %s: %s", label, e) - await rollback_fn() - return {"status": "error", "message": f"Failed to update {label}: {e}"} - - # --- build response --- - resp: dict[str, Any] = { - "status": "saved", - "message": f"{label.capitalize()} updated.", - } - - if content is not updated_memory: - resp["notice"] = "Memory was automatically rewritten to fit within limits." - - diff_warnings = _validate_diff(old_memory, content) - if diff_warnings: - resp["diff_warnings"] = diff_warnings - - format_warnings = _validate_bullet_format(content) - if format_warnings: - resp["format_warnings"] = format_warnings - - warning = _soft_warning(content) - if warning: - resp["warning"] = warning - - return resp - - -# --------------------------------------------------------------------------- -# Tool factories -# --------------------------------------------------------------------------- - def create_update_memory_tool( user_id: str | UUID, @@ -287,40 +30,22 @@ def create_update_memory_tool( async def update_memory(updated_memory: str) -> dict[str, Any]: """Update the user's personal memory document. - Your current memory is shown in in the system prompt. - When the user shares important long-term information (preferences, - facts, instructions, context), rewrite the memory document to include - the new information. Merge new facts with existing ones, update - contradictions, remove outdated entries, and keep it concise. - - Args: - updated_memory: The FULL updated markdown document (not a diff). + The current memory is shown in . Pass the FULL updated + markdown document, not a diff. """ try: - result = await db_session.execute(select(User).where(User.id == uid)) - user = result.scalars().first() - if not user: - return {"status": "error", "message": "User not found."} - - old_memory = user.memory_md - - return await _save_memory( - updated_memory=updated_memory, - old_memory=old_memory, + result = await save_memory( + scope=MemoryScope.USER, + target_id=uid, + content=updated_memory, + session=db_session, llm=llm, - apply_fn=lambda content: setattr(user, "memory_md", content), - commit_fn=db_session.commit, - rollback_fn=db_session.rollback, - label="memory", - scope="user", ) + return result.to_dict() except Exception as e: logger.exception("Failed to update user memory: %s", e) await db_session.rollback() - return { - "status": "error", - "message": f"Failed to update memory: {e}", - } + return {"status": "error", "message": f"Failed to update memory: {e}"} return update_memory @@ -334,36 +59,18 @@ def create_update_team_memory_tool( async def update_memory(updated_memory: str) -> dict[str, Any]: """Update the team's shared memory document for this search space. - Your current team memory is shown in in the system - prompt. When the team shares important long-term information - (decisions, conventions, key facts, priorities), rewrite the memory - document to include the new information. Merge new facts with - existing ones, update contradictions, remove outdated entries, and - keep it concise. - - Args: - updated_memory: The FULL updated markdown document (not a diff). + The current team memory is shown in . Pass the FULL updated + markdown document, not a diff. """ try: - result = await db_session.execute( - select(SearchSpace).where(SearchSpace.id == search_space_id) - ) - space = result.scalars().first() - if not space: - return {"status": "error", "message": "Search space not found."} - - old_memory = space.shared_memory_md - - return await _save_memory( - updated_memory=updated_memory, - old_memory=old_memory, + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + content=updated_memory, + session=db_session, llm=llm, - apply_fn=lambda content: setattr(space, "shared_memory_md", content), - commit_fn=db_session.commit, - rollback_fn=db_session.rollback, - label="team memory", - scope="team", ) + return result.to_dict() except Exception as e: logger.exception("Failed to update team memory: %s", e) await db_session.rollback() @@ -373,3 +80,11 @@ def create_update_team_memory_tool( } return update_memory + + +__all__ = [ + "MEMORY_HARD_LIMIT", + "MEMORY_SOFT_LIMIT", + "create_update_memory_tool", + "create_update_team_memory_tool", +] diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md index cf558db62..f1a22ddf1 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/builtins/research/system_prompt.md @@ -50,4 +50,6 @@ Rules: - `status=success` -> `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` -> `next_step` must be non-null. - `status=blocked` due to missing required inputs -> `missing_fields` must be non-null. +- `evidence.findings`: max 10 entries, each a single sentence stating one distinct fact. Do not paste raw paragraphs, scraped pages, or quote blocks. +- `evidence.sources`: max 10 URLs, one per finding when applicable. List each URL once. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md index 1b7e84710..9434db7a1 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/airtable/system_prompt.md @@ -38,7 +38,7 @@ Supervisor: "List open tasks in the Project Tracker base." 2. List tables in that base → identify the Tasks table; capture its table ID. 3. Get table schema → identify the status field and the choice IDs that represent "open" states. 4. List records with a typed filter on the status field for those choice IDs. -5. Return `status=success` with the matched records in `evidence.items`. +5. Return `status=success` with `evidence.items` set to `{ "total": N }` and the matched records listed in `action_summary` (record id, primary-field value, and 1-2 most relevant fields; one line per record; up to 10 entries, then `"...and N more"`). @@ -97,7 +97,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: base, table, field, choice, record, etc.). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. +- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (record id, primary-field value, and 1-2 most relevant fields; up to 10 entries, then `"...and N more"`). Discover before you mutate; never guess identifiers, choice IDs, or required fields. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md index 5b27c18ba..a663f5b37 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/calendar/system_prompt.md @@ -29,7 +29,7 @@ You are a Google Calendar specialist for the user's connected calendar. | `error` | `error` | Relay the tool's `message` verbatim as `next_step`. | | tool raises / unknown | `error` | `"Calendar tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `event_id`, `title` / `summary`, `start_at`, `end_at`, and `html_link` inside `evidence` when the tool returned them. For `search_calendar_events`, place the raw `events` array inside `evidence.items`. Never invent a field the tool did not return. +Surface the tool's `event_id`, `title` / `summary`, `start_at`, `end_at`, and `html_link` inside `evidence` when the tool returned them. For `search_calendar_events`, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (title, date, start time; one line per event; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. ## Examples @@ -115,7 +115,7 @@ Rules: - `status=success` → `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. -- For `search_calendar_events` results, populate `evidence.items` with `{ "events": [...], "total": N }`. +- For `search_calendar_events` results, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (title, date, start time; up to 10 entries, then `"...and N more"`). - For ambiguous matches across `update_calendar_event` / `delete_calendar_event`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`, where `label` should include the event title and start time for human readability). Infer before you call; map every tool outcome faithfully. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md index eaea5827b..898197f14 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/clickup/system_prompt.md @@ -36,7 +36,7 @@ Failure handling: Supervisor: "Find tasks about the homepage redesign." 1. Workspace search for "homepage redesign" → matched tasks. -2. Return `status=success` with the matched tasks in `evidence.items`. +2. Return `status=success` with `evidence.items` set to `{ "total": N }` and the matched tasks listed in `action_summary` (task id, title, status, assignees; one line per task; up to 10 entries, then `"...and N more"`). @@ -98,7 +98,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: task, list, member, status, custom-field choice, etc.). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. +- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (task id, title, status, assignees; up to 10 entries, then `"...and N more"`). Discover before you mutate; never guess identifiers, list statuses, or assignees. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md index a0ba6d87e..249f9ec8b 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/discord/system_prompt.md @@ -24,7 +24,7 @@ You are a Discord specialist for the user's connected Discord server. | `error` | `error` | Relay the tool's `message` verbatim as `next_step`. | | tool raises / unknown | `error` | `"Discord tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `message`, `channel_id`, `message_id`, and the listed channels/messages payload inside `evidence` when the tool returned them. Never invent a field the tool did not return. +Surface the tool's `message`, `channel_id`, and `message_id` inside `evidence` when the tool returned them. For `list_discord_channels` and `read_discord_messages`, set `evidence.items` to `{ "total": N }` and list the matched entries in `action_summary` (channel name or sender + timestamp + short text snippet; one line per entry; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. ## Examples diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md index d74e9bdc4..c04d69ad0 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/gmail/system_prompt.md @@ -33,7 +33,7 @@ You are a Gmail specialist for the user's connected Gmail mailbox. | `error` | `error` | Relay the tool's `message` verbatim as `next_step`. | | tool raises / unknown | `error` | `"Gmail tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `message_id`, `thread_id`, `draft_id`, `subject`, and recipient fields inside `evidence` when the tool returned them. For `search_gmail`, place the raw `emails` array inside `evidence.items`. Never invent a field the tool did not return. +Surface the tool's `message_id`, `thread_id`, `draft_id`, `subject`, and recipient fields inside `evidence` when the tool returned them. For `search_gmail`, set `evidence.items` to `{ "total": N }` and list the matched emails in `action_summary` (sender, subject, date; one line per email; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. ## Examples @@ -114,7 +114,7 @@ Rules: - `status=success` → `next_step=null`, `missing_fields=null`. - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. -- For `search_gmail` results, populate `evidence.items` with `{ "emails": [...], "total": N }`. +- For `search_gmail` results, set `evidence.items` to `{ "total": N }` and list the matched emails in `action_summary` (sender, subject, date; up to 10 entries, then `"...and N more"`). - For ambiguous matches across `update_gmail_draft` / `trash_gmail_email` / `read_gmail_email`, populate `evidence.matched_candidates` with up to 5 options (`id` + `label`). Infer before you call; verify before you send; map every tool outcome faithfully. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md index 79c46f8a0..4dcc56454 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/jira/system_prompt.md @@ -39,7 +39,7 @@ Failure handling: Supervisor: "Find issues assigned to me with status 'In Progress'." 1. JQL search with `assignee = currentUser() AND status = "In Progress"`. -2. Return `status=success` with the matched issues in `evidence.items`. +2. Return `status=success` with `evidence.items` set to `{ "total": N }` and the matched issues listed in `action_summary` (issue key, summary, status, assignee; one line per issue; up to 10 entries, then `"...and N more"`). @@ -116,7 +116,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: site, project, issue, user, transition, etc.). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. +- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (issue key, summary, status, assignee; up to 10 entries, then `"...and N more"`). Discover before you mutate; never guess identifiers, transitions, or required fields. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md index f7dbeb9a9..1d96a4105 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/linear/system_prompt.md @@ -32,7 +32,7 @@ Failure handling: Supervisor: "Find issues assigned to me with priority Urgent." 1. Discovery: list issues with filters `{assignee: "me", priority: 1}`. -2. Return `status=success` with the matched issues in `evidence.items`. +2. Return `status=success` with `evidence.items` set to `{ "total": N }` and the matched issues listed in `action_summary` (identifier, title, state, assignee; one line per issue; up to 10 entries, then `"...and N more"`). @@ -106,7 +106,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: issue, user, project, state, etc.). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. +- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (identifier, title, state, assignee; up to 10 entries, then `"...and N more"`). Discover before you mutate; never guess identifiers. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md index 953dbff58..0f42161b3 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/luma/system_prompt.md @@ -26,7 +26,7 @@ You are a Luma specialist for the user's connected Luma account. | `error` | `error` | Relay the tool's `message` verbatim as `next_step` (this covers Luma Plus 403s and other API errors). | | tool raises / unknown | `error` | `"Luma tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `message`, `event_id`, `name`, `start_at`, and `url` inside `evidence` when the tool returned them. Never invent a field the tool did not return. +Surface the tool's `message`, `event_id`, `name`, `start_at`, and `url` inside `evidence` when the tool returned them. For `list_luma_events`, set `evidence.items` to `{ "total": N }` and list the matched events in `action_summary` (event name, start date/time, location if present; one line per event; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. ## Examples diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md index c8edfc1db..3c24b19c9 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/slack/system_prompt.md @@ -37,7 +37,7 @@ Failure handling: Supervisor: "Summarize the latest discussion in #marketing." 1. Search channels for "marketing" → one strong match. Capture the channel ID. 2. Read that channel's recent message history. -3. Return `status=success` with the message list in `evidence.items`. +3. Return `status=success` with `evidence.items` set to `{ "total": N }` and the messages listed in `action_summary` (sender, timestamp, text snippet; one line per message; up to 10 entries, then `"...and N more"`). @@ -92,7 +92,7 @@ Rules: - `status=partial|blocked|error` → `next_step` must be non-null. - `status=blocked` due to missing required inputs → `missing_fields` must be non-null. - For blocked ambiguity, populate `evidence.matched_candidates` with up to 5 options (`id` + `label` — works for any kind of candidate: channel, user, message, thread). -- For discovery-only queries (lists), populate `evidence.items` with the structured list. +- For discovery-only queries (lists), set `evidence.items` to `{ "total": N }` and list the matched items in `action_summary` (channel/user, key identifier, timestamp, short snippet; up to 10 entries, then `"...and N more"`). Discover before you post; never guess channel, user, or thread targets. diff --git a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md index b79c59f90..c3a280f79 100644 --- a/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md +++ b/surfsense_backend/app/agents/multi_agent_chat/subagents/connectors/teams/system_prompt.md @@ -26,7 +26,7 @@ You are a Microsoft Teams specialist for the user's connected Teams account. | `error` | `error` | Relay the tool's `message` verbatim as `next_step`. | | tool raises / unknown | `error` | `"Teams tool failed unexpectedly. Ask the user to retry shortly."` | -Surface the tool's `message`, `team_id`, `team_name`, `channel_id`, `channel_name`, and `message_id` inside `evidence` when the tool returned them. Never invent a field the tool did not return. +Surface the tool's `message`, `team_id`, `team_name`, `channel_id`, `channel_name`, and `message_id` inside `evidence` when the tool returned them. For `list_teams_channels` and `read_teams_messages`, set `evidence.items` to `{ "total": N }` and list the matched entries in `action_summary` (team › channel, or sender + timestamp + short text snippet; one line per entry; up to 10 entries, then `"...and N more"`). Never invent a field the tool did not return. ## Examples diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py index 605c31416..f8db333ba 100644 --- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py +++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py @@ -102,6 +102,7 @@ from app.agents.new_chat.tools.registry import ( ) from app.db import ChatVisibility from app.services.connector_service import ConnectorService +from app.services.llm_service import get_planner_llm from app.utils.perf import get_perf_logger _perf_log = get_perf_logger() @@ -1077,6 +1078,7 @@ def _build_compiled_agent_blocking( else None, KnowledgePriorityMiddleware( llm=llm, + planner_llm=get_planner_llm(), search_space_id=search_space_id, filesystem_mode=filesystem_mode, available_connectors=available_connectors, diff --git a/surfsense_backend/app/agents/new_chat/memory_extraction.py b/surfsense_backend/app/agents/new_chat/memory_extraction.py deleted file mode 100644 index e31774a7c..000000000 --- a/surfsense_backend/app/agents/new_chat/memory_extraction.py +++ /dev/null @@ -1,232 +0,0 @@ -"""Background memory extraction for the SurfSense agent. - -After each agent response, if the agent did not call ``update_memory`` during -the turn, this module can run a lightweight LLM call to decide whether the -latest message contains long-term information worth persisting. -""" - -from __future__ import annotations - -import logging -from typing import Any -from uuid import UUID - -from langchain_core.messages import HumanMessage -from sqlalchemy import select - -from app.agents.new_chat.tools.update_memory import _save_memory -from app.db import SearchSpace, User, shielded_async_session -from app.utils.content_utils import extract_text_content - -logger = logging.getLogger(__name__) - -_MEMORY_EXTRACT_PROMPT = """\ -You are a memory extraction assistant. Analyze the user's message and decide \ -if it contains any long-term information worth persisting to memory. - -Worth remembering: preferences, background/identity, goals, projects, \ -instructions, tools/languages they use, decisions, expertise, workplace — \ -durable facts that will matter in future conversations. - -NOT worth remembering: greetings, one-off factual questions, session \ -logistics, ephemeral requests, follow-up clarifications with no new personal \ -info, things that only matter for the current task. - -If the message contains memorizable information, output the FULL updated \ -memory document with the new facts merged into the existing content. Follow \ -these rules: -- Every entry MUST be under a ## heading. Preserve existing headings; create new ones - freely. Keep heading names short (2-3 words) and natural. Do NOT include the user's - name in headings. -- Keep entries as single bullet points. Be descriptive but concise — include relevant - details and context rather than just a few words. -- Every bullet MUST use format: - (YYYY-MM-DD) [fact|pref|instr] text - [fact] = durable facts, [pref] = preferences, [instr] = standing instructions. -- Use the user's first name (from ) in entry text, not "the user". -- If a new fact contradicts an existing entry, update the existing entry. -- Do not duplicate information that is already present. - -If nothing is worth remembering, output exactly: NO_UPDATE - -{user_name} - - -{current_memory} - - - -{user_message} -""" - -_TEAM_MEMORY_EXTRACT_PROMPT = """\ -You are a team-memory extraction assistant. Analyze the latest message and \ -decide if it contains durable TEAM-level information worth persisting. - -Decision policy: -- Prioritize recall for durable team context, while avoiding personal-only facts. -- Do NOT require explicit consensus language. A direct team-level statement can - be stored if it is stable and broadly useful for future team chats. -- If evidence is weak or clearly tentative, output NO_UPDATE. - -Worth remembering (team-level only): -- Decisions and defaults that guide future team work -- Team conventions/standards (naming, review policy, coding norms) -- Stable org/project facts (locations, ownership, constraints) -- Long-lived architecture/process facts -- Ongoing priorities that are likely relevant beyond this turn - -NOT worth remembering: -- Personal preferences or biography of one person -- Questions, brainstorming, tentative ideas, or speculation -- One-off requests, status updates, TODOs, logistics for this session -- Information scoped only to a single ephemeral task - -If the message contains memorizable team information, output the FULL updated \ -team memory document with new facts merged into existing content. Follow rules: -- Every entry MUST be under a ## heading. Preserve existing headings; create new ones - freely. Keep heading names short (2-3 words) and natural. -- Keep entries as single bullet points. Be descriptive but concise — include relevant - details and context rather than just a few words. -- Every bullet MUST use format: - (YYYY-MM-DD) [fact] text - Team memory uses ONLY the [fact] marker. Never use [pref] or [instr]. -- If a new fact contradicts an existing entry, update the existing entry. -- Do not duplicate existing information. -- Preserve neutral team phrasing; avoid person-specific memory unless role-anchored. - -If nothing is worth remembering, output exactly: NO_UPDATE - - -{current_memory} - - - -{author} - - - -{user_message} -""" - - -async def extract_and_save_memory( - *, - user_message: str, - user_id: str | None, - llm: Any, -) -> None: - """Background task: extract memorizable info and persist it. - - Designed to be fire-and-forget — catches all exceptions internally. - """ - if not user_id: - return - - try: - uid = UUID(user_id) if isinstance(user_id, str) else user_id - - async with shielded_async_session() as session: - result = await session.execute(select(User).where(User.id == uid)) - user = result.scalars().first() - if not user: - return - - old_memory = user.memory_md - first_name = ( - user.display_name.strip().split()[0] - if user.display_name and user.display_name.strip() - else "The user" - ) - prompt = _MEMORY_EXTRACT_PROMPT.format( - current_memory=old_memory or "(empty)", - user_message=user_message, - user_name=first_name, - ) - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "memory-extraction"]}, - ) - text = extract_text_content(response.content).strip() - - if text == "NO_UPDATE" or not text: - logger.debug("Memory extraction: no update needed (user %s)", uid) - return - - save_result = await _save_memory( - updated_memory=text, - old_memory=old_memory, - llm=llm, - apply_fn=lambda content: setattr(user, "memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - label="memory", - scope="user", - ) - logger.info( - "Background memory extraction for user %s: %s", - uid, - save_result.get("status"), - ) - except Exception: - logger.exception("Background user memory extraction failed") - - -async def extract_and_save_team_memory( - *, - user_message: str, - search_space_id: int | None, - llm: Any, - author_display_name: str | None = None, -) -> None: - """Background task: extract team-level memory and persist it. - - Runs only for shared threads. Designed to be fire-and-forget and catches - exceptions internally. - """ - if not search_space_id: - return - - try: - async with shielded_async_session() as session: - result = await session.execute( - select(SearchSpace).where(SearchSpace.id == search_space_id) - ) - space = result.scalars().first() - if not space: - return - - old_memory = space.shared_memory_md - prompt = _TEAM_MEMORY_EXTRACT_PROMPT.format( - current_memory=old_memory or "(empty)", - author=author_display_name or "Unknown team member", - user_message=user_message, - ) - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "team-memory-extraction"]}, - ) - text = extract_text_content(response.content).strip() - - if text == "NO_UPDATE" or not text: - logger.debug( - "Team memory extraction: no update needed (space %s)", - search_space_id, - ) - return - - save_result = await _save_memory( - updated_memory=text, - old_memory=old_memory, - llm=llm, - apply_fn=lambda content: setattr(space, "shared_memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - label="team memory", - scope="team", - ) - logger.info( - "Background team memory extraction for space %s: %s", - search_space_id, - save_result.get("status"), - ) - except Exception: - logger.exception("Background team memory extraction failed") diff --git a/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py b/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py index d577441dd..cc30f4897 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py +++ b/surfsense_backend/app/agents/new_chat/middleware/kb_persistence.py @@ -32,6 +32,7 @@ exact same routine when ``aafter_agent`` was skipped (e.g. client disconnect). from __future__ import annotations +import asyncio import logging from datetime import UTC, datetime from typing import Any @@ -249,11 +250,11 @@ async def _create_document( session.add(doc) await session.flush() - summary_embedding = embed_texts([content])[0] + summary_embedding = (await asyncio.to_thread(embed_texts, [content]))[0] doc.embedding = summary_embedding chunks = chunk_text(content) if chunks: - chunk_embeddings = embed_texts(chunks) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunks) session.add_all( [ Chunk(document_id=doc.id, content=text, embedding=embedding) @@ -295,13 +296,13 @@ async def _update_document( search_space_id, ) - summary_embedding = embed_texts([content])[0] + summary_embedding = (await asyncio.to_thread(embed_texts, [content]))[0] document.embedding = summary_embedding await session.execute(delete(Chunk).where(Chunk.document_id == document.id)) chunks = chunk_text(content) if chunks: - chunk_embeddings = embed_texts(chunks) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunks) session.add_all( [ Chunk(document_id=document.id, content=text, embedding=embedding) diff --git a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py index dc06f8763..77b413940 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py +++ b/surfsense_backend/app/agents/new_chat/middleware/knowledge_search.py @@ -457,7 +457,7 @@ async def search_knowledge_base( if not query: return [] - [embedding] = embed_texts([query]) + [embedding] = await asyncio.to_thread(embed_texts, [query]) doc_types = _resolve_search_types(available_connectors, available_document_types) retriever_top_k = min(top_k * 3, 30) @@ -579,6 +579,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] self, *, llm: BaseChatModel | None = None, + planner_llm: BaseChatModel | None = None, search_space_id: int, filesystem_mode: FilesystemMode = FilesystemMode.CLOUD, available_connectors: list[str] | None = None, @@ -588,6 +589,15 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] inject_system_message: bool = True, # For backwards compatibility ) -> None: self.llm = llm + # The planner LLM handles short, structured internal tasks (query + # rewriting, date extraction, recency classification). When an + # operator marks a global config ``is_planner: true`` we route + # those calls to a cheap/fast model (e.g. gpt-4o-mini, Haiku, Azure + # gpt-5.x-nano) instead of the user's chat LLM — those classification + # tasks don't need frontier-tier capability. Falls back to the chat + # LLM when no planner config is wired up so deployments without one + # keep working unchanged. + self.planner_llm = planner_llm or llm self.search_space_id = search_space_id self.filesystem_mode = filesystem_mode self.available_connectors = available_connectors @@ -598,7 +608,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] # Build the kb-planner private Runnable ONCE here so we don't pay # the ``create_agent`` compile cost (50-200ms) on every turn. # Disabled by default behind ``enable_kb_planner_runnable``; when - # off the planner falls back to the legacy ``self.llm.ainvoke`` + # off the planner falls back to the legacy ``planner_llm.ainvoke`` # path. self._planner: Runnable | None = None self._planner_compile_failed = False @@ -608,7 +618,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] Returns ``None`` when the feature flag is disabled, when the LLM is unavailable, or when ``create_agent`` raises (we fall back to the - legacy ``self.llm.ainvoke`` path in that case). Compilation happens + legacy ``planner_llm.ainvoke`` path in that case). Compilation happens lazily on first call, then memoized via ``self._planner``. The compiled agent is constructed without tools — the planner's @@ -618,7 +628,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] """ if self._planner is not None or self._planner_compile_failed: return self._planner - if self.llm is None: + if self.planner_llm is None: return None flags = get_flags() if not flags.enable_kb_planner_runnable or flags.disable_new_agent_stack: @@ -628,13 +638,13 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] try: self._planner = create_agent( - self.llm, + self.planner_llm, tools=[], middleware=[RetryAfterMiddleware(max_retries=2)], ) except Exception as exc: # pragma: no cover - defensive logger.warning( - "kb-planner Runnable compile failed; falling back to llm.ainvoke: %s", + "kb-planner Runnable compile failed; falling back to planner_llm.ainvoke: %s", exc, ) self._planner_compile_failed = True @@ -647,12 +657,12 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] messages: Sequence[BaseMessage], user_text: str, ) -> tuple[str, datetime | None, datetime | None, bool]: - if self.llm is None: + if self.planner_llm is None: return user_text, None, None, False recent_conversation = _render_recent_conversation( messages, - llm=self.llm, + llm=self.planner_llm, user_text=user_text, ) prompt = _build_kb_planner_prompt( @@ -663,8 +673,8 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] t0 = loop.time() # Prefer the compiled-once planner Runnable when enabled; otherwise - # fall back to ``self.llm.ainvoke``. The ``surfsense:internal`` tag - # is preserved on both paths so ``_stream_agent_events`` still + # fall back to ``planner_llm.ainvoke``. The ``surfsense:internal`` + # tag is preserved on both paths so ``_stream_agent_events`` still # suppresses the planner's intermediate events from the UI. planner = self._build_kb_planner_runnable() try: @@ -684,7 +694,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg] else AIMessage(content="") ) else: - response = await self.llm.ainvoke( + response = await self.planner_llm.ainvoke( [HumanMessage(content=prompt)], config={"tags": ["surfsense:internal"]}, ) diff --git a/surfsense_backend/app/agents/new_chat/middleware/knowledge_tree.py b/surfsense_backend/app/agents/new_chat/middleware/knowledge_tree.py index 4ed1e53f4..6bd6430d1 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/knowledge_tree.py +++ b/surfsense_backend/app/agents/new_chat/middleware/knowledge_tree.py @@ -24,6 +24,7 @@ from __future__ import annotations import asyncio import logging +import time from typing import Any from langchain.agents.middleware import AgentMiddleware, AgentState @@ -41,6 +42,9 @@ from app.agents.new_chat.path_resolver import ( doc_to_virtual_path, ) from app.db import Document, shielded_async_session +from app.utils.perf import get_perf_logger + +_perf_log = get_perf_logger() try: from litellm import token_counter @@ -124,6 +128,7 @@ class KnowledgeTreeMiddleware(AgentMiddleware): # type: ignore[type-arg] if self.filesystem_mode != FilesystemMode.CLOUD: return None + start = time.perf_counter() update: dict[str, Any] = {} if not state.get("cwd"): update["cwd"] = DOCUMENTS_ROOT @@ -131,7 +136,11 @@ class KnowledgeTreeMiddleware(AgentMiddleware): # type: ignore[type-arg] anon_doc = state.get("kb_anon_doc") if anon_doc: tree_msg = self._render_anon_tree(anon_doc) + cache_outcome = "anon" else: + version = int(state.get("tree_version") or 0) + cache_key = (self.search_space_id, version, False) + cache_outcome = "hit" if cache_key in self._cache else "miss" tree_msg = await self._render_kb_tree(state) update["workspace_tree_text"] = tree_msg @@ -141,6 +150,14 @@ class KnowledgeTreeMiddleware(AgentMiddleware): # type: ignore[type-arg] insert_at = max(len(messages) - 1, 0) messages.insert(insert_at, SystemMessage(content=tree_msg)) update["messages"] = messages + + _perf_log.info( + "[knowledge_tree] cache=%s chars=%d elapsed=%.3fs space=%d", + cache_outcome, + len(tree_msg), + time.perf_counter() - start, + self.search_space_id, + ) return update def before_agent( # type: ignore[override] diff --git a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py index 6179adccd..9d0fd26a1 100644 --- a/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py +++ b/surfsense_backend/app/agents/new_chat/middleware/memory_injection.py @@ -8,6 +8,7 @@ Injects memory markdown into the system prompt on every turn: from __future__ import annotations import logging +import time from typing import Any from uuid import UUID @@ -17,10 +18,12 @@ from langgraph.runtime import Runtime from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT from app.db import ChatVisibility, SearchSpace, User, shielded_async_session +from app.services.memory import MEMORY_HARD_LIMIT, MEMORY_SOFT_LIMIT +from app.utils.perf import get_perf_logger logger = logging.getLogger(__name__) +_perf_log = get_perf_logger() class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] @@ -53,9 +56,13 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] if not isinstance(last_message, HumanMessage): return None + start = time.perf_counter() + db_elapsed = 0.0 memory_blocks: list[str] = [] + scope = "team" if self.visibility == ChatVisibility.SEARCH_SPACE else "user" async with shielded_async_session() as session: + db_start = time.perf_counter() if self.visibility == ChatVisibility.SEARCH_SPACE: team_memory = await self._load_team_memory(session) if team_memory: @@ -96,7 +103,15 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] f"" ) + db_elapsed = time.perf_counter() - db_start + if not memory_blocks: + _perf_log.info( + "[memory_injection] scope=%s injected=0 db=%.3fs total=%.3fs", + scope, + db_elapsed, + time.perf_counter() - start, + ) return None memory_text = "\n\n".join(memory_blocks) @@ -106,6 +121,13 @@ class MemoryInjectionMiddleware(AgentMiddleware): # type: ignore[type-arg] insert_idx = 1 if len(new_messages) > 1 else 0 new_messages.insert(insert_idx, memory_msg) + _perf_log.info( + "[memory_injection] scope=%s injected=1 chars=%d db=%.3fs total=%.3fs", + scope, + len(memory_text), + db_elapsed, + time.perf_counter() - start, + ) return {"messages": new_messages} async def _load_user_memory( diff --git a/surfsense_backend/app/agents/new_chat/prompt_caching.py b/surfsense_backend/app/agents/new_chat/prompt_caching.py index 9fe47cdac..b58a48266 100644 --- a/surfsense_backend/app/agents/new_chat/prompt_caching.py +++ b/surfsense_backend/app/agents/new_chat/prompt_caching.py @@ -39,9 +39,19 @@ For OpenAI-family configs we additionally pass: - ``prompt_cache_key=f"surfsense-thread-{thread_id}"`` — routing hint that raises hit rate by sending requests with a shared prefix to the same - backend. + backend. Supported by ``openai/``, ``deepseek/``, ``xai/``, and + ``azure/`` (added to LiteLLM's Azure transformer in + https://github.com/BerriAI/litellm/pull/20989, Feb 2026; verified + against ``AzureOpenAIConfig.get_supported_openai_params`` in our + installed litellm 1.83.14 for ``azure/gpt-4o``, ``azure/gpt-4o-mini``, + ``azure/gpt-5.4``, ``azure/gpt-5.4-mini``). - ``prompt_cache_retention="24h"`` — extends cache TTL beyond the default - 5-10 min in-memory cache. + 5-10 min in-memory cache. Set ONLY for OpenAI/DeepSeek/xAI: Azure's + server-side support landed in Microsoft's docs on 2026-05-13 but + LiteLLM 1.83.14's Azure transformer still omits it from its supported + params list, so it gets silently dropped by ``litellm.drop_params``. + Azure's default in-memory retention (5-10 min, max 1 h) already + bridges intra-conversation turns; revisit when LiteLLM bumps Azure. Safety net: ``litellm.drop_params=True`` is set globally in ``app.services.llm_service`` at module-load time. Any kwarg the destination @@ -81,13 +91,31 @@ _DEFAULT_INJECTION_POINTS: tuple[dict[str, Any], ...] = ( {"location": "message", "index": -1}, ) -# Providers (uppercase ``AgentConfig.provider`` values) that natively expose -# OpenAI-style automatic prompt caching with ``prompt_cache_key`` and -# ``prompt_cache_retention`` kwargs. Strict whitelist — many other providers -# in ``PROVIDER_MAP`` route through litellm's ``openai`` prefix without -# implementing the OpenAI prompt-cache surface (e.g. MOONSHOT, ZHIPU, -# MINIMAX), so we can't infer family from the litellm prefix alone. -_OPENAI_FAMILY_PROVIDERS: frozenset[str] = frozenset({"OPENAI", "DEEPSEEK", "XAI"}) +# Providers (uppercase ``AgentConfig.provider`` values) that accept the +# OpenAI ``prompt_cache_key`` routing hint. Microsoft's Azure OpenAI docs +# (2026-05-13) confirm automatic prompt caching applies to every GPT-4o +# or newer Azure deployment at ≥1024 tokens with no configuration needed, +# and that ``prompt_cache_key`` is combined with the prefix hash to +# improve routing affinity and therefore cache hit rate. LiteLLM's Azure +# transformer ships ``prompt_cache_key`` in its supported params as of +# https://github.com/BerriAI/litellm/pull/20989. +# +# Strict whitelist — many other providers in ``PROVIDER_MAP`` route +# through litellm's ``openai`` prefix without implementing the OpenAI +# prompt-cache surface (e.g. MOONSHOT, ZHIPU, MINIMAX), so we can't infer +# family from the litellm prefix alone. +_PROMPT_CACHE_KEY_PROVIDERS: frozenset[str] = frozenset( + {"OPENAI", "DEEPSEEK", "XAI", "AZURE", "AZURE_OPENAI"} +) + +# Subset of ``_PROMPT_CACHE_KEY_PROVIDERS`` that also accept +# ``prompt_cache_retention="24h"``. Azure is excluded: see module +# docstring — LiteLLM 1.83.14's Azure transformer omits the param so +# ``drop_params`` silently strips it. Re-add Azure once a future LiteLLM +# release wires it into ``AzureOpenAIConfig.get_supported_openai_params``. +_PROMPT_CACHE_RETENTION_PROVIDERS: frozenset[str] = frozenset( + {"OPENAI", "DEEPSEEK", "XAI"} +) def _is_router_llm(llm: BaseChatModel) -> bool: @@ -101,13 +129,13 @@ def _is_router_llm(llm: BaseChatModel) -> bool: return type(llm).__name__ == "ChatLiteLLMRouter" -def _is_openai_family_config(agent_config: AgentConfig | None) -> bool: - """Whether the config targets an OpenAI-style prompt-cache surface. +def _provider_supports_prompt_cache_key(agent_config: AgentConfig | None) -> bool: + """Whether the config targets a provider that accepts ``prompt_cache_key``. - Strict — only returns True when the user explicitly chose OPENAI, - DEEPSEEK, or XAI as the provider in their ``NewLLMConfig`` / - ``YAMLConfig``. Auto-mode and custom providers return False because - we can't statically know the destination. + Strict — only returns True for explicitly chosen OPENAI, DEEPSEEK, + XAI, AZURE, or AZURE_OPENAI providers. Auto-mode and custom + providers return False because we can't statically know the + destination and the router fans out across mixed providers. """ if agent_config is None or not agent_config.provider: return False @@ -115,7 +143,25 @@ def _is_openai_family_config(agent_config: AgentConfig | None) -> bool: return False if agent_config.custom_provider: return False - return agent_config.provider.upper() in _OPENAI_FAMILY_PROVIDERS + return agent_config.provider.upper() in _PROMPT_CACHE_KEY_PROVIDERS + + +def _provider_supports_prompt_cache_retention( + agent_config: AgentConfig | None, +) -> bool: + """Whether the config targets a provider that accepts ``prompt_cache_retention``. + + Tighter than :func:`_provider_supports_prompt_cache_key` — Azure + deployments are excluded until LiteLLM ships the param in its Azure + transformer (see module docstring). + """ + if agent_config is None or not agent_config.provider: + return False + if agent_config.is_auto_mode: + return False + if agent_config.custom_provider: + return False + return agent_config.provider.upper() in _PROMPT_CACHE_RETENTION_PROVIDERS def _get_or_init_model_kwargs(llm: BaseChatModel) -> dict[str, Any] | None: @@ -173,16 +219,23 @@ def apply_litellm_prompt_caching( dict(point) for point in _DEFAULT_INJECTION_POINTS ] - # OpenAI-family extras only when we statically know the destination is - # OpenAI / DeepSeek / xAI. Auto-mode router fans out across providers - # so we can't safely set OpenAI-only kwargs there (drop_params would - # strip them but it's wasteful to set them in the first place). + # OpenAI-style extras only when we statically know the destination + # accepts them. Auto-mode router fans out across mixed providers so + # we can't safely set destination-specific kwargs there (drop_params + # would strip them but it's wasteful to set them in the first + # place). if _is_router_llm(llm): return - if not _is_openai_family_config(agent_config): - return - if thread_id is not None and "prompt_cache_key" not in model_kwargs: + if ( + thread_id is not None + and "prompt_cache_key" not in model_kwargs + and _provider_supports_prompt_cache_key(agent_config) + ): model_kwargs["prompt_cache_key"] = f"surfsense-thread-{thread_id}" - if "prompt_cache_retention" not in model_kwargs: + + if ( + "prompt_cache_retention" not in model_kwargs + and _provider_supports_prompt_cache_retention(agent_config) + ): model_kwargs["prompt_cache_retention"] = "24h" diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md index 8f7da14f8..22fed418a 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_private.md @@ -3,4 +3,10 @@ IMPORTANT — After understanding each user message, ALWAYS check: does this mes reveal durable facts about the user (role, interests, preferences, projects, background, or standing instructions)? If yes, you MUST call update_memory alongside your normal response — do not defer this to a later turn. + +Memory is stored as a heading-based markdown document. New entries should be +under `##` headings such as `## Facts`, `## Preferences`, or `## Instructions` +with bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy +`(YYYY-MM-DD) [fact|pref|instr]` markers, preserve the information but write +new saves in the heading-based format. diff --git a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md index 61d89cc5d..38ec798c0 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/base/memory_protocol_team.md @@ -3,4 +3,12 @@ IMPORTANT — After understanding each user message, ALWAYS check: does this mes reveal durable facts about the team (decisions, conventions, architecture, processes, or key facts)? If yes, you MUST call update_memory alongside your normal response — do not defer this to a later turn. + +Team memory is stored as a heading-based markdown document. New entries should +be under `##` headings such as `## Product Decisions`, +`## Engineering Conventions`, `## Project Facts`, or `## Open Questions` with +bullets like `- YYYY-MM-DD: text`. If existing memory contains legacy +`(YYYY-MM-DD) [fact]` markers, preserve the information but write new saves in +the heading-based format. Do not create personal headings such as +`## Preferences` or `## Instructions`. diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md index f83fe40b4..496bdcae3 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_private.md @@ -1,16 +1,16 @@ - Alex, is empty. User: "I'm a space enthusiast, explain astrophage to me" - - The user casually shared a durable fact. Use their first name in the entry, short neutral heading: - update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n") + - The user casually shared a durable fact: + update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n") - User: "Remember that I prefer concise answers over detailed explanations" - - Durable preference. Merge with existing memory, add a new heading: - update_memory(updated_memory="## Interests & background\n- (2025-03-15) [fact] Alex is a space enthusiast\n\n## Response style\n- (2025-03-15) [pref] Alex prefers concise answers over detailed explanations\n") + - Durable preference. Merge with existing memory: + update_memory(updated_memory="## Facts\n- 2025-03-15: Alex is a space enthusiast\n\n## Preferences\n- 2025-03-15: Alex prefers concise answers over detailed explanations\n") - User: "I actually moved to Tokyo last month" - Updated fact, date prefix reflects when recorded: - update_memory(updated_memory="## Interests & background\n...\n\n## Personal context\n- (2025-03-15) [fact] Alex lives in Tokyo (previously London)\n...") + update_memory(updated_memory="## Facts\n- 2025-03-15: Alex lives in Tokyo (previously London)\n...") - User: "I'm a freelance photographer working on a nature documentary" - Durable background info under a fitting heading: - update_memory(updated_memory="...\n\n## Current focus\n- (2025-03-15) [fact] Alex is a freelance photographer\n- (2025-03-15) [fact] Alex is working on a nature documentary\n") + update_memory(updated_memory="...\n\n## Current Focus\n- 2025-03-15: Alex is a freelance photographer\n- 2025-03-15: Alex is working on a nature documentary\n") - User: "Always respond in bullet points" - Standing instruction: - update_memory(updated_memory="...\n\n## Response style\n- (2025-03-15) [instr] Always respond to Alex in bullet points\n") + update_memory(updated_memory="...\n\n## Instructions\n- 2025-03-15: Always respond to Alex in bullet points\n") diff --git a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md index 1c74fdf6e..16b90babf 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/examples/update_memory_team.md @@ -1,7 +1,7 @@ - User: "Let's remember that we decided to do weekly standup meetings on Mondays" - Durable team decision: - update_memory(updated_memory="- (2025-03-15) [fact] Weekly standup meetings on Mondays\n...") + update_memory(updated_memory="## Product Decisions\n- 2025-03-15: Weekly standup meetings happen on Mondays\n...") - User: "Our office is in downtown Seattle, 5th floor" - Durable team fact: - update_memory(updated_memory="- (2025-03-15) [fact] Office location: downtown Seattle, 5th floor\n...") + update_memory(updated_memory="## Project Facts\n- 2025-03-15: Office location is downtown Seattle, 5th floor\n...") diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md index 184013804..65de785e9 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md +++ b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_private.md @@ -1,31 +1,26 @@ - update_memory: Update your personal memory document about the user. - - Your current memory is already in in your context. The `chars` and - `limit` attributes show your current usage and the maximum allowed size. - - This is your curated long-term memory — the distilled essence of what you know about - the user, not raw conversation logs. - - Call update_memory when: - * The user explicitly asks to remember or forget something - * The user shares durable facts or preferences that will matter in future conversations - - The user's first name is provided in . Use it in memory entries - instead of "the user" (e.g. "{name} works at..." not "The user works at..."). - Do not store the name itself as a separate memory entry. - - Do not store short-lived or ephemeral info: one-off questions, greetings, - session logistics, or things that only matter for the current task. + - Your current memory is already in in your context. The `chars` + and `limit` attributes show current usage and the maximum allowed size. + - This is curated long-term memory, not raw conversation logs. + - Call update_memory when the user explicitly asks to remember/forget + something or shares durable facts, preferences, or standing instructions. + - The user's first name is provided in . Use it in entries instead + of "the user" when helpful. Do not store the name alone as a memory entry. + - Do not store short-lived info: one-off questions, greetings, session + logistics, or things that only matter for the current task. - Args: - - updated_memory: The FULL updated markdown document (not a diff). - Merge new facts with existing ones, update contradictions, remove outdated entries. - Treat every update as a curation pass — consolidate, don't just append. - - Every bullet MUST use this format: - (YYYY-MM-DD) [marker] text - Markers: - [fact] — durable facts (role, background, projects, tools, expertise) - [pref] — preferences (response style, languages, formats, tools) - [instr] — standing instructions (always/never do, response rules) - - Keep it concise and well under the character limit shown in . - - Every entry MUST be under a `##` heading. Keep heading names short (2-3 words) and - natural. Do NOT include the user's name in headings. Organize by context — e.g. - who they are, what they're focused on, how they prefer things. Create, split, or - merge headings freely as the memory grows. - - Each entry MUST be a single bullet point. Be descriptive but concise — include relevant - details and context rather than just a few words. - - During consolidation, prioritize keeping: [instr] > [pref] > [fact]. + - updated_memory: The FULL updated markdown document, not a diff. Merge new + facts with existing ones, update contradictions, remove outdated entries, + and consolidate instead of only appending. + - Use heading-based Markdown: + * Every entry must be under a `##` heading. + * Recommended headings: `## Facts`, `## Preferences`, `## Instructions`. + Specific natural headings are allowed when clearer. + * New bullets should use `- YYYY-MM-DD: text`. + * Each entry should be one concise but descriptive bullet. + - If existing memory uses legacy `(YYYY-MM-DD) [fact|pref|instr]` markers, + preserve the information but write the updated document in the new + heading-based format. + - During consolidation, prioritize durable instructions and preferences before + generic facts. diff --git a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md index 7eaca8818..79d4ead3a 100644 --- a/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md +++ b/surfsense_backend/app/agents/new_chat/prompts/tools/update_memory_team.md @@ -1,26 +1,28 @@ - update_memory: Update the team's shared memory document for this search space. - - Your current team memory is already in in your context. The `chars` - and `limit` attributes show current usage and the maximum allowed size. - - This is the team's curated long-term memory — decisions, conventions, key facts. - - NEVER store personal memory in team memory (e.g. personal bio, individual - preferences, or user-only standing instructions). - - Call update_memory when: - * A team member explicitly asks to remember or forget something - * The conversation surfaces durable team decisions, conventions, or facts - that will matter in future conversations - - Do not store short-lived or ephemeral info: one-off questions, greetings, - session logistics, or things that only matter for the current task. + - Your current team memory is already in in your context. The + `chars` and `limit` attributes show current usage and the maximum allowed size. + - This is curated long-term team memory: decisions, conventions, architecture, + processes, and key shared facts. + - NEVER store personal memory in team memory: individual bios, personal + preferences, or user-only standing instructions. + - Call update_memory when a team member asks to remember/forget something, or + when the conversation surfaces durable team context that matters later. + - Do not store short-lived info: one-off questions, greetings, session + logistics, or things that only matter for the current task. - Args: - - updated_memory: The FULL updated markdown document (not a diff). - Merge new facts with existing ones, update contradictions, remove outdated entries. - Treat every update as a curation pass — consolidate, don't just append. - - Every bullet MUST use this format: - (YYYY-MM-DD) [fact] text - Team memory uses ONLY the [fact] marker. Never use [pref] or [instr] in team memory. - - Keep it concise and well under the character limit shown in . - - Every entry MUST be under a `##` heading. Keep heading names short (2-3 words) and - natural. Organize by context — e.g. what the team decided, current architecture, - active processes. Create, split, or merge headings freely as the memory grows. - - Each entry MUST be a single bullet point. Be descriptive but concise — include relevant - details and context rather than just a few words. - - During consolidation, prioritize keeping: decisions/conventions > key facts > current priorities. + - updated_memory: The FULL updated markdown document, not a diff. Merge new + facts with existing ones, update contradictions, remove outdated entries, + and consolidate instead of only appending. + - Use heading-based Markdown: + * Every entry must be under a `##` heading. + * Recommended headings: `## Product Decisions`, `## Engineering Conventions`, + `## Project Facts`, `## Open Questions`. + * New bullets should use `- YYYY-MM-DD: text`. + * Each entry should be one concise but descriptive bullet. + - If existing memory uses legacy `(YYYY-MM-DD) [fact]` markers, preserve the + information but write the updated document in the new heading-based format. + - Do not create personal headings such as `## Preferences`, `## Instructions`, + `## Personal Notes`, or `## Personal Instructions`. + - During consolidation, prioritize decisions/conventions, then key facts, then + current priorities. diff --git a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py index 64368a878..6c4cfb6be 100644 --- a/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py +++ b/surfsense_backend/app/agents/new_chat/tools/mcp_tool.py @@ -36,8 +36,16 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.agents.new_chat.middleware.dedup_tool_calls import dedup_key_full_args from app.agents.new_chat.tools.hitl import request_approval from app.agents.new_chat.tools.mcp_client import MCPClient +from app.agents.new_chat.tools.mcp_tools_cache import ( + CachedMCPTools, + read_cached_tools, + write_cached_tools, +) from app.db import SearchSourceConnector from app.services.mcp_oauth.registry import MCP_SERVICES, get_service_by_connector_type +from app.utils.perf import get_perf_logger + +_perf_log = get_perf_logger() logger = logging.getLogger(__name__) @@ -293,15 +301,21 @@ async def _create_mcp_tool_from_definition_http( timeout: float = 60.0, ) -> str: """Execute a single MCP HTTP call with the given headers.""" + call_start = time.perf_counter() async with ( streamablehttp_client(url, headers=call_headers) as (read, write, _), ClientSession(read, write) as session, ): + init_start = time.perf_counter() await session.initialize() + init_elapsed = time.perf_counter() - init_start + + tool_start = time.perf_counter() response = await asyncio.wait_for( session.call_tool(original_tool_name, arguments=call_kwargs), timeout=timeout, ) + tool_elapsed = time.perf_counter() - tool_start result = [] for content in response.content: @@ -312,7 +326,18 @@ async def _create_mcp_tool_from_definition_http( else: result.append(str(content)) - return "\n".join(result) if result else "" + payload = "\n".join(result) if result else "" + + _perf_log.info( + "[mcp_http_call] connector=%s tool=%s init=%.3fs call=%.3fs total=%.3fs out_chars=%d", + connector_id, + original_tool_name, + init_elapsed, + tool_elapsed, + time.perf_counter() - call_start, + len(payload), + ) + return payload async def mcp_http_tool_call(**kwargs) -> str: """Execute the MCP tool call via HTTP transport.""" @@ -496,6 +521,7 @@ async def _load_http_mcp_tools( is_generic_mcp: bool = False, *, bypass_internal_hitl: bool = False, + cached_tools: CachedMCPTools | None = None, ) -> list[StructuredTool]: """Load tools from an HTTP-based MCP server. @@ -506,6 +532,8 @@ async def _load_http_mcp_tools( readonly_tools: Tool names that skip HITL approval (read-only operations). tool_name_prefix: If set, each tool name is prefixed for multi-account disambiguation (e.g. ``linear_25``). + cached_tools: If provided, skip live discovery and rebuild wrappers + from the persisted definitions. """ tools: list[StructuredTool] = [] @@ -529,15 +557,23 @@ async def _load_http_mcp_tools( allowed_set = set(allowed_tools) if allowed_tools else None - async def _discover(disc_headers: dict[str, str]) -> list[dict[str, Any]]: - """Connect, initialize, and list tools from the MCP server.""" + async def _discover( + disc_headers: dict[str, str], + ) -> tuple[dict[str, str | None], list[dict[str, Any]]]: + """Connect, initialize, and list tools — returns (serverInfo, tools).""" async with ( streamablehttp_client(url, headers=disc_headers) as (read, write, _), ClientSession(read, write) as session, ): - await session.initialize() + init_result = await session.initialize() + server_info: dict[str, str | None] = {"name": None, "version": None} + si = getattr(init_result, "serverInfo", None) + if si is not None: + server_info["name"] = getattr(si, "name", None) + server_info["version"] = getattr(si, "version", None) + response = await session.list_tools() - return [ + return server_info, [ { "name": tool.name, "description": tool.description or "", @@ -548,47 +584,65 @@ async def _load_http_mcp_tools( for tool in response.tools ] - try: - tool_definitions = await _discover(headers) - except Exception as first_err: - if not _is_auth_error(first_err) or connector_id is None: - logger.exception( - "Failed to connect to HTTP MCP server at '%s' (connector %d): %s", - url, - connector_id, - first_err, - ) - return tools - - logger.warning( - "HTTP MCP discovery for connector %d got 401 — attempting token refresh", - connector_id, - ) - fresh_headers = await _force_refresh_and_get_headers(connector_id) - if fresh_headers is None: - await _mark_connector_auth_expired(connector_id) - logger.error( - "HTTP MCP discovery for connector %d: token refresh failed, marking auth_expired", - connector_id, - ) - return tools - + if cached_tools is not None: + tool_definitions = [ + { + "name": td.name, + "description": td.description, + "input_schema": td.input_schema, + } + for td in cached_tools.tools + ] + else: try: - tool_definitions = await _discover(fresh_headers) - headers = fresh_headers - logger.info( - "HTTP MCP discovery for connector %d succeeded after 401 recovery", + server_info, tool_definitions = await _discover(headers) + except Exception as first_err: + if not _is_auth_error(first_err) or connector_id is None: + logger.exception( + "Failed to connect to HTTP MCP server at '%s' (connector %d): %s", + url, + connector_id, + first_err, + ) + return tools + + logger.warning( + "HTTP MCP discovery for connector %d got 401 — attempting token refresh", connector_id, ) - except Exception as retry_err: - logger.exception( - "HTTP MCP discovery for connector %d still failing after refresh: %s", - connector_id, - retry_err, - ) - if _is_auth_error(retry_err): + fresh_headers = await _force_refresh_and_get_headers(connector_id) + if fresh_headers is None: await _mark_connector_auth_expired(connector_id) - return tools + logger.error( + "HTTP MCP discovery for connector %d: token refresh failed, marking auth_expired", + connector_id, + ) + return tools + + try: + server_info, tool_definitions = await _discover(fresh_headers) + headers = fresh_headers + logger.info( + "HTTP MCP discovery for connector %d succeeded after 401 recovery", + connector_id, + ) + except Exception as retry_err: + logger.exception( + "HTTP MCP discovery for connector %d still failing after refresh: %s", + connector_id, + retry_err, + ) + if _is_auth_error(retry_err): + await _mark_connector_auth_expired(connector_id) + return tools + + await write_cached_tools( + connector_id, + tool_definitions, + server_name=server_info.get("name"), + server_version=server_info.get("version"), + transport=server_config.get("transport", "streamable-http"), + ) total_discovered = len(tool_definitions) @@ -792,14 +846,25 @@ async def _maybe_refresh_mcp_oauth_token( except (ValueError, TypeError): return server_config + refresh_start = time.perf_counter() try: new_access = await _refresh_connector_token(session, connector) if not new_access: + _perf_log.info( + "[mcp_oauth_refresh] connector=%s elapsed=%.3fs outcome=no_token", + connector.id, + time.perf_counter() - refresh_start, + ) return server_config logger.info( "Proactively refreshed MCP OAuth token for connector %s", connector.id ) + _perf_log.info( + "[mcp_oauth_refresh] connector=%s elapsed=%.3fs outcome=refreshed", + connector.id, + time.perf_counter() - refresh_start, + ) refreshed_config = dict(server_config) refreshed_config["headers"] = { @@ -809,6 +874,11 @@ async def _maybe_refresh_mcp_oauth_token( return refreshed_config except Exception: + _perf_log.info( + "[mcp_oauth_refresh] connector=%s elapsed=%.3fs outcome=failed", + connector.id, + time.perf_counter() - refresh_start, + ) logger.warning( "Failed to refresh MCP OAuth token for connector %s", connector.id, @@ -937,6 +1007,94 @@ def invalidate_mcp_tools_cache(search_space_id: int | None = None) -> None: _mcp_tools_cache.clear() +async def discover_single_mcp_connector(connector_id: int) -> None: + """Force live MCP discovery for one connector so its ``cached_tools`` row is fresh. + + ``_load_http_mcp_tools`` persists ``cached_tools`` as a side effect of any + live discovery; passing ``cached_tools=None`` here guarantees we go to the + network. The returned wrappers are discarded — the in-process LRU is + rebuilt lazily on the next user query. Stdio connectors are not cached and + are skipped. + """ + from app.db import async_session_maker + + started = time.perf_counter() + try: + async with async_session_maker() as session: + connector = await session.get(SearchSourceConnector, connector_id) + if connector is None: + logger.info( + "discover_single_mcp_connector: connector %d not found", + connector_id, + ) + return + + cfg = connector.config or {} + server_config = cfg.get("server_config", {}) + if not server_config or not isinstance(server_config, dict): + return + + transport = server_config.get("transport", "stdio") + if transport not in ("streamable-http", "http", "sse"): + return + + if cfg.get("mcp_oauth"): + server_config = await _maybe_refresh_mcp_oauth_token( + session, connector, cfg, server_config + ) + cfg = connector.config or {} + server_config = _inject_oauth_headers(cfg, server_config) + if server_config is None: + logger.info( + "discover_single_mcp_connector: OAuth token unavailable for connector %d", + connector_id, + ) + return + + ct = ( + connector.connector_type.value + if hasattr(connector.connector_type, "value") + else str(connector.connector_type) + ) + svc_cfg = get_service_by_connector_type(ct) + allowed_tools = svc_cfg.allowed_tools if svc_cfg else [] + readonly_tools = svc_cfg.readonly_tools if svc_cfg else frozenset() + + await asyncio.wait_for( + _load_http_mcp_tools( + connector.id, + connector.name, + server_config, + trusted_tools=cfg.get("trusted_tools", []), + allowed_tools=allowed_tools, + readonly_tools=readonly_tools, + tool_name_prefix=None, + is_generic_mcp=svc_cfg is None, + bypass_internal_hitl=True, + cached_tools=None, + ), + timeout=_MCP_DISCOVERY_TIMEOUT_SECONDS, + ) + + _perf_log.info( + "[mcp_prefetch] connector=%s elapsed=%.3fs", + connector_id, + time.perf_counter() - started, + ) + except TimeoutError: + logger.warning( + "discover_single_mcp_connector: connector %d timed out after %ds", + connector_id, + _MCP_DISCOVERY_TIMEOUT_SECONDS, + ) + except Exception: + logger.warning( + "discover_single_mcp_connector: failed for connector %d", + connector_id, + exc_info=True, + ) + + async def load_mcp_tools( session: AsyncSession, search_space_id: int, @@ -1063,6 +1221,7 @@ async def load_mcp_tools( "tool_name_prefix": tool_name_prefix, "transport": server_config.get("transport", "stdio"), "is_generic_mcp": svc_cfg is None, + "cached_tools": read_cached_tools(connector), } ) @@ -1074,9 +1233,12 @@ async def load_mcp_tools( ) async def _discover_one(task: dict[str, Any]) -> list[StructuredTool]: + discover_start = time.perf_counter() + transport = task["transport"] + cached_tools = task.get("cached_tools") try: - if task["transport"] in ("streamable-http", "http", "sse"): - return await asyncio.wait_for( + if transport in ("streamable-http", "http", "sse"): + result = await asyncio.wait_for( _load_http_mcp_tools( task["connector_id"], task["connector_name"], @@ -1087,11 +1249,12 @@ async def load_mcp_tools( tool_name_prefix=task["tool_name_prefix"], is_generic_mcp=task.get("is_generic_mcp", False), bypass_internal_hitl=bypass_internal_hitl, + cached_tools=cached_tools, ), timeout=_MCP_DISCOVERY_TIMEOUT_SECONDS, ) else: - return await asyncio.wait_for( + result = await asyncio.wait_for( _load_stdio_mcp_tools( task["connector_id"], task["connector_name"], @@ -1101,7 +1264,24 @@ async def load_mcp_tools( ), timeout=_MCP_DISCOVERY_TIMEOUT_SECONDS, ) + _perf_log.info( + "[mcp_discover] connector=%s name=%r transport=%s tools=%d elapsed=%.3fs cache=%s", + task["connector_id"], + task["connector_name"], + transport, + len(result), + time.perf_counter() - discover_start, + "hit" if cached_tools is not None else "miss", + ) + return result except TimeoutError: + _perf_log.info( + "[mcp_discover] connector=%s name=%r transport=%s elapsed=%.3fs outcome=timeout", + task["connector_id"], + task["connector_name"], + transport, + time.perf_counter() - discover_start, + ) logger.error( "MCP connector %d timed out after %ds during discovery", task["connector_id"], @@ -1109,6 +1289,13 @@ async def load_mcp_tools( ) return [] except Exception as e: + _perf_log.info( + "[mcp_discover] connector=%s name=%r transport=%s elapsed=%.3fs outcome=error", + task["connector_id"], + task["connector_name"], + transport, + time.perf_counter() - discover_start, + ) logger.exception( "Failed to load tools from MCP connector %d: %s", task["connector_id"], @@ -1116,7 +1303,14 @@ async def load_mcp_tools( ) return [] + gather_start = time.perf_counter() results = await asyncio.gather(*[_discover_one(t) for t in discovery_tasks]) + _perf_log.info( + "[mcp_discover] gather_wall=%.3fs connectors=%d total_tools=%d", + time.perf_counter() - gather_start, + len(discovery_tasks), + sum(len(r) for r in results), + ) tools: list[StructuredTool] = [tool for sublist in results for tool in sublist] _mcp_tools_cache[cache_key] = (now, tools) diff --git a/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py b/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py new file mode 100644 index 000000000..81027e1c4 --- /dev/null +++ b/surfsense_backend/app/agents/new_chat/tools/mcp_tools_cache.py @@ -0,0 +1,145 @@ +"""Persist MCP ``list_tools`` results in ``SearchSourceConnector.config.cached_tools``.""" + +from __future__ import annotations + +import asyncio +import logging +from datetime import UTC, datetime +from typing import Any + +from pydantic import BaseModel, Field, ValidationError +from sqlalchemy import select +from sqlalchemy.orm.attributes import flag_modified + +from app.db import SearchSourceConnector, async_session_maker + +logger = logging.getLogger(__name__) + +_pending_prefetch_tasks: set[asyncio.Task[None]] = set() + + +class CachedMCPToolDef(BaseModel): + name: str + description: str = "" + input_schema: dict[str, Any] = Field(default_factory=dict) + + +class CachedMCPTools(BaseModel): + discovered_at: datetime + server_version: str | None = None + server_name: str | None = None + transport: str | None = None + tools: list[CachedMCPToolDef] + + +def read_cached_tools(connector: SearchSourceConnector) -> CachedMCPTools | None: + """Return parsed cached tools or ``None`` if missing / corrupt (caller falls back to live discovery).""" + cfg = connector.config or {} + raw = cfg.get("cached_tools") + if not raw or not isinstance(raw, dict): + return None + + try: + return CachedMCPTools.model_validate(raw) + except ValidationError as exc: + logger.warning( + "MCP connector %d has corrupt cached_tools — falling back to live discovery: %s", + connector.id, + exc, + ) + return None + + +async def write_cached_tools( + connector_id: int, + tool_definitions: list[dict[str, Any]], + *, + server_name: str | None = None, + server_version: str | None = None, + transport: str | None = None, +) -> None: + """Best-effort persist; uses its own session so a write failure cannot poison the caller's transaction.""" + payload = CachedMCPTools( + discovered_at=datetime.now(UTC), + server_version=server_version, + server_name=server_name, + transport=transport, + tools=[CachedMCPToolDef.model_validate(td) for td in tool_definitions], + ) + + try: + async with async_session_maker() as session: + result = await session.execute( + select(SearchSourceConnector).filter( + SearchSourceConnector.id == connector_id, + ) + ) + connector = result.scalars().first() + if connector is None: + return + + cfg = dict(connector.config or {}) + cfg["cached_tools"] = payload.model_dump(mode="json") + connector.config = cfg + flag_modified(connector, "config") + await session.commit() + + logger.info( + "Persisted cached_tools for MCP connector %d (%d tools)", + connector_id, + len(payload.tools), + ) + except Exception: + logger.warning( + "Failed to persist cached_tools for MCP connector %d", + connector_id, + exc_info=True, + ) + + +def refresh_mcp_tools_cache_for_connector( + connector_id: int, + search_space_id: int, +) -> None: + """Maintain the MCP tool cache after a single-connector lifecycle event. + + Synchronously evicts the in-process LRU for the connector's search space + (LRU keys are per-space, so eviction cannot be scoped finer), then schedules + a background live discovery for this connector alone so its persisted + ``cached_tools`` row is refreshed before the next user query. + + Idempotent. Eviction is best-effort; prefetch is best-effort and only runs + when an event loop is available. Neither path raises. + """ + try: + from app.agents.new_chat.tools.mcp_tool import invalidate_mcp_tools_cache + + invalidate_mcp_tools_cache(search_space_id) + except Exception: + logger.debug( + "MCP in-process cache eviction skipped for space %d", + search_space_id, + exc_info=True, + ) + + try: + loop = asyncio.get_running_loop() + except RuntimeError: + return + + task = loop.create_task(_run_connector_prefetch(connector_id)) + _pending_prefetch_tasks.add(task) + task.add_done_callback(_pending_prefetch_tasks.discard) + + +async def _run_connector_prefetch(connector_id: int) -> None: + from app.agents.new_chat.tools.mcp_tool import discover_single_mcp_connector + + try: + await discover_single_mcp_connector(connector_id) + except Exception: + logger.warning( + "MCP background prefetch failed for connector_id=%d", + connector_id, + exc_info=True, + ) diff --git a/surfsense_backend/app/agents/new_chat/tools/update_memory.py b/surfsense_backend/app/agents/new_chat/tools/update_memory.py index 062668aac..78a65201b 100644 --- a/surfsense_backend/app/agents/new_chat/tools/update_memory.py +++ b/surfsense_backend/app/agents/new_chat/tools/update_memory.py @@ -1,369 +1,53 @@ -"""Markdown-document memory tool for the SurfSense agent. - -Replaces the old row-per-fact save_memory / recall_memory tools with a single -update_memory tool that overwrites a freeform markdown TEXT column. The LLM -always sees the current memory in / tags injected -by MemoryInjectionMiddleware, so it passes the FULL updated document each time. - -Overflow handling: - - Soft limit (18K chars): a warning is returned telling the agent to - consolidate on the next update. - - Hard limit (25K chars): a forced LLM-driven rewrite compresses the document. - If it still exceeds the limit after rewriting, the save is rejected. - - Diff validation: warns when entire ``##`` sections are dropped or when the - document shrinks by more than 60%. -""" +"""Memory update tools backed by the canonical memory service.""" from __future__ import annotations import logging -import re -from typing import Any, Literal +from typing import Any from uuid import UUID -from langchain_core.messages import HumanMessage from langchain_core.tools import tool -from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession -from app.db import SearchSpace, User, async_session_maker -from app.utils.content_utils import extract_text_content +from app.db import async_session_maker +from app.services.memory import MemoryScope, save_memory logger = logging.getLogger(__name__) -MEMORY_SOFT_LIMIT = 18_000 -MEMORY_HARD_LIMIT = 25_000 - -_SECTION_HEADING_RE = re.compile(r"^##\s+(.+)$", re.MULTILINE) -_HEADING_NORMALIZE_RE = re.compile(r"\s+") - -_MARKER_RE = re.compile(r"\[(fact|pref|instr)\]") -_BULLET_FORMAT_RE = re.compile(r"^- \(\d{4}-\d{2}-\d{2}\) \[(fact|pref|instr)\] .+$") -_PERSONAL_ONLY_MARKERS = {"pref", "instr"} - - -# --------------------------------------------------------------------------- -# Diff validation -# --------------------------------------------------------------------------- - - -def _extract_headings(memory: str) -> set[str]: - """Return all ``## …`` heading texts (without the ``## `` prefix).""" - return set(_SECTION_HEADING_RE.findall(memory)) - - -def _normalize_heading(heading: str) -> str: - """Normalize heading text for robust scope checks.""" - return _HEADING_NORMALIZE_RE.sub(" ", heading.strip().lower()) - - -def _validate_memory_scope( - content: str, scope: Literal["user", "team"] -) -> dict[str, Any] | None: - """Reject personal-only markers ([pref], [instr]) in team memory.""" - if scope != "team": - return None - - markers = set(_MARKER_RE.findall(content)) - leaked = sorted(markers & _PERSONAL_ONLY_MARKERS) - if leaked: - tags = ", ".join(f"[{m}]" for m in leaked) - return { - "status": "error", - "message": ( - f"Team memory cannot include personal markers: {tags}. " - "Use [fact] only in team memory." - ), - } - return None - - -def _validate_bullet_format(content: str) -> list[str]: - """Return warnings for bullet lines that don't match the required format. - - Expected: ``- (YYYY-MM-DD) [fact|pref|instr] text`` - """ - warnings: list[str] = [] - for line in content.splitlines(): - stripped = line.strip() - if not stripped.startswith("- "): - continue - if not _BULLET_FORMAT_RE.match(stripped): - short = stripped[:80] + ("..." if len(stripped) > 80 else "") - warnings.append(f"Malformed bullet: {short}") - return warnings - - -def _validate_diff(old_memory: str | None, new_memory: str) -> list[str]: - """Return a list of warning strings about suspicious changes.""" - if not old_memory: - return [] - - warnings: list[str] = [] - old_headings = _extract_headings(old_memory) - new_headings = _extract_headings(new_memory) - dropped = old_headings - new_headings - if dropped: - names = ", ".join(sorted(dropped)) - warnings.append( - f"Sections removed: {names}. " - "If unintentional, the user can restore from the settings page." - ) - - old_len = len(old_memory) - new_len = len(new_memory) - if old_len > 0 and new_len < old_len * 0.4: - warnings.append( - f"Memory shrank significantly ({old_len:,} -> {new_len:,} chars). " - "Possible data loss." - ) - return warnings - - -# --------------------------------------------------------------------------- -# Size validation & soft warning -# --------------------------------------------------------------------------- - - -def _validate_memory_size(content: str) -> dict[str, Any] | None: - """Return an error/warning dict if *content* is too large, else None.""" - length = len(content) - if length > MEMORY_HARD_LIMIT: - return { - "status": "error", - "message": ( - f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit " - f"({length:,} chars). Consolidate by merging related items, " - "removing outdated entries, and shortening descriptions. " - "Then call update_memory again." - ), - } - return None - - -def _soft_warning(content: str) -> str | None: - """Return a warning string if content exceeds the soft limit.""" - length = len(content) - if length > MEMORY_SOFT_LIMIT: - return ( - f"Memory is at {length:,}/{MEMORY_HARD_LIMIT:,} characters. " - "Consolidate by merging related items and removing less important " - "entries on your next update." - ) - return None - - -# --------------------------------------------------------------------------- -# Forced rewrite when memory exceeds the hard limit -# --------------------------------------------------------------------------- - -_FORCED_REWRITE_PROMPT = """\ -You are a memory curator. The following memory document exceeds the character \ -limit and must be shortened. - -RULES: -1. Rewrite the document to be under {target} characters. -2. Preserve existing ## headings. Every entry must remain under a heading. You may merge - or rename headings to consolidate, but keep names personal and descriptive. -3. Priority for keeping content: [instr] > [pref] > [fact]. -4. Merge duplicate entries, remove outdated entries, shorten verbose descriptions. -5. Every bullet MUST have format: - (YYYY-MM-DD) [fact|pref|instr] text -6. Preserve the user's first name in entries — do not replace it with "the user". -7. Output ONLY the consolidated markdown — no explanations, no wrapping. - - -{content} -""" - - -async def _forced_rewrite(content: str, llm: Any) -> str | None: - """Use a focused LLM call to compress *content* under the hard limit. - - Returns the rewritten string, or ``None`` if the call fails. - """ - try: - prompt = _FORCED_REWRITE_PROMPT.format( - target=MEMORY_HARD_LIMIT, content=content - ) - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal"]}, - ) - text = extract_text_content(response.content).strip() - if not text: - logger.warning("Forced rewrite returned empty text; aborting rewrite") - return None - return text - except Exception: - logger.exception("Forced rewrite LLM call failed") - return None - - -# --------------------------------------------------------------------------- -# Shared save-and-respond logic -# --------------------------------------------------------------------------- - - -async def _save_memory( - *, - updated_memory: str, - old_memory: str | None, - llm: Any | None, - apply_fn, - commit_fn, - rollback_fn, - label: str, - scope: Literal["user", "team"], -) -> dict[str, Any]: - """Validate, optionally force-rewrite if over the hard limit, save, and - return a response dict. - - Parameters - ---------- - updated_memory : str - The new document the agent submitted. - old_memory : str | None - The previously persisted document (for diff checks). - llm : Any | None - LLM instance for forced rewrite (may be ``None``). - apply_fn : callable(str) -> None - Callback that sets the new memory on the ORM object. - commit_fn : coroutine - ``session.commit``. - rollback_fn : coroutine - ``session.rollback``. - label : str - Human label for log messages (e.g. "user memory", "team memory"). - """ - if not isinstance(updated_memory, str): - logger.warning( - "Refusing non-string memory payload (type=%s)", - type(updated_memory).__name__, - ) - return { - "status": "error", - "message": "Internal error: memory payload must be a string.", - } - - content = updated_memory - - # --- forced rewrite if over the hard limit --- - if len(content) > MEMORY_HARD_LIMIT and llm is not None: - rewritten = await _forced_rewrite(content, llm) - if rewritten is not None and len(rewritten) < len(content): - content = rewritten - - # --- hard-limit gate (reject if still too large after rewrite) --- - size_err = _validate_memory_size(content) - if size_err: - return size_err - - scope_err = _validate_memory_scope(content, scope) - if scope_err: - return scope_err - - # --- persist --- - try: - apply_fn(content) - await commit_fn() - except Exception as e: - logger.exception("Failed to update %s: %s", label, e) - await rollback_fn() - return {"status": "error", "message": f"Failed to update {label}: {e}"} - - # --- build response --- - resp: dict[str, Any] = { - "status": "saved", - "message": f"{label.capitalize()} updated.", - } - - if content is not updated_memory: - resp["notice"] = "Memory was automatically rewritten to fit within limits." - - diff_warnings = _validate_diff(old_memory, content) - if diff_warnings: - resp["diff_warnings"] = diff_warnings - - format_warnings = _validate_bullet_format(content) - if format_warnings: - resp["format_warnings"] = format_warnings - - warning = _soft_warning(content) - if warning: - resp["warning"] = warning - - return resp - - -# --------------------------------------------------------------------------- -# Tool factories -# --------------------------------------------------------------------------- - def create_update_memory_tool( user_id: str | UUID, db_session: AsyncSession, llm: Any | None = None, ): - """Factory function to create the user-memory update tool. + """Factory for the user-memory update tool. - The tool acquires its own short-lived ``AsyncSession`` per call via - :data:`async_session_maker` so the closure is safe to share across - HTTP requests by the compiled-agent cache. Capturing a per-request - session here would surface stale/closed sessions on cache hits. - The session's bound ``commit``/``rollback`` methods are captured at - call time, after ``async with`` has bound ``db_session`` locally. - - Args: - user_id: ID of the user whose memory document is being updated. - db_session: Reserved for registry compatibility. Per-call sessions - are opened via :data:`async_session_maker` inside the tool body. - llm: Optional LLM for the forced-rewrite path. - - Returns: - Configured update_memory tool for the user-memory scope. + Uses a fresh short-lived session per call so compiled-agent caches never + retain a stale request-scoped session. """ - del db_session # per-call session — see docstring + del db_session uid = UUID(user_id) if isinstance(user_id, str) else user_id @tool async def update_memory(updated_memory: str) -> dict[str, Any]: """Update the user's personal memory document. - Your current memory is shown in in the system prompt. - When the user shares important long-term information (preferences, - facts, instructions, context), rewrite the memory document to include - the new information. Merge new facts with existing ones, update - contradictions, remove outdated entries, and keep it concise. - - Args: - updated_memory: The FULL updated markdown document (not a diff). + The current memory is shown in . Pass the FULL updated + markdown document, not a diff. """ try: async with async_session_maker() as db_session: - result = await db_session.execute(select(User).where(User.id == uid)) - user = result.scalars().first() - if not user: - return {"status": "error", "message": "User not found."} - - old_memory = user.memory_md - - return await _save_memory( - updated_memory=updated_memory, - old_memory=old_memory, + result = await save_memory( + scope=MemoryScope.USER, + target_id=uid, + content=updated_memory, + session=db_session, llm=llm, - apply_fn=lambda content: setattr(user, "memory_md", content), - commit_fn=db_session.commit, - rollback_fn=db_session.rollback, - label="memory", - scope="user", ) + return result.to_dict() except Exception as e: logger.exception("Failed to update user memory: %s", e) - return { - "status": "error", - "message": f"Failed to update memory: {e}", - } + return {"status": "error", "message": f"Failed to update memory: {e}"} return update_memory @@ -373,64 +57,26 @@ def create_update_team_memory_tool( db_session: AsyncSession, llm: Any | None = None, ): - """Factory function to create the team-memory update tool. - - The tool acquires its own short-lived ``AsyncSession`` per call via - :data:`async_session_maker` so the closure is safe to share across - HTTP requests by the compiled-agent cache. Capturing a per-request - session here would surface stale/closed sessions on cache hits. - The session's bound ``commit``/``rollback`` methods are captured at - call time, after ``async with`` has bound ``db_session`` locally. - - Args: - search_space_id: ID of the search space whose team memory is being - updated. - db_session: Reserved for registry compatibility. Per-call sessions - are opened via :data:`async_session_maker` inside the tool body. - llm: Optional LLM for the forced-rewrite path. - - Returns: - Configured update_memory tool for the team-memory scope. - """ - del db_session # per-call session — see docstring + """Factory for the team-memory update tool.""" + del db_session @tool async def update_memory(updated_memory: str) -> dict[str, Any]: """Update the team's shared memory document for this search space. - Your current team memory is shown in in the system - prompt. When the team shares important long-term information - (decisions, conventions, key facts, priorities), rewrite the memory - document to include the new information. Merge new facts with - existing ones, update contradictions, remove outdated entries, and - keep it concise. - - Args: - updated_memory: The FULL updated markdown document (not a diff). + The current team memory is shown in . Pass the FULL updated + markdown document, not a diff. """ try: async with async_session_maker() as db_session: - result = await db_session.execute( - select(SearchSpace).where(SearchSpace.id == search_space_id) - ) - space = result.scalars().first() - if not space: - return {"status": "error", "message": "Search space not found."} - - old_memory = space.shared_memory_md - - return await _save_memory( - updated_memory=updated_memory, - old_memory=old_memory, + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + content=updated_memory, + session=db_session, llm=llm, - apply_fn=lambda content: setattr( - space, "shared_memory_md", content - ), - commit_fn=db_session.commit, - rollback_fn=db_session.rollback, - label="team memory", - scope="team", ) + return result.to_dict() except Exception as e: logger.exception("Failed to update team memory: %s", e) return { @@ -439,3 +85,9 @@ def create_update_team_memory_tool( } return update_memory + + +__all__ = [ + "create_update_memory_tool", + "create_update_team_memory_tool", +] diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index 448818e88..5643c048b 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -110,6 +110,19 @@ def load_global_llm_configs(): except Exception as e: print(f"Warning: Failed to score global LLM configs: {e}") + # Planner LLM is a singleton role. If an operator accidentally + # marks multiple configs ``is_planner: true``, only the first one + # is used at runtime — surface the others at startup so the + # mistake is caught before traffic, not silently buried. + planner_cfgs = [c for c in configs if c.get("is_planner") is True] + if len(planner_cfgs) > 1: + extra_ids = [c.get("id") for c in planner_cfgs[1:]] + print( + "Warning: Multiple global LLM configs marked is_planner=true " + f"(ids {[c.get('id') for c in planner_cfgs]}); using id " + f"{planner_cfgs[0].get('id')} and ignoring {extra_ids}" + ) + return configs except Exception as e: print(f"Warning: Failed to load global LLM configs: {e}") diff --git a/surfsense_backend/app/config/global_llm_config.example.yaml b/surfsense_backend/app/config/global_llm_config.example.yaml index d92640c8d..83d556754 100644 --- a/surfsense_backend/app/config/global_llm_config.example.yaml +++ b/surfsense_backend/app/config/global_llm_config.example.yaml @@ -258,6 +258,45 @@ global_llm_configs: use_default_system_instructions: true citations_enabled: true + # Example: Planner LLM - small, fast model used for internal utility tasks + # + # The PLANNER role handles short, structured internal calls (KB query + # rewriting, date extraction, recency classification, etc.) that don't + # need frontier-tier capability. Pointing the planner at a cheap+fast + # model (gpt-4o-mini, Claude Haiku, Azure gpt-5.x-nano, Groq Llama, ...) + # typically saves 500ms-1.5s per turn vs. routing those same internal + # calls through the user's chat model. + # + # Activation: + # - Mark EXACTLY ONE global config with ``is_planner: true``. + # - If multiple are marked, the first one wins and a WARNING is logged. + # - If none is marked, every internal call falls back to the user's + # chat LLM (same behavior as before this flag existed). + # + # This config is operator-only — it is NOT exposed in the user-facing + # model selector, never billed against premium quota, and the + # billing_tier / anonymous_enabled fields below are ignored. + - id: -9 + name: "Global Planner (GPT-4o mini)" + description: "Internal-only planner LLM for query rewriting and classification" + is_planner: true + billing_tier: "free" + anonymous_enabled: false + seo_enabled: false + quota_reserve_tokens: 1000 + provider: "OPENAI" + model_name: "gpt-4o-mini" + api_key: "sk-your-openai-api-key-here" + api_base: "" + rpm: 3500 + tpm: 200000 + litellm_params: + temperature: 0 + max_tokens: 1000 + system_instructions: "" + use_default_system_instructions: true + citations_enabled: false + # ============================================================================= # OpenRouter Integration # ============================================================================= @@ -493,6 +532,20 @@ global_vision_llm_configs: # - Lower temperature (0.3) is recommended for accurate screenshot analysis # - Lower max_tokens (1000) is sufficient since autocomplete produces short suggestions # +# PLANNER LLM NOTES: +# - is_planner: true marks a config as the internal-only planner LLM (small, +# fast model used for KB query rewriting, date extraction, recency +# classification, etc.). Only one config may carry this flag — if +# multiple do, the first one wins and a startup WARNING is logged. +# - When no config is marked is_planner, every internal utility call falls +# back to the user's chat LLM (the historical behavior). +# - Planner configs are NOT shown in the user-facing model selector and +# are NOT billed against the user's premium quota. Their billing_tier, +# anonymous_enabled, seo_* fields are ignored. +# - Recommended models: gpt-4o-mini, claude-3-5-haiku, gemini-1.5-flash, +# azure gpt-5.x-nano, groq llama3-8b — anything <200ms p50 on a 1-2k +# prompt. Frontier models here defeat the purpose of the flag. +# # TOKEN QUOTA & ANONYMOUS ACCESS NOTES: # - billing_tier: "free" or "premium". Controls whether registered users need premium token quota. # - anonymous_enabled: true/false. Whether the model appears in the public no-login catalog. diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index 5b6a74376..ec4d1650f 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -54,6 +54,7 @@ from .search_spaces_routes import router as search_spaces_router from .slack_add_connector_route import router as slack_add_connector_router from .stripe_routes import router as stripe_router from .surfsense_docs_routes import router as surfsense_docs_router +from .team_memory_routes import router as team_memory_router from .teams_add_connector_route import router as teams_add_connector_router from .video_presentations_routes import router as video_presentations_router from .vision_llm_routes import router as vision_llm_router @@ -117,3 +118,4 @@ router.include_router(stripe_router) # Stripe checkout for additional page pack router.include_router(youtube_router) # YouTube playlist resolution router.include_router(prompts_router) router.include_router(memory_router) # User personal memory (memory.md style) +router.include_router(team_memory_router) # Search-space team memory diff --git a/surfsense_backend/app/routes/mcp_oauth_route.py b/surfsense_backend/app/routes/mcp_oauth_route.py index 1abc1f1ec..57248d631 100644 --- a/surfsense_backend/app/routes/mcp_oauth_route.py +++ b/surfsense_backend/app/routes/mcp_oauth_route.py @@ -428,7 +428,7 @@ async def mcp_oauth_callback( await session.commit() await session.refresh(db_connector) - _invalidate_cache(space_id) + _refresh_mcp_cache(db_connector.id, space_id) logger.info( "Re-authenticated %s MCP connector %s for user %s", @@ -481,7 +481,7 @@ async def mcp_oauth_callback( detail="A connector for this service already exists.", ) from e - _invalidate_cache(space_id) + _refresh_mcp_cache(new_connector.id, space_id) logger.info( "Created %s MCP connector %s for user %s in space %s", @@ -658,10 +658,17 @@ async def reauth_mcp_service( # --------------------------------------------------------------------------- -def _invalidate_cache(space_id: int) -> None: - try: - from app.agents.new_chat.tools.mcp_tool import invalidate_mcp_tools_cache +def _refresh_mcp_cache(connector_id: int, space_id: int) -> None: + """Evict the in-process MCP tool LRU and schedule background prefetch. - invalidate_mcp_tools_cache(space_id) + Wraps :func:`refresh_mcp_tools_cache_for_connector` so any failure is + isolated from the OAuth response flow. + """ + try: + from app.agents.new_chat.tools.mcp_tools_cache import ( + refresh_mcp_tools_cache_for_connector, + ) + + refresh_mcp_tools_cache_for_connector(connector_id, space_id) except Exception: - logger.debug("MCP cache invalidation skipped", exc_info=True) + logger.debug("MCP cache refresh skipped", exc_info=True) diff --git a/surfsense_backend/app/routes/memory_routes.py b/surfsense_backend/app/routes/memory_routes.py index e57ca4055..8e73a277c 100644 --- a/surfsense_backend/app/routes/memory_routes.py +++ b/surfsense_backend/app/routes/memory_routes.py @@ -1,75 +1,40 @@ -"""Routes for user memory management (personal memory.md).""" +"""Routes for user memory management.""" from __future__ import annotations -import logging - from fastapi import APIRouter, Depends, HTTPException -from langchain_core.messages import HumanMessage from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncSession -from app.agents.new_chat.llm_config import ( - create_chat_litellm_from_agent_config, - load_agent_llm_config_for_search_space, -) -from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory from app.db import User, get_async_session +from app.services.memory import ( + MemoryRead, + MemoryScope, + memory_limits, + read_memory, + reset_memory, + save_memory, +) from app.users import current_active_user -from app.utils.content_utils import extract_text_content - -logger = logging.getLogger(__name__) router = APIRouter() -class MemoryRead(BaseModel): - memory_md: str - - class MemoryUpdate(BaseModel): memory_md: str -class MemoryEditRequest(BaseModel): - query: str - search_space_id: int - - -_MEMORY_EDIT_PROMPT = """\ -You are a memory editor. The user wants to modify their memory document. \ -Apply the user's instruction to the existing memory document and output the \ -FULL updated document. - -RULES: -1. If the instruction asks to add something, add it with format: \ -- (YYYY-MM-DD) [fact|pref|instr] text, under an existing or new ## heading. \ -Heading names should be personal and descriptive, not generic categories. -2. If the instruction asks to remove something, remove the matching entry. -3. If the instruction asks to change something, update the matching entry. -4. Preserve existing ## headings and all other entries. -5. Every bullet must include a marker: [fact], [pref], or [instr]. -6. Use the user's first name (from ) in entries instead of "the user". -7. Output ONLY the updated markdown — no explanations, no wrapping. - -{user_name} - - -{current_memory} - - - -{instruction} -""" - - @router.get("/users/me/memory", response_model=MemoryRead) async def get_user_memory( user: User = Depends(current_active_user), session: AsyncSession = Depends(get_async_session), ): - await session.refresh(user, ["memory_md"]) - return MemoryRead(memory_md=user.memory_md or "") + memory_md = await read_memory( + scope=MemoryScope.USER, + target_id=user.id, + session=session, + ) + return MemoryRead(memory_md=memory_md, limits=memory_limits()) @router.put("/users/me/memory", response_model=MemoryRead) @@ -78,73 +43,27 @@ async def update_user_memory( user: User = Depends(current_active_user), session: AsyncSession = Depends(get_async_session), ): - if len(body.memory_md) > MEMORY_HARD_LIMIT: - raise HTTPException( - status_code=400, - detail=f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit ({len(body.memory_md):,} chars).", - ) - user.memory_md = body.memory_md - session.add(user) - await session.commit() - await session.refresh(user, ["memory_md"]) - return MemoryRead(memory_md=user.memory_md or "") + result = await save_memory( + scope=MemoryScope.USER, + target_id=user.id, + content=body.memory_md, + session=session, + ) + if result.status == "error": + raise HTTPException(status_code=400, detail=result.message) + return MemoryRead(memory_md=result.memory_md, limits=memory_limits()) -@router.post("/users/me/memory/edit", response_model=MemoryRead) -async def edit_user_memory( - body: MemoryEditRequest, +@router.post("/users/me/memory/reset", response_model=MemoryRead) +async def reset_user_memory( user: User = Depends(current_active_user), session: AsyncSession = Depends(get_async_session), ): - """Apply a natural language edit to the user's personal memory via LLM.""" - agent_config = await load_agent_llm_config_for_search_space( - session, body.search_space_id + result = await reset_memory( + scope=MemoryScope.USER, + target_id=user.id, + session=session, ) - if not agent_config: - raise HTTPException(status_code=500, detail="No LLM configuration available.") - llm = create_chat_litellm_from_agent_config(agent_config) - if not llm: - raise HTTPException(status_code=500, detail="Failed to create LLM instance.") - - await session.refresh(user, ["memory_md", "display_name"]) - current_memory = user.memory_md or "" - first_name = ( - user.display_name.strip().split()[0] - if user.display_name and user.display_name.strip() - else "The user" - ) - - prompt = _MEMORY_EDIT_PROMPT.format( - current_memory=current_memory or "(empty)", - instruction=body.query, - user_name=first_name, - ) - try: - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "memory-edit"]}, - ) - updated = extract_text_content(response.content).strip() - except Exception as e: - logger.exception("Memory edit LLM call failed: %s", e) - raise HTTPException(status_code=500, detail="Memory edit failed.") from e - - if not updated: - raise HTTPException(status_code=400, detail="LLM returned empty result.") - - result = await _save_memory( - updated_memory=updated, - old_memory=current_memory, - llm=llm, - apply_fn=lambda content: setattr(user, "memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - label="memory", - scope="user", - ) - - if result.get("status") == "error": - raise HTTPException(status_code=400, detail=result["message"]) - - await session.refresh(user, ["memory_md"]) - return MemoryRead(memory_md=user.memory_md or "") + if result.status == "error": + raise HTTPException(status_code=400, detail=result.message) + return MemoryRead(memory_md=result.memory_md, limits=memory_limits()) diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index e9ffb7050..1338fe16b 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -2650,9 +2650,11 @@ async def create_mcp_connector( f"for user {user.id} in search space {search_space_id}" ) - from app.agents.new_chat.tools.mcp_tool import invalidate_mcp_tools_cache + from app.agents.new_chat.tools.mcp_tools_cache import ( + refresh_mcp_tools_cache_for_connector, + ) - invalidate_mcp_tools_cache(search_space_id) + refresh_mcp_tools_cache_for_connector(db_connector.id, search_space_id) connector_read = SearchSourceConnectorRead.model_validate(db_connector) return MCPConnectorRead.from_connector(connector_read) @@ -2828,9 +2830,11 @@ async def update_mcp_connector( logger.info(f"Updated MCP connector {connector_id}") - from app.agents.new_chat.tools.mcp_tool import invalidate_mcp_tools_cache + from app.agents.new_chat.tools.mcp_tools_cache import ( + refresh_mcp_tools_cache_for_connector, + ) - invalidate_mcp_tools_cache(connector.search_space_id) + refresh_mcp_tools_cache_for_connector(connector.id, connector.search_space_id) connector_read = SearchSourceConnectorRead.model_validate(connector) return MCPConnectorRead.from_connector(connector_read) diff --git a/surfsense_backend/app/routes/search_spaces_routes.py b/surfsense_backend/app/routes/search_spaces_routes.py index 0f0e43035..db230b0f5 100644 --- a/surfsense_backend/app/routes/search_spaces_routes.py +++ b/surfsense_backend/app/routes/search_spaces_routes.py @@ -1,17 +1,10 @@ import logging from fastapi import APIRouter, Depends, HTTPException -from langchain_core.messages import HumanMessage -from pydantic import BaseModel as PydanticBaseModel from sqlalchemy import func, update from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select -from app.agents.new_chat.llm_config import ( - create_chat_litellm_from_agent_config, - load_agent_llm_config_for_search_space, -) -from app.agents.new_chat.tools.update_memory import MEMORY_HARD_LIMIT, _save_memory from app.config import config from app.db import ( ImageGenerationConfig, @@ -35,7 +28,6 @@ from app.schemas import ( SearchSpaceWithStats, ) from app.users import current_active_user -from app.utils.content_utils import extract_text_content from app.utils.rbac import check_permission, check_search_space_access logger = logging.getLogger(__name__) @@ -43,34 +35,6 @@ logger = logging.getLogger(__name__) router = APIRouter() -class _TeamMemoryEditRequest(PydanticBaseModel): - query: str - - -_TEAM_MEMORY_EDIT_PROMPT = """\ -You are a memory editor for a team workspace. The user wants to modify the \ -team's shared memory document. Apply the user's instruction to the existing \ -memory document and output the FULL updated document. - -RULES: -1. If the instruction asks to add something, add it with format: \ -- (YYYY-MM-DD) [fact] text, under an existing or new ## heading. \ -Heading names should be descriptive, not generic categories. -2. If the instruction asks to remove something, remove the matching entry. -3. If the instruction asks to change something, update the matching entry. -4. Preserve existing ## headings and all other entries. -5. NEVER use [pref] or [instr] markers. Team memory uses [fact] only. -6. Output ONLY the updated markdown — no explanations, no wrapping. - - -{current_memory} - - - -{instruction} -""" - - async def create_default_roles_and_membership( session: AsyncSession, search_space_id: int, @@ -294,15 +258,6 @@ async def update_search_space( update_data = search_space_update.model_dump(exclude_unset=True) - if ( - "shared_memory_md" in update_data - and len(update_data["shared_memory_md"] or "") > MEMORY_HARD_LIMIT - ): - raise HTTPException( - status_code=400, - detail=f"Team memory exceeds {MEMORY_HARD_LIMIT:,} character limit.", - ) - for key, value in update_data.items(): setattr(db_search_space, key, value) await session.commit() @@ -317,72 +272,6 @@ async def update_search_space( ) from e -@router.post( - "/searchspaces/{search_space_id}/memory/edit", - response_model=SearchSpaceRead, -) -async def edit_team_memory( - search_space_id: int, - body: _TeamMemoryEditRequest, - session: AsyncSession = Depends(get_async_session), - user: User = Depends(current_active_user), -): - """Apply a natural language edit to the team memory via LLM.""" - await check_search_space_access(session, user, search_space_id) - - agent_config = await load_agent_llm_config_for_search_space( - session, search_space_id - ) - if not agent_config: - raise HTTPException(status_code=500, detail="No LLM configuration available.") - llm = create_chat_litellm_from_agent_config(agent_config) - if not llm: - raise HTTPException(status_code=500, detail="Failed to create LLM instance.") - - result = await session.execute( - select(SearchSpace).filter(SearchSpace.id == search_space_id) - ) - db_search_space = result.scalars().first() - if not db_search_space: - raise HTTPException(status_code=404, detail="Search space not found") - - current_memory = db_search_space.shared_memory_md or "" - - prompt = _TEAM_MEMORY_EDIT_PROMPT.format( - current_memory=current_memory or "(empty)", - instruction=body.query, - ) - try: - response = await llm.ainvoke( - [HumanMessage(content=prompt)], - config={"tags": ["surfsense:internal", "memory-edit"]}, - ) - updated = extract_text_content(response.content).strip() - except Exception as e: - logger.exception("Team memory edit LLM call failed: %s", e) - raise HTTPException(status_code=500, detail="Team memory edit failed.") from e - - if not updated: - raise HTTPException(status_code=400, detail="LLM returned empty result.") - - save_result = await _save_memory( - updated_memory=updated, - old_memory=current_memory, - llm=llm, - apply_fn=lambda content: setattr(db_search_space, "shared_memory_md", content), - commit_fn=session.commit, - rollback_fn=session.rollback, - label="team memory", - scope="team", - ) - - if save_result.get("status") == "error": - raise HTTPException(status_code=400, detail=save_result["message"]) - - await session.refresh(db_search_space) - return db_search_space - - @router.post("/searchspaces/{search_space_id}/ai-sort") async def trigger_ai_sort( search_space_id: int, diff --git a/surfsense_backend/app/routes/team_memory_routes.py b/surfsense_backend/app/routes/team_memory_routes.py new file mode 100644 index 000000000..b37a99b03 --- /dev/null +++ b/surfsense_backend/app/routes/team_memory_routes.py @@ -0,0 +1,76 @@ +"""Routes for search-space team memory.""" + +from __future__ import annotations + +from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import User, get_async_session +from app.services.memory import ( + MemoryRead, + MemoryScope, + memory_limits, + read_memory, + reset_memory, + save_memory, +) +from app.users import current_active_user +from app.utils.rbac import check_search_space_access + +router = APIRouter() + + +class TeamMemoryUpdate(BaseModel): + memory_md: str + + +@router.get("/searchspaces/{search_space_id}/memory", response_model=MemoryRead) +async def get_team_memory( + search_space_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + await check_search_space_access(session, user, search_space_id) + memory_md = await read_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + session=session, + ) + return MemoryRead(memory_md=memory_md, limits=memory_limits()) + + +@router.put("/searchspaces/{search_space_id}/memory", response_model=MemoryRead) +async def update_team_memory( + search_space_id: int, + body: TeamMemoryUpdate, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + await check_search_space_access(session, user, search_space_id) + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + content=body.memory_md, + session=session, + ) + if result.status == "error": + raise HTTPException(status_code=400, detail=result.message) + return MemoryRead(memory_md=result.memory_md, limits=memory_limits()) + + +@router.post("/searchspaces/{search_space_id}/memory/reset", response_model=MemoryRead) +async def reset_team_memory( + search_space_id: int, + session: AsyncSession = Depends(get_async_session), + user: User = Depends(current_active_user), +): + await check_search_space_access(session, user, search_space_id) + result = await reset_memory( + scope=MemoryScope.TEAM, + target_id=search_space_id, + session=session, + ) + if result.status == "error": + raise HTTPException(status_code=400, detail=result.message) + return MemoryRead(memory_md=result.memory_md, limits=memory_limits()) diff --git a/surfsense_backend/app/schemas/search_space.py b/surfsense_backend/app/schemas/search_space.py index 77e34ea4b..70ed0004e 100644 --- a/surfsense_backend/app/schemas/search_space.py +++ b/surfsense_backend/app/schemas/search_space.py @@ -21,7 +21,6 @@ class SearchSpaceUpdate(BaseModel): description: str | None = None citations_enabled: bool | None = None qna_custom_instructions: str | None = None - shared_memory_md: str | None = None ai_file_sort_enabled: bool | None = None diff --git a/surfsense_backend/app/services/gmail/kb_sync_service.py b/surfsense_backend/app/services/gmail/kb_sync_service.py index 885ee4b94..6ff5f3c2b 100644 --- a/surfsense_backend/app/services/gmail/kb_sync_service.py +++ b/surfsense_backend/app/services/gmail/kb_sync_service.py @@ -1,3 +1,4 @@ +import asyncio import logging from datetime import datetime @@ -100,7 +101,9 @@ class GmailKBSyncService: else: logger.warning("No LLM configured -- using fallback summary") summary_content = f"Gmail Message: {subject}\n\n{indexable_content}" - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(indexable_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") diff --git a/surfsense_backend/app/services/google_calendar/kb_sync_service.py b/surfsense_backend/app/services/google_calendar/kb_sync_service.py index 602a55738..1f017ec4d 100644 --- a/surfsense_backend/app/services/google_calendar/kb_sync_service.py +++ b/surfsense_backend/app/services/google_calendar/kb_sync_service.py @@ -116,7 +116,9 @@ class GoogleCalendarKBSyncService: summary_content = ( f"Google Calendar Event: {event_summary}\n\n{indexable_content}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(indexable_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -295,7 +297,9 @@ class GoogleCalendarKBSyncService: summary_content = ( f"Google Calendar Event: {event_summary}\n\n{indexable_content}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(indexable_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") diff --git a/surfsense_backend/app/services/jira/kb_sync_service.py b/surfsense_backend/app/services/jira/kb_sync_service.py index 8e88bee81..5f6668377 100644 --- a/surfsense_backend/app/services/jira/kb_sync_service.py +++ b/surfsense_backend/app/services/jira/kb_sync_service.py @@ -98,7 +98,9 @@ class JiraKBSyncService: summary_content = ( f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(issue_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -212,7 +214,9 @@ class JiraKBSyncService: summary_content = ( f"Jira Issue {issue_identifier}: {issue_title}\n\n{issue_content}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(issue_content) diff --git a/surfsense_backend/app/services/llm_service.py b/surfsense_backend/app/services/llm_service.py index ade202c72..fa97fb33a 100644 --- a/surfsense_backend/app/services/llm_service.py +++ b/surfsense_backend/app/services/llm_service.py @@ -659,3 +659,36 @@ async def get_user_long_context_llm( return await get_document_summary_llm( session, search_space_id, disable_streaming=disable_streaming ) + + +def get_planner_llm() -> ChatLiteLLM | None: + """Return a planner LLM instance from the first global config marked + ``is_planner: true``, or ``None`` if no planner config is defined. + + The planner role handles short, structured internal tasks (KB search + planning: query rewriting, date extraction, recency classification). + These tasks are well-served by small/fast models (e.g. gpt-4o-mini, + Claude Haiku, Azure gpt-5.x-nano) — using the user's chat LLM for them + is unnecessarily expensive and slow. + + This helper reads from ``config.GLOBAL_LLM_CONFIGS`` (loaded at import + time from ``global_llm_config.yaml``) so it has no DB cost and can be + called synchronously from middleware/factory code. It returns the same + instance shape as the global path of ``get_search_space_llm_instance``. + + Callers MUST fall back to their chat LLM when this returns ``None`` so + deployments without a planner config keep working unchanged. + """ + from app.agents.new_chat.llm_config import create_chat_litellm_from_config + + planner_cfg = next( + ( + cfg + for cfg in config.GLOBAL_LLM_CONFIGS + if cfg.get("is_planner") is True + ), + None, + ) + if not planner_cfg: + return None + return create_chat_litellm_from_config(planner_cfg) diff --git a/surfsense_backend/app/services/memory/__init__.py b/surfsense_backend/app/services/memory/__init__.py new file mode 100644 index 000000000..eef6559c2 --- /dev/null +++ b/surfsense_backend/app/services/memory/__init__.py @@ -0,0 +1,32 @@ +"""First-class memory service for user and team markdown memory.""" + +from .schemas import MemoryLimits, MemoryRead +from .service import ( + MemoryScope, + SaveResult, + memory_limits, + read_memory, + reset_memory, + save_memory, +) +from .validation import ( + MEMORY_HARD_LIMIT, + MEMORY_SOFT_LIMIT, + validate_bullet_format, + validate_memory_scope, +) + +__all__ = [ + "MEMORY_HARD_LIMIT", + "MEMORY_SOFT_LIMIT", + "MemoryLimits", + "MemoryRead", + "MemoryScope", + "SaveResult", + "memory_limits", + "read_memory", + "reset_memory", + "save_memory", + "validate_bullet_format", + "validate_memory_scope", +] diff --git a/surfsense_backend/app/services/memory/document.py b/surfsense_backend/app/services/memory/document.py new file mode 100644 index 000000000..498195e25 --- /dev/null +++ b/surfsense_backend/app/services/memory/document.py @@ -0,0 +1,200 @@ +"""Memory-specific markdown document model and canonical renderer. + +This intentionally parses only SurfSense memory's small markdown contract: +``##`` sections with dated bullet items. Unknown lines are preserved so user +edits are not lost, while legacy marker bullets are normalized on render. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import date + +DEFAULT_LEGACY_SECTION = "Memory" +LEGACY_MARKERS = frozenset({"fact", "pref", "instr"}) + + +@dataclass(frozen=True) +class MemoryBullet: + entry_date: date + text: str + + +@dataclass(frozen=True) +class MemoryRawLine: + text: str + + +MemoryLine = MemoryBullet | MemoryRawLine + + +@dataclass(frozen=True) +class MemorySection: + heading: str + lines: list[MemoryLine] = field(default_factory=list) + explicit_heading: bool = True + + +@dataclass(frozen=True) +class MemoryDocument: + sections: list[MemorySection] = field(default_factory=list) + + @property + def has_explicit_heading(self) -> bool: + return any(section.explicit_heading for section in self.sections) + + +def is_section_heading(line: str) -> bool: + return line.startswith("## ") and bool(line[3:].strip()) + + +def heading_text(line: str) -> str: + return line[3:].strip() + + +def normalize_heading(heading: str) -> str: + chars: list[str] = [] + previous_was_space = True + for char in heading.strip().lower(): + if char.isalnum(): + chars.append(char) + previous_was_space = False + elif not previous_was_space: + chars.append(" ") + previous_was_space = True + return "".join(chars).strip() + + +def parse_bullet_line(line: str) -> MemoryBullet | None: + stripped = line.strip() + if not stripped.startswith("- "): + return None + + body = stripped[2:] + parsed = _parse_canonical_bullet(body) + if parsed is not None: + return parsed + return _parse_legacy_bullet(body) + + +def _parse_canonical_bullet(body: str) -> MemoryBullet | None: + if len(body) < 13 or body[10:12] != ": ": + return None + try: + entry_date = date.fromisoformat(body[:10]) + except ValueError: + return None + text = body[12:].strip() + if not text: + return None + return MemoryBullet(entry_date=entry_date, text=text) + + +def _parse_legacy_bullet(body: str) -> MemoryBullet | None: + if len(body) < 20 or not body.startswith("("): + return None + if len(body) < 14 or body[11:14] != ") [": + return None + try: + entry_date = date.fromisoformat(body[1:11]) + except ValueError: + return None + + marker_end = body.find("] ", 14) + if marker_end == -1: + return None + marker = body[14:marker_end] + if marker not in LEGACY_MARKERS: + return None + + text = body[marker_end + 2 :].strip() + if not text: + return None + return MemoryBullet(entry_date=entry_date, text=text) + + +def parse_memory_document(content: str | None) -> MemoryDocument: + if not content: + return MemoryDocument() + + sections: list[MemorySection] = [] + current_heading: str | None = None + current_explicit = True + current_lines: list[MemoryLine] = [] + + def flush_current() -> None: + nonlocal current_heading, current_explicit, current_lines + if current_heading is None: + return + sections.append( + MemorySection( + heading=current_heading, + lines=current_lines, + explicit_heading=current_explicit, + ) + ) + current_heading = None + current_explicit = True + current_lines = [] + + for raw_line in content.strip().splitlines(): + line = raw_line.rstrip() + if is_section_heading(line): + flush_current() + current_heading = heading_text(line) + current_explicit = True + current_lines = [] + continue + + bullet = parse_bullet_line(line) + if current_heading is None: + if bullet is None: + continue + current_heading = DEFAULT_LEGACY_SECTION + current_explicit = False + current_lines = [bullet] + continue + + current_lines.append(bullet if bullet is not None else MemoryRawLine(text=line)) + + flush_current() + return MemoryDocument(sections=sections) + + +def render_memory_document(document: MemoryDocument) -> str: + rendered_sections: list[str] = [] + for section in document.sections: + section_lines = [f"## {section.heading}"] + for line in section.lines: + if isinstance(line, MemoryBullet): + section_lines.append(f"- {line.entry_date.isoformat()}: {line.text}") + else: + section_lines.append(line.text) + rendered_sections.append("\n".join(section_lines).strip()) + return "\n\n".join(section for section in rendered_sections if section).strip() + + +def extract_headings(memory: str | None) -> set[str]: + document = parse_memory_document(memory) + return { + normalize_heading(section.heading) + for section in document.sections + if section.explicit_heading + } + + +def has_explicit_heading(content: str) -> bool: + return parse_memory_document(content).has_explicit_heading + + +def nonstandard_bullets(content: str) -> list[str]: + warnings: list[str] = [] + for line in content.splitlines(): + stripped = line.strip() + if not stripped.startswith("- "): + continue + if parse_bullet_line(stripped) is not None: + continue + short = stripped[:80] + ("..." if len(stripped) > 80 else "") + warnings.append(f"Non-standard memory bullet: {short}") + return warnings diff --git a/surfsense_backend/app/services/memory/prompts.py b/surfsense_backend/app/services/memory/prompts.py new file mode 100644 index 000000000..25c09e9c5 --- /dev/null +++ b/surfsense_backend/app/services/memory/prompts.py @@ -0,0 +1,20 @@ +"""Prompts used by the memory service.""" + +FORCED_REWRITE_PROMPT = """\ +You are a memory curator. The following memory document exceeds the character \ +limit and must be shortened. + +RULES: +1. Rewrite the document to be under {target} characters. +2. Output Markdown only. Use clear `##` headings and concise bullet points. +3. New-format bullets should look like: `- YYYY-MM-DD: memory text`. +4. If the input contains legacy markers like `(YYYY-MM-DD) [fact]`, preserve the + information but remove the inline marker in the output. +5. Preserve durable instructions and preferences before generic facts when + compressing personal memory. +6. Preserve existing headings when useful; merge duplicate headings and bullets. +7. Output ONLY the consolidated markdown — no explanations, no wrapping. + + +{content} +""" diff --git a/surfsense_backend/app/services/memory/rewrite.py b/surfsense_backend/app/services/memory/rewrite.py new file mode 100644 index 000000000..270904ce7 --- /dev/null +++ b/surfsense_backend/app/services/memory/rewrite.py @@ -0,0 +1,35 @@ +"""LLM-backed memory rewrite helpers.""" + +from __future__ import annotations + +import logging +from typing import Any + +from langchain_core.messages import HumanMessage + +from app.services.memory.prompts import FORCED_REWRITE_PROMPT +from app.services.memory.validation import MEMORY_HARD_LIMIT +from app.utils.content_utils import extract_text_content + +logger = logging.getLogger(__name__) + + +async def forced_rewrite(content: str, llm: Any) -> str | None: + """Use a focused LLM call to compress memory under the hard limit.""" + try: + prompt = FORCED_REWRITE_PROMPT.format( + target=MEMORY_HARD_LIMIT, + content=content, + ) + response = await llm.ainvoke( + [HumanMessage(content=prompt)], + config={"tags": ["surfsense:internal", "memory-rewrite"]}, + ) + text = extract_text_content(response.content).strip() + if not text: + logger.warning("Forced memory rewrite returned empty text") + return None + return text + except Exception: + logger.exception("Forced memory rewrite LLM call failed") + return None diff --git a/surfsense_backend/app/services/memory/schemas.py b/surfsense_backend/app/services/memory/schemas.py new file mode 100644 index 000000000..78c69d800 --- /dev/null +++ b/surfsense_backend/app/services/memory/schemas.py @@ -0,0 +1,19 @@ +"""Schemas for memory API responses and structured extraction.""" + +from __future__ import annotations + +from pydantic import BaseModel + + +class MemoryLimits(BaseModel): + """Canonical memory size limits exposed to clients.""" + + soft: int + hard: int + + +class MemoryRead(BaseModel): + """Memory document payload returned by user and team memory APIs.""" + + memory_md: str + limits: MemoryLimits diff --git a/surfsense_backend/app/services/memory/service.py b/surfsense_backend/app/services/memory/service.py new file mode 100644 index 000000000..feca000c9 --- /dev/null +++ b/surfsense_backend/app/services/memory/service.py @@ -0,0 +1,247 @@ +"""Canonical read/write/reset/extract service for markdown memory.""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from enum import StrEnum +from typing import Any, Literal +from uuid import UUID + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db import SearchSpace, User +from app.services.memory.document import parse_memory_document, render_memory_document +from app.services.memory.rewrite import forced_rewrite +from app.services.memory.schemas import MemoryLimits +from app.services.memory.validation import ( + MEMORY_HARD_LIMIT, + MEMORY_SOFT_LIMIT, + soft_limit_warning, + strip_preamble_to_first_heading, + validate_bullet_format, + validate_diff, + validate_heading_sanity, + validate_memory_scope, + validate_memory_size, +) + +logger = logging.getLogger(__name__) + +_NO_UPDATE_SENTINELS = frozenset( + { + "NO_UPDATE", + "NO UPDATE", + "NO_CHANGE", + "NO CHANGE", + } +) + + +class MemoryScope(StrEnum): + USER = "user" + TEAM = "team" + + +@dataclass(frozen=True) +class SaveResult: + status: Literal["saved", "error", "no_op"] + message: str + memory_md: str = "" + warnings: list[str] = field(default_factory=list) + diff_warnings: list[str] = field(default_factory=list) + format_warnings: list[str] = field(default_factory=list) + notice: str | None = None + + def to_dict(self) -> dict[str, Any]: + data: dict[str, Any] = { + "status": self.status, + "message": self.message, + "memory_md": self.memory_md, + } + if self.notice: + data["notice"] = self.notice + if self.warnings: + data["warnings"] = self.warnings + if len(self.warnings) == 1: + data["warning"] = self.warnings[0] + if self.diff_warnings: + data["diff_warnings"] = self.diff_warnings + if self.format_warnings: + data["format_warnings"] = self.format_warnings + return data + + +def memory_limits() -> MemoryLimits: + return MemoryLimits(soft=MEMORY_SOFT_LIMIT, hard=MEMORY_HARD_LIMIT) + + +def _normalize_scope(scope: MemoryScope | str) -> MemoryScope: + return scope if isinstance(scope, MemoryScope) else MemoryScope(scope) + + +def _normalize_user_id(target_id: str | UUID) -> UUID: + return UUID(target_id) if isinstance(target_id, str) else target_id + + +async def _load_target( + *, + scope: MemoryScope | str, + target_id: str | int | UUID, + session: AsyncSession, +) -> User | SearchSpace | None: + normalized = _normalize_scope(scope) + if normalized is MemoryScope.USER: + result = await session.execute( + select(User).where(User.id == _normalize_user_id(target_id)) # type: ignore[arg-type] + ) + return result.scalars().first() + result = await session.execute( + select(SearchSpace).where(SearchSpace.id == int(target_id)) + ) + return result.scalars().first() + + +def _get_memory(target: User | SearchSpace, scope: MemoryScope) -> str: + if scope is MemoryScope.USER: + return getattr(target, "memory_md", None) or "" + return getattr(target, "shared_memory_md", None) or "" + + +def _set_memory(target: User | SearchSpace, scope: MemoryScope, content: str) -> None: + if scope is MemoryScope.USER: + target.memory_md = content + else: + target.shared_memory_md = content + + +async def read_memory( + *, + scope: MemoryScope | str, + target_id: str | int | UUID, + session: AsyncSession, +) -> str: + normalized = _normalize_scope(scope) + target = await _load_target(scope=normalized, target_id=target_id, session=session) + if target is None: + return "" + return _get_memory(target, normalized) + + +async def save_memory( + *, + scope: MemoryScope | str, + target_id: str | int | UUID, + content: str, + session: AsyncSession, + llm: Any | None = None, +) -> SaveResult: + normalized = _normalize_scope(scope) + if not isinstance(content, str): + return SaveResult( + status="error", + message="Internal error: memory payload must be a string.", + ) + + target = await _load_target(scope=normalized, target_id=target_id, session=session) + if target is None: + return SaveResult( + status="error", + message="User not found." + if normalized is MemoryScope.USER + else "Search space not found.", + ) + + old_memory = _get_memory(target, normalized) + next_content = strip_preamble_to_first_heading(content.strip()) + notice: str | None = None + warnings: list[str] = [] + + if next_content.upper() in _NO_UPDATE_SENTINELS: + return SaveResult( + status="no_op", + message="No memory update requested.", + memory_md=old_memory, + ) + + if len(next_content) > MEMORY_HARD_LIMIT and llm is not None: + rewritten = await forced_rewrite(next_content, llm) + if rewritten is not None and len(rewritten) < len(next_content): + next_content = strip_preamble_to_first_heading(rewritten) + notice = "Memory was automatically rewritten to fit within limits." + + for validation in ( + validate_memory_size(next_content), + validate_heading_sanity(next_content), + ): + if validation: + return SaveResult( + status="error", + message=validation["message"], + memory_md=old_memory, + ) + + scope_error, scope_warnings = validate_memory_scope( + next_content, + normalized.value, + old_memory=old_memory, + ) + warnings.extend(scope_warnings) + if scope_error: + return SaveResult( + status="error", + message=scope_error["message"], + memory_md=old_memory, + warnings=warnings, + ) + + next_content = render_memory_document(parse_memory_document(next_content)) + + try: + _set_memory(target, normalized, next_content) + session.add(target) + await session.commit() + except Exception as e: + logger.exception("Failed to update %s memory: %s", normalized.value, e) + await session.rollback() + return SaveResult( + status="error", + message=f"Failed to update {normalized.value} memory: {e}", + memory_md=old_memory, + ) + + diff_warnings = validate_diff(old_memory, next_content) + format_warnings = validate_bullet_format(next_content) + warning = soft_limit_warning(next_content) + if warning: + warnings.append(warning) + + return SaveResult( + status="saved", + message=( + "Memory updated." + if normalized is MemoryScope.USER + else "Team memory updated." + ), + memory_md=next_content, + warnings=warnings, + diff_warnings=diff_warnings, + format_warnings=format_warnings, + notice=notice, + ) + + +async def reset_memory( + *, + scope: MemoryScope | str, + target_id: str | int | UUID, + session: AsyncSession, +) -> SaveResult: + return await save_memory( + scope=scope, + target_id=target_id, + content="", + session=session, + llm=None, + ) diff --git a/surfsense_backend/app/services/memory/validation.py b/surfsense_backend/app/services/memory/validation.py new file mode 100644 index 000000000..6565f39c7 --- /dev/null +++ b/surfsense_backend/app/services/memory/validation.py @@ -0,0 +1,140 @@ +"""Validation helpers for markdown-backed memory.""" + +from __future__ import annotations + +from typing import Literal + +from app.services.memory.document import ( + extract_headings, + has_explicit_heading, + nonstandard_bullets, + parse_memory_document, +) + +MEMORY_SOFT_LIMIT = 18_000 +MEMORY_HARD_LIMIT = 25_000 + +_FORBIDDEN_TEAM_HEADINGS = { + "preferences", + "instructions", + "personal notes", + "personal instructions", +} + + +def has_markdown_heading(content: str) -> bool: + return has_explicit_heading(content) + + +def strip_preamble_to_first_heading(content: str) -> str: + """Drop model preamble before the first ``##`` heading, if one exists.""" + lines = content.splitlines() + for index, line in enumerate(lines): + if line.startswith("## ") and line[3:].strip(): + return "\n".join(lines[index:]).strip() + return content.strip() + + +def validate_memory_size(content: str) -> dict[str, str] | None: + length = len(content) + if length > MEMORY_HARD_LIMIT: + return { + "status": "error", + "message": ( + f"Memory exceeds {MEMORY_HARD_LIMIT:,} character limit " + f"({length:,} chars). Consolidate by merging related items, " + "removing outdated entries, and shortening descriptions." + ), + } + return None + + +def validate_heading_sanity(content: str) -> dict[str, str] | None: + """Block long prose blobs without headings unless they are legacy bullets.""" + stripped = content.strip() + if not stripped: + return None + if has_markdown_heading(stripped): + return None + if len(stripped) <= 40: + return None + if parse_memory_document(stripped).sections: + return None + return { + "status": "error", + "message": "Memory must be markdown with at least one ## heading.", + } + + +def validate_memory_scope( + content: str, + scope: Literal["user", "team"], + *, + old_memory: str | None = None, +) -> tuple[dict[str, str] | None, list[str]]: + """Reject new personal headings in team memory, grandfather existing ones.""" + if scope != "team": + return None, [] + + old_forbidden = extract_headings(old_memory) & _FORBIDDEN_TEAM_HEADINGS + new_forbidden = extract_headings(content) & _FORBIDDEN_TEAM_HEADINGS + introduced = sorted(new_forbidden - old_forbidden) + grandfathered = sorted(new_forbidden & old_forbidden) + + warnings: list[str] = [] + if grandfathered: + warnings.append( + "Team memory contains legacy personal headings: " + + ", ".join(grandfathered) + + ". Please consolidate them into team-safe headings." + ) + if introduced: + return ( + { + "status": "error", + "message": ( + "Team memory cannot introduce personal headings: " + + ", ".join(introduced) + + ". Use team-safe headings instead." + ), + }, + warnings, + ) + return None, warnings + + +def validate_bullet_format(content: str) -> list[str]: + return nonstandard_bullets(content) + + +def validate_diff(old_memory: str | None, new_memory: str) -> list[str]: + if not old_memory: + return [] + + warnings: list[str] = [] + old_headings = extract_headings(old_memory) + new_headings = extract_headings(new_memory) + dropped = old_headings - new_headings + if dropped: + names = ", ".join(sorted(dropped)) + warnings.append( + f"Sections removed: {names}. If unintentional, restore them from the memory document." + ) + + old_len = len(old_memory) + new_len = len(new_memory) + if old_len > 0 and new_len < old_len * 0.4: + warnings.append( + f"Memory shrank significantly ({old_len:,} -> {new_len:,} chars). Possible data loss." + ) + return warnings + + +def soft_limit_warning(content: str) -> str | None: + length = len(content) + if length > MEMORY_SOFT_LIMIT: + return ( + f"Memory is at {length:,}/{MEMORY_HARD_LIMIT:,} characters. " + "Consolidate by merging related items and removing less important entries." + ) + return None diff --git a/surfsense_backend/app/services/onedrive/kb_sync_service.py b/surfsense_backend/app/services/onedrive/kb_sync_service.py index e9b2e38ea..e1da3b4a1 100644 --- a/surfsense_backend/app/services/onedrive/kb_sync_service.py +++ b/surfsense_backend/app/services/onedrive/kb_sync_service.py @@ -1,3 +1,4 @@ +import asyncio import logging from datetime import datetime @@ -95,7 +96,9 @@ class OneDriveKBSyncService: else: logger.warning("No LLM configured — using fallback summary") summary_content = f"OneDrive File: {file_name}\n\n{indexable_content}" - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(indexable_content) now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S") diff --git a/surfsense_backend/app/services/revert_service.py b/surfsense_backend/app/services/revert_service.py index d02a31345..60f6503aa 100644 --- a/surfsense_backend/app/services/revert_service.py +++ b/surfsense_backend/app/services/revert_service.py @@ -29,6 +29,7 @@ same trap waiting to happen). from __future__ import annotations +import asyncio import logging from dataclasses import dataclass from datetime import UTC, datetime @@ -234,7 +235,7 @@ async def _restore_in_place_document( if isinstance(c, dict) and isinstance(c.get("content"), str) ] if chunk_texts: - chunk_embeddings = embed_texts(chunk_texts) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunk_texts) session.add_all( [ Chunk(document_id=doc.id, content=text, embedding=embedding) @@ -244,7 +245,9 @@ async def _restore_in_place_document( ] ) if isinstance(revision.content_before, str): - doc.embedding = embed_texts([revision.content_before])[0] + doc.embedding = ( + await asyncio.to_thread(embed_texts, [revision.content_before]) + )[0] doc.updated_at = datetime.now(UTC) return RevertOutcome(status="ok", message="Document restored from snapshot.") @@ -320,7 +323,7 @@ async def _reinsert_document_from_revision( session.add(new_doc) await session.flush() - new_doc.embedding = embed_texts([content])[0] + new_doc.embedding = (await asyncio.to_thread(embed_texts, [content]))[0] chunk_texts = [] chunks_before = revision.chunks_before if isinstance(chunks_before, list): @@ -330,7 +333,7 @@ async def _reinsert_document_from_revision( if isinstance(c, dict) and isinstance(c.get("content"), str) ] if chunk_texts: - chunk_embeddings = embed_texts(chunk_texts) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunk_texts) session.add_all( [ Chunk(document_id=new_doc.id, content=text, embedding=embedding) diff --git a/surfsense_backend/app/services/token_tracking_service.py b/surfsense_backend/app/services/token_tracking_service.py index 9406d9be4..3f07e6f9e 100644 --- a/surfsense_backend/app/services/token_tracking_service.py +++ b/surfsense_backend/app/services/token_tracking_service.py @@ -325,6 +325,24 @@ class TokenTrackingCallback(CustomLogger): total_tokens = getattr(usage, "total_tokens", 0) or 0 call_kind = "chat" + # Prompt-cache accounting. LiteLLM normalizes every provider's cache + # fields onto ``usage.prompt_tokens_details``: + # - ``cached_tokens`` — cache reads (OpenAI/Azure native, DeepSeek + # mapped from ``prompt_cache_hit_tokens``, + # Anthropic mapped from ``cache_read_input_tokens``). + # - ``cache_creation_tokens`` — cache writes (Anthropic only; OpenAI/Azure + # do not expose a write count). + # See ``litellm.types.utils.Usage.__init__`` for the mapping. + cached_tokens = 0 + cache_creation_tokens = 0 + if not is_image: + prompt_details = getattr(usage, "prompt_tokens_details", None) + if prompt_details is not None: + cached_tokens = getattr(prompt_details, "cached_tokens", 0) or 0 + cache_creation_tokens = ( + getattr(prompt_details, "cache_creation_tokens", 0) or 0 + ) + model = kwargs.get("model", "unknown") cost_usd = _extract_cost_usd( @@ -357,9 +375,23 @@ class TokenTrackingCallback(CustomLogger): cost_micros=cost_micros, call_kind=call_kind, ) + + # Per-LLM-call wall-clock latency (LiteLLM passes datetime objects). + call_latency_s: float | None = None + try: + if start_time is not None and end_time is not None: + delta = end_time - start_time + call_latency_s = getattr(delta, "total_seconds", lambda: float(delta))() + except Exception: + call_latency_s = None + + cache_hit_ratio: float | None = None + if prompt_tokens > 0 and (cached_tokens > 0 or cache_creation_tokens > 0): + cache_hit_ratio = cached_tokens / prompt_tokens + logger.info( "[TokenTracking] Captured: model=%s kind=%s prompt=%d completion=%d total=%d " - "cost=$%.6f (%d micros) (accumulator now has %d calls)", + "cost=$%.6f (%d micros) (accumulator now has %d calls)%s%s", model, call_kind, prompt_tokens, @@ -368,6 +400,17 @@ class TokenTrackingCallback(CustomLogger): cost_usd, cost_micros, len(acc.calls), + f" latency={call_latency_s:.3f}s" if call_latency_s is not None else "", + ( + f" cache_read={cached_tokens} cache_write={cache_creation_tokens}" + f" hit_ratio={cache_hit_ratio:.1%}" + if cache_hit_ratio is not None + else ( + f" cache_read={cached_tokens} cache_write={cache_creation_tokens}" + if (cached_tokens or cache_creation_tokens) + else "" + ) + ), ) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index 9a69b6164..c9faa1691 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -39,10 +39,6 @@ from app.agents.new_chat.llm_config import ( load_agent_config, load_global_llm_config_by_id, ) -from app.agents.new_chat.memory_extraction import ( - extract_and_save_memory, - extract_and_save_team_memory, -) from app.agents.new_chat.mention_resolver import resolve_mentions, substitute_in_text from app.agents.new_chat.middleware.busy_mutex import ( end_turn, @@ -64,8 +60,6 @@ from app.db import ( ) from app.prompts import TITLE_GENERATION_PROMPT from app.services.auto_model_pin_service import ( - is_recently_healthy, - mark_healthy, mark_runtime_cooldown, resolve_or_get_pinned_llm_config_id, ) @@ -283,7 +277,6 @@ class StreamResult: accumulated_text: str = "" is_interrupted: bool = False sandbox_files: list[str] = field(default_factory=list) - agent_called_update_memory: bool = False request_id: str | None = None turn_id: str = "" filesystem_mode: str = "cloud" @@ -506,54 +499,6 @@ def _is_provider_rate_limited(exc: BaseException) -> bool: ) -_PREFLIGHT_TIMEOUT_SEC: float = 2.5 -_PREFLIGHT_MAX_TOKENS: int = 1 - - -async def _preflight_llm(llm: Any) -> None: - """Issue a minimal completion to confirm the pinned model isn't 429'ing. - - Used before agent build / planner / classifier / title-gen so a known-bad - free OpenRouter deployment is detected and repinned before it cascades - into multiple wasted internal calls. The probe is intentionally cheap: - one token, low timeout, tagged ``surfsense:internal`` so token tracking - and SSE pipelines treat it as overhead rather than user output. - - Raises the original exception when the provider responds with a - rate-limit-shaped error so the caller can drive the cooldown/repin - branch via :func:`_is_provider_rate_limited`. Other transient failures - are swallowed — the caller continues to the normal stream path and the - in-stream recovery loop remains the safety net. - """ - from litellm import acompletion - - model = getattr(llm, "model", None) - if not model or model == "auto": - # Auto-mode router doesn't have a single deployment to ping; the - # router itself handles per-deployment rate-limit accounting. - return - - try: - await acompletion( - model=model, - messages=[{"role": "user", "content": "ping"}], - api_key=getattr(llm, "api_key", None), - api_base=getattr(llm, "api_base", None), - max_tokens=_PREFLIGHT_MAX_TOKENS, - timeout=_PREFLIGHT_TIMEOUT_SEC, - stream=False, - metadata={"tags": ["surfsense:internal", "auto-pin-preflight"]}, - ) - except Exception as exc: - if _is_provider_rate_limited(exc): - raise - logging.getLogger(__name__).debug( - "auto_pin_preflight non_rate_limit_error model=%s err=%s", - model, - exc, - ) - - async def _build_main_agent_for_thread( agent_factory: Any, *, @@ -571,9 +516,9 @@ async def _build_main_agent_for_thread( disabled_tools: list[str] | None = None, mentioned_document_ids: list[int] | None = None, ) -> Any: - """Single (re)build path so the agent factory cannot drift across - initial build, preflight repin, and mid-stream 429 recovery for one - ``thread_id``: a graph swap mid-turn would corrupt checkpointer state.""" + """Single (re)build path so the agent factory cannot drift across the + initial build and mid-stream 429 recovery for one ``thread_id``: a + graph swap mid-turn would corrupt checkpointer state.""" return await agent_factory( llm=llm, search_space_id=search_space_id, @@ -591,29 +536,6 @@ async def _build_main_agent_for_thread( ) -async def _settle_speculative_agent_build(task: asyncio.Task[Any]) -> None: - """Wait for a discarded speculative agent build to release shared state. - - Used by the parallel preflight + agent-build path. The speculative build - closes over the request-scoped ``AsyncSession`` (for the brief connector - discovery / tool-factory window before its CPU work moves into a worker - thread). If preflight reports a 429 we want to fall back to the original - repin → reload → rebuild path, but we MUST NOT touch ``session`` again - until any in-flight session work owned by the speculative build has - fully settled — :class:`sqlalchemy.ext.asyncio.AsyncSession` is not - concurrency-safe and the same hazard cost us a hard ``InvalidRequestError`` - earlier in this PR (see ``connector_service`` parallel-gather revert). - - We simply ``await`` the task and swallow any exception: in this path the - build's outcome is irrelevant — success populates the agent cache (a free - side effect), failure is discarded. The wasted CPU is acceptable since - 429 fallbacks are rare and the original sequential code also paid the - full build cost on the same path. - """ - with contextlib.suppress(BaseException): - await task - - def _classify_stream_exception( exc: Exception, *, @@ -1241,39 +1163,6 @@ async def stream_new_chat( yield streaming_service.format_done() return - # Auto-mode preflight ping. Runs ONLY for thread-pinned auto cfgs - # (negative ids selected via ``resolve_or_get_pinned_llm_config_id``) - # whose health hasn't already been confirmed within the TTL window. - # Detecting a 429 here lets us repin BEFORE the planner/classifier/ - # title-generation LLM calls fan out and each independently hit the - # same upstream rate limit. - # - # PERF: preflight is a network round-trip to the LLM provider (~1-5s) - # and is independent of the agent build (CPU-bound, ~5-7s). They used - # to run sequentially → ``preflight + build`` on cold cache = 11.5s. - # We now kick off preflight as a background task FIRST, then run the - # synchronous setup work and the agent build in parallel. In the - # success path (the common case) total wall time drops to roughly - # ``max(preflight, build)`` — the preflight finishes during the - # agent compile and we just consume its result. In the rare 429 - # path the speculative build is awaited to completion (so its - # session usage is fully released) via - # :func:`_settle_speculative_agent_build`, then discarded, and - # we fall back to the original repin-and-rebuild flow. - preflight_needed = ( - requested_llm_config_id == 0 - and llm_config_id < 0 - and not is_recently_healthy(llm_config_id) - ) - preflight_task: asyncio.Task[None] | None = None - _t_preflight = 0.0 - if preflight_needed: - _t_preflight = time.perf_counter() - preflight_task = asyncio.create_task( - _preflight_llm(llm), - name=f"auto_pin_preflight:{llm_config_id}", - ) - # Create connector service _t0 = time.perf_counter() connector_service = ConnectorService(session, search_space_id=search_space_id) @@ -1307,136 +1196,26 @@ async def stream_new_chat( if use_multi_agent else create_surfsense_deep_agent ) - # Speculative agent build — runs in parallel with the preflight - # task (if any). Built with the *current* ``llm`` / ``agent_config``; - # if preflight reports 429 we will discard this future and rebuild - # against the freshly pinned config below. - agent_build_task = asyncio.create_task( - _build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - mentioned_document_ids=mentioned_document_ids, - ), - name="agent_build:stream_new_chat", + # Build the agent inline. Provider 429s surface through the + # in-stream recovery loop below (``_is_provider_rate_limited``), + # which repins the thread to an eligible alternative config and + # rebuilds the agent before the user sees any output. + agent = await _build_main_agent_for_thread( + agent_factory, + llm=llm, + search_space_id=search_space_id, + db_session=session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=chat_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=visibility, + filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, + mentioned_document_ids=mentioned_document_ids, ) - - agent: Any = None - if preflight_task is not None: - try: - await preflight_task - mark_healthy(llm_config_id) - _perf_log.info( - "[stream_new_chat] auto_pin_preflight ok config_id=%s took=%.3fs (parallel)", - llm_config_id, - time.perf_counter() - _t_preflight, - ) - except Exception as preflight_exc: - # Both branches below need the session: the non-429 path - # may unwind via cleanup that uses ``session``, and the - # 429 path explicitly calls ``resolve_or_get_pinned_llm_config_id`` - # against it. Wait for the speculative build to release its - # session usage before we proceed. - await _settle_speculative_agent_build(agent_build_task) - if not _is_provider_rate_limited(preflight_exc): - raise - # 429: speculative agent is discarded; run the original - # repin → reload → rebuild path against the freshly - # pinned config. - previous_config_id = llm_config_id - mark_runtime_cooldown( - previous_config_id, reason="preflight_rate_limited" - ) - try: - llm_config_id = ( - await resolve_or_get_pinned_llm_config_id( - session, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - selected_llm_config_id=0, - exclude_config_ids={previous_config_id}, - requires_image_input=_requires_image_input, - ) - ).resolved_llm_config_id - except ValueError as pin_error: - yield _emit_stream_error( - message=str(pin_error), - error_kind="server_error", - error_code="SERVER_ERROR", - ) - yield streaming_service.format_done() - return - - llm, agent_config, llm_load_error = await _load_llm_bundle( - llm_config_id - ) - if llm_load_error or not llm: - yield _emit_stream_error( - message=llm_load_error or "Failed to create LLM instance", - error_kind="server_error", - error_code="SERVER_ERROR", - ) - yield streaming_service.format_done() - return - # Trust the freshly-resolved cfg for the remainder of this - # turn rather than recursing into another preflight; the - # in-stream 429 recovery loop is still in place as the - # safety net if even this fallback hits an upstream cap. - mark_healthy(llm_config_id) - _log_chat_stream_error( - flow=flow, - error_kind="rate_limited", - error_code="RATE_LIMITED", - severity="info", - is_expected=True, - request_id=request_id, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - message=( - "Auto-pinned model failed preflight; switched to another " - "eligible model and continuing." - ), - extra={ - "auto_runtime_recover": True, - "preflight": True, - "previous_config_id": previous_config_id, - "fallback_config_id": llm_config_id, - }, - ) - # Rebuild against the new llm/agent_config. Sequential - # here because we no longer have anything to overlap with. - agent = await agent_factory( - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - disabled_tools=disabled_tools, - mentioned_document_ids=mentioned_document_ids, - filesystem_selection=filesystem_selection, - ) - - if agent is None: - # Either no preflight was needed, or preflight succeeded — - # in both cases the speculative build is the agent we want. - agent = await agent_build_task _perf_log.info( "[stream_new_chat] Agent created in %.3fs", time.perf_counter() - _t0 ) @@ -2208,36 +1987,6 @@ async def stream_new_chat( }, ) - # Fire background memory extraction if the agent didn't handle it. - # Shared threads write to team memory; private threads write to user memory. - if not stream_result.agent_called_update_memory: - memory_seed = user_query.strip() or ( - f"[{len(user_image_data_urls or [])} image(s)]" - if user_image_data_urls - else "(message)" - ) - if visibility == ChatVisibility.SEARCH_SPACE: - task = asyncio.create_task( - extract_and_save_team_memory( - user_message=memory_seed, - search_space_id=search_space_id, - llm=llm, - author_display_name=current_user_display_name, - ) - ) - _background_tasks.add(task) - task.add_done_callback(_background_tasks.discard) - elif user_id: - task = asyncio.create_task( - extract_and_save_memory( - user_message=memory_seed, - user_id=user_id, - llm=llm, - ) - ) - _background_tasks.add(task) - task.add_done_callback(_background_tasks.discard) - # Finish the step and message yield streaming_service.format_data("turn-status", {"status": "idle"}) yield streaming_service.format_finish_step() @@ -2682,25 +2431,6 @@ async def stream_resume_chat( yield streaming_service.format_done() return - # Auto-mode preflight ping (resume path). Mirrors ``stream_new_chat``: - # one cheap probe before the agent is rebuilt so a 429'd pin gets - # repinned without burning planner/classifier/title calls first. - # See ``stream_new_chat`` for the full rationale on the speculative - # parallel build pattern below. - preflight_needed = ( - requested_llm_config_id == 0 - and llm_config_id < 0 - and not is_recently_healthy(llm_config_id) - ) - preflight_task: asyncio.Task[None] | None = None - _t_preflight = 0.0 - if preflight_needed: - _t_preflight = time.perf_counter() - preflight_task = asyncio.create_task( - _preflight_llm(llm), - name=f"auto_pin_preflight_resume:{llm_config_id}", - ) - _t0 = time.perf_counter() connector_service = ConnectorService(session, search_space_id=search_space_id) @@ -2730,115 +2460,25 @@ async def stream_resume_chat( if _app_config.MULTI_AGENT_CHAT_ENABLED else create_surfsense_deep_agent ) - agent_build_task = asyncio.create_task( - _build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - ), - name="agent_build:stream_resume", + # Build the agent inline. Provider 429s are handled by the + # in-stream recovery loop, which repins to an eligible + # alternative config and rebuilds the agent before the user sees + # any output. + agent = await _build_main_agent_for_thread( + agent_factory, + llm=llm, + search_space_id=search_space_id, + db_session=session, + connector_service=connector_service, + checkpointer=checkpointer, + user_id=user_id, + thread_id=chat_id, + agent_config=agent_config, + firecrawl_api_key=firecrawl_api_key, + thread_visibility=visibility, + filesystem_selection=filesystem_selection, + disabled_tools=disabled_tools, ) - - agent: Any = None - if preflight_task is not None: - try: - await preflight_task - mark_healthy(llm_config_id) - _perf_log.info( - "[stream_resume] auto_pin_preflight ok config_id=%s took=%.3fs (parallel)", - llm_config_id, - time.perf_counter() - _t_preflight, - ) - except Exception as preflight_exc: - # Same session-safety rationale as ``stream_new_chat``. - await _settle_speculative_agent_build(agent_build_task) - if not _is_provider_rate_limited(preflight_exc): - raise - previous_config_id = llm_config_id - mark_runtime_cooldown( - previous_config_id, reason="preflight_rate_limited" - ) - try: - llm_config_id = ( - await resolve_or_get_pinned_llm_config_id( - session, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - selected_llm_config_id=0, - exclude_config_ids={previous_config_id}, - ) - ).resolved_llm_config_id - except ValueError as pin_error: - yield _emit_stream_error( - message=str(pin_error), - error_kind="server_error", - error_code="SERVER_ERROR", - ) - yield streaming_service.format_done() - return - - llm, agent_config, llm_load_error = await _load_llm_bundle( - llm_config_id - ) - if llm_load_error or not llm: - yield _emit_stream_error( - message=llm_load_error or "Failed to create LLM instance", - error_kind="server_error", - error_code="SERVER_ERROR", - ) - yield streaming_service.format_done() - return - mark_healthy(llm_config_id) - _log_chat_stream_error( - flow="resume", - error_kind="rate_limited", - error_code="RATE_LIMITED", - severity="info", - is_expected=True, - request_id=request_id, - thread_id=chat_id, - search_space_id=search_space_id, - user_id=user_id, - message=( - "Auto-pinned model failed preflight; switched to another " - "eligible model and continuing." - ), - extra={ - "auto_runtime_recover": True, - "preflight": True, - "previous_config_id": previous_config_id, - "fallback_config_id": llm_config_id, - }, - ) - agent = await _build_main_agent_for_thread( - agent_factory, - llm=llm, - search_space_id=search_space_id, - db_session=session, - connector_service=connector_service, - checkpointer=checkpointer, - user_id=user_id, - thread_id=chat_id, - agent_config=agent_config, - firecrawl_api_key=firecrawl_api_key, - thread_visibility=visibility, - filesystem_selection=filesystem_selection, - disabled_tools=disabled_tools, - ) - - if agent is None: - agent = await agent_build_task _perf_log.info( "[stream_resume] Agent created in %.3fs", time.perf_counter() - _t0 ) diff --git a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py index 9a309f9d7..50e7a1360 100644 --- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/event_stream.py @@ -48,4 +48,3 @@ async def stream_output( yield frame result.accumulated_text = state.accumulated_text - result.agent_called_update_memory = state.called_update_memory diff --git a/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py index 391f14f24..1d3f1e88a 100644 --- a/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py +++ b/surfsense_backend/app/tasks/chat/streaming/graph_stream/result.py @@ -11,7 +11,6 @@ class StreamingResult: accumulated_text: str = "" is_interrupted: bool = False sandbox_files: list[str] = field(default_factory=list) - agent_called_update_memory: bool = False request_id: str | None = None turn_id: str = "" filesystem_mode: str = "cloud" diff --git a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py index 57ab617c5..ad4a17d08 100644 --- a/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py +++ b/surfsense_backend/app/tasks/chat/streaming/handlers/tool_end.py @@ -36,9 +36,6 @@ def iter_tool_end_frames( raw_output = event.get("data", {}).get("output", "") staged_file_path = state.file_path_by_run.pop(run_id, None) if run_id else None - if tool_name == "update_memory": - state.called_update_memory = True - if hasattr(raw_output, "content"): content = raw_output.content if isinstance(content, str): diff --git a/surfsense_backend/app/tasks/chat/streaming/relay/state.py b/surfsense_backend/app/tasks/chat/streaming/relay/state.py index 27898403d..f99fc8edb 100644 --- a/surfsense_backend/app/tasks/chat/streaming/relay/state.py +++ b/surfsense_backend/app/tasks/chat/streaming/relay/state.py @@ -32,7 +32,6 @@ class AgentEventRelayState: last_active_step_items: list[str] = field(default_factory=list) just_finished_tool: bool = False active_tool_depth: int = 0 - called_update_memory: bool = False current_reasoning_id: str | None = None pending_tool_call_chunks: list[dict[str, Any]] = field(default_factory=list) lc_tool_call_id_by_run: dict[str, str] = field(default_factory=dict) diff --git a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py index 5e784cb4f..180f21412 100644 --- a/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/discord_indexer.py @@ -670,7 +670,9 @@ async def index_discord_messages( # Heavy processing (embeddings, chunks) chunks = await create_document_chunks(item["combined_document_string"]) - doc_embedding = embed_text(item["combined_document_string"]) + doc_embedding = await asyncio.to_thread( + embed_text, item["combined_document_string"] + ) # Update document to READY with actual content document.title = f"{item['guild_name']}#{item['channel_name']}" diff --git a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py index a698bfd46..555d60273 100644 --- a/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/luma_indexer.py @@ -6,6 +6,7 @@ Implements 2-phase document status updates for real-time UI feedback: - Phase 2: Process each event: pending → processing → ready/failed """ +import asyncio import time from collections.abc import Awaitable, Callable from datetime import datetime, timedelta @@ -465,7 +466,9 @@ async def index_luma_events( summary_content = ( f"Luma Event: {item['event_name']}\n\n{item['event_markdown']}" ) - summary_embedding = embed_text(summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, summary_content + ) chunks = await create_document_chunks(item["event_markdown"]) diff --git a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py index 12cdf384e..25994895a 100644 --- a/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/teams_indexer.py @@ -9,6 +9,7 @@ Uses 2-phase document status updates for real-time UI feedback: - Phase 2: Process each document: pending → processing → ready/failed """ +import asyncio import time from collections.abc import Awaitable, Callable from datetime import UTC, datetime @@ -581,7 +582,9 @@ async def index_teams_messages( # Heavy processing (embeddings, chunks) chunks = await create_document_chunks(item["combined_document_string"]) - doc_embedding = embed_text(item["combined_document_string"]) + doc_embedding = await asyncio.to_thread( + embed_text, item["combined_document_string"] + ) # Update document to READY with actual content document.title = f"{item['team_name']} - {item['channel_name']}" diff --git a/surfsense_backend/app/tasks/document_processors/_save.py b/surfsense_backend/app/tasks/document_processors/_save.py index ae45f7a69..d633dd4f6 100644 --- a/surfsense_backend/app/tasks/document_processors/_save.py +++ b/surfsense_backend/app/tasks/document_processors/_save.py @@ -2,6 +2,7 @@ Unified document save/update logic for file processors. """ +import asyncio import logging from sqlalchemy.exc import SQLAlchemyError @@ -43,7 +44,7 @@ async def _generate_summary( """ if not enable_summary: summary = f"File: {file_name}\n\n{markdown_content[:4000]}" - return summary, embed_text(summary) + return summary, await asyncio.to_thread(embed_text, summary) if etl_service == "DOCLING": from app.services.docling_service import create_docling_service @@ -65,7 +66,7 @@ async def _generate_summary( parts.append(f"**{formatted_key}:** {value}") enhanced = "\n".join(parts) + "\n\n# DOCUMENT SUMMARY\n\n" + summary_text - return enhanced, embed_text(enhanced) + return enhanced, await asyncio.to_thread(embed_text, enhanced) # Standard summary (Unstructured / LlamaCloud / others) meta = { diff --git a/surfsense_backend/app/utils/document_converters.py b/surfsense_backend/app/utils/document_converters.py index ed52c1b7b..9bc8103c5 100644 --- a/surfsense_backend/app/utils/document_converters.py +++ b/surfsense_backend/app/utils/document_converters.py @@ -1,3 +1,4 @@ +import asyncio import hashlib import logging import threading @@ -221,7 +222,9 @@ async def generate_document_summary( else: enhanced_summary_content = summary_content - summary_embedding = embed_text(enhanced_summary_content) + summary_embedding = await asyncio.to_thread( + embed_text, enhanced_summary_content + ) return enhanced_summary_content, summary_embedding @@ -237,7 +240,7 @@ async def create_document_chunks(content: str) -> list[Chunk]: List of Chunk objects with embeddings """ chunk_texts = [c.text for c in config.chunker_instance.chunk(content)] - chunk_embeddings = embed_texts(chunk_texts) + chunk_embeddings = await asyncio.to_thread(embed_texts, chunk_texts) return [ Chunk(content=text, embedding=emb) for text, emb in zip(chunk_texts, chunk_embeddings, strict=False) diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 26fee1bc3..cd2a6921a 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "surf-new-backend" -version = "0.0.24" +version = "0.0.25" description = "SurfSense Backend" requires-python = ">=3.12" dependencies = [ diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_memory_response_content.py b/surfsense_backend/tests/unit/agents/new_chat/test_memory_response_content.py index 1f338ee3e..c2f52659c 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_memory_response_content.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_memory_response_content.py @@ -2,28 +2,12 @@ import pytest -from app.agents.new_chat.tools.update_memory import _save_memory +from app.services.memory import MemoryScope, save_memory from app.utils.content_utils import extract_text_content pytestmark = pytest.mark.unit -class _Recorder: - def __init__(self) -> None: - self.applied_content: str | None = None - self.commit_calls = 0 - self.rollback_calls = 0 - - def apply(self, content: str) -> None: - self.applied_content = content - - async def commit(self) -> None: - self.commit_calls += 1 - - async def rollback(self) -> None: - self.rollback_calls += 1 - - def test_extract_text_content_keeps_no_update_bare_string_from_content_blocks() -> None: content = [ {"type": "thinking", "thinking": "No"}, @@ -69,21 +53,12 @@ def test_extract_text_content_preserves_plain_string_responses() -> None: @pytest.mark.asyncio async def test_save_memory_rejects_non_string_payload_before_commit() -> None: - recorder = _Recorder() - - result = await _save_memory( - updated_memory=["NO_UPDATE"], # type: ignore[arg-type] - old_memory=None, - llm=None, - apply_fn=recorder.apply, - commit_fn=recorder.commit, - rollback_fn=recorder.rollback, - label="memory", - scope="user", + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content=["NO_UPDATE"], # type: ignore[arg-type] + session=None, # type: ignore[arg-type] ) - assert result["status"] == "error" - assert "must be a string" in result["message"] - assert recorder.applied_content is None - assert recorder.commit_calls == 0 - assert recorder.rollback_calls == 0 + assert result.status == "error" + assert "must be a string" in result.message diff --git a/surfsense_backend/tests/unit/agents/new_chat/test_prompt_caching.py b/surfsense_backend/tests/unit/agents/new_chat/test_prompt_caching.py index 4cf53969d..c3de15c58 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/test_prompt_caching.py +++ b/surfsense_backend/tests/unit/agents/new_chat/test_prompt_caching.py @@ -12,13 +12,19 @@ prompt caching. It mutates ``llm.model_kwargs`` so the kwargs flow to the deepagent stack accumulates multiple ``SystemMessage``\ s in ``state["messages"]`` and ``role: system`` would tag every one of them, blowing past Anthropic's 4-block ``cache_control`` cap. -2. Adds ``prompt_cache_key``/``prompt_cache_retention`` only for - single-model OPENAI/DEEPSEEK/XAI configs (where OpenAI's automatic - prompt-cache surface is available). -3. Treats ``ChatLiteLLMRouter`` (auto-mode) as universal-only — no - OpenAI-only kwargs because the router fans out across providers. -4. Idempotent: user-supplied values in ``model_kwargs`` are preserved. -5. Defensive: LLMs without a writable ``model_kwargs`` are silently +2. Adds ``prompt_cache_key`` for OPENAI/DEEPSEEK/XAI/AZURE/AZURE_OPENAI + configs (Microsoft's Azure transformer was added to LiteLLM in + https://github.com/BerriAI/litellm/pull/20989, Feb 2026). +3. Adds ``prompt_cache_retention="24h"`` ONLY for OPENAI/DEEPSEEK/XAI. + Azure's server-side support landed in Microsoft's docs on 2026-05-13 + but LiteLLM 1.83.14 hasn't wired it through yet, so we let Azure use + its default in-memory retention rather than send a param that + ``litellm.drop_params`` would silently strip. +4. Treats ``ChatLiteLLMRouter`` (auto-mode) as universal-only — no + destination-specific kwargs because the router fans out across + providers. +5. Idempotent: user-supplied values in ``model_kwargs`` are preserved. +6. Defensive: LLMs without a writable ``model_kwargs`` are silently skipped rather than raising. """ @@ -191,9 +197,9 @@ def test_does_not_overwrite_user_supplied_prompt_cache_key() -> None: @pytest.mark.parametrize("provider", ["OPENAI", "DEEPSEEK", "XAI"]) def test_sets_openai_family_extras(provider: str) -> None: - """OpenAI-style providers gain ``prompt_cache_key`` (raises hit rate - via routing affinity) and ``prompt_cache_retention="24h"`` (extends - cache TTL beyond the default 5-10 min).""" + """Native OpenAI-style providers gain ``prompt_cache_key`` (raises + hit rate via routing affinity) and ``prompt_cache_retention="24h"`` + (extends cache TTL beyond the default 5-10 min).""" cfg = _make_cfg(provider=provider) llm = _FakeLLM() @@ -203,6 +209,27 @@ def test_sets_openai_family_extras(provider: str) -> None: assert llm.model_kwargs["prompt_cache_retention"] == "24h" +@pytest.mark.parametrize("provider", ["AZURE", "AZURE_OPENAI"]) +def test_azure_gets_prompt_cache_key_only(provider: str) -> None: + """Azure configs gain ``prompt_cache_key`` for routing affinity + (Microsoft auto-caches every GPT-4o+ deployment at ≥1024 tokens; + the key clusters same-prefix requests on the same backend GPU pool + so hit rate climbs). They DO NOT get ``prompt_cache_retention`` + because LiteLLM 1.83.14's Azure transformer omits it from its + supported params list — ``drop_params`` would silently strip it. + Azure's default in-memory retention (5-10 min, max 1 h) is already + enough to cover intra-conversation turns; revisit when LiteLLM + bumps Azure to match its OpenAI surface.""" + cfg = _make_cfg(provider=provider, model_name="gpt-5.4") + llm = _FakeLLM(model="azure/gpt-5.4") + + apply_litellm_prompt_caching(llm, agent_config=cfg, thread_id=42) + + assert llm.model_kwargs["prompt_cache_key"] == "surfsense-thread-42" + assert "prompt_cache_retention" not in llm.model_kwargs + assert "cache_control_injection_points" in llm.model_kwargs + + def test_skips_prompt_cache_key_when_no_thread_id() -> None: """Without a thread id we can't construct a per-thread key. Retention is still useful so we set it (it's free).""" @@ -215,12 +242,26 @@ def test_skips_prompt_cache_key_when_no_thread_id() -> None: assert llm.model_kwargs["prompt_cache_retention"] == "24h" +def test_azure_skips_prompt_cache_key_when_no_thread_id() -> None: + """Azure without a thread id ends up with no extras (retention is + Azure-skipped, key needs a thread id) — universal injection points + still land.""" + cfg = _make_cfg(provider="AZURE", model_name="gpt-5.4") + llm = _FakeLLM(model="azure/gpt-5.4") + + apply_litellm_prompt_caching(llm, agent_config=cfg, thread_id=None) + + assert "prompt_cache_key" not in llm.model_kwargs + assert "prompt_cache_retention" not in llm.model_kwargs + assert "cache_control_injection_points" in llm.model_kwargs + + @pytest.mark.parametrize( "provider", ["ANTHROPIC", "BEDROCK", "VERTEX_AI", "GOOGLE_AI_STUDIO", "GROQ", "MOONSHOT"], ) def test_no_openai_extras_for_other_providers(provider: str) -> None: - """Non-OpenAI-family providers don't expose ``prompt_cache_key`` — + """Non-OpenAI-style providers don't expose ``prompt_cache_key`` — skip it. ``cache_control_injection_points`` is still set (universal).""" cfg = _make_cfg(provider=provider) llm = _FakeLLM() diff --git a/surfsense_backend/tests/unit/agents/new_chat/tools/test_mcp_tools_cache.py b/surfsense_backend/tests/unit/agents/new_chat/tools/test_mcp_tools_cache.py new file mode 100644 index 000000000..bae97ba9f --- /dev/null +++ b/surfsense_backend/tests/unit/agents/new_chat/tools/test_mcp_tools_cache.py @@ -0,0 +1,130 @@ +"""Unit tests for ``mcp_tools_cache``.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from types import SimpleNamespace + +import pytest + +from app.agents.new_chat.tools.mcp_tools_cache import ( + CachedMCPToolDef, + CachedMCPTools, + read_cached_tools, +) + +pytestmark = pytest.mark.unit + + +def _make_connector(config: dict | None) -> SimpleNamespace: + return SimpleNamespace(id=42, config=config) + + +def test_read_returns_none_when_config_is_none() -> None: + assert read_cached_tools(_make_connector(None)) is None + + +def test_read_returns_none_when_cached_tools_missing() -> None: + assert read_cached_tools(_make_connector({"server_config": {}})) is None + + +def test_read_returns_none_when_cached_tools_is_not_a_dict() -> None: + assert read_cached_tools(_make_connector({"cached_tools": []})) is None + assert read_cached_tools(_make_connector({"cached_tools": "stale"})) is None + + +def test_read_parses_minimal_valid_payload() -> None: + parsed = read_cached_tools( + _make_connector( + { + "cached_tools": { + "discovered_at": "2026-05-20T10:00:00+00:00", + "tools": [ + { + "name": "list_issues", + "description": "List Linear issues", + "input_schema": {"type": "object"}, + } + ], + } + } + ) + ) + assert parsed is not None + assert parsed.server_version is None + assert parsed.server_name is None + assert parsed.transport is None + assert len(parsed.tools) == 1 + assert parsed.tools[0].name == "list_issues" + + +def test_read_parses_full_payload_with_serverinfo() -> None: + parsed = read_cached_tools( + _make_connector( + { + "cached_tools": { + "discovered_at": "2026-05-20T10:00:00+00:00", + "server_version": "1.2.3", + "server_name": "atlassian-mcp", + "transport": "streamable-http", + "tools": [ + {"name": "create_issue", "input_schema": {}}, + {"name": "list_issues", "input_schema": {}}, + ], + } + } + ) + ) + assert parsed is not None + assert parsed.server_version == "1.2.3" + assert parsed.server_name == "atlassian-mcp" + assert parsed.transport == "streamable-http" + assert [t.name for t in parsed.tools] == ["create_issue", "list_issues"] + + +def test_read_returns_none_for_corrupt_payload(caplog) -> None: + parsed = read_cached_tools( + _make_connector( + { + "cached_tools": { + "discovered_at": "not-a-date", + "tools": "should-be-a-list", + } + } + ) + ) + assert parsed is None + assert any("corrupt cached_tools" in r.getMessage() for r in caplog.records) + + +def test_read_returns_none_when_tools_missing() -> None: + parsed = read_cached_tools( + _make_connector( + {"cached_tools": {"discovered_at": "2026-05-20T10:00:00+00:00"}} + ) + ) + assert parsed is None + + +def test_tool_def_defaults_description_and_schema() -> None: + td = CachedMCPToolDef.model_validate({"name": "ping"}) + assert td.description == "" + assert td.input_schema == {} + + +def test_model_dump_json_mode_is_round_trippable() -> None: + original = CachedMCPTools( + discovered_at=datetime(2026, 5, 20, 10, 0, 0, tzinfo=UTC), + server_version="1.2.3", + server_name="atlassian-mcp", + transport="streamable-http", + tools=[CachedMCPToolDef(name="list_issues")], + ) + payload = original.model_dump(mode="json") + + assert payload["discovered_at"] == "2026-05-20T10:00:00Z" + assert payload["tools"][0]["name"] == "list_issues" + + reparsed = CachedMCPTools.model_validate(payload) + assert reparsed.discovered_at == original.discovered_at + assert reparsed.tools[0].name == "list_issues" diff --git a/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py index f7fbacf50..c941d7d65 100644 --- a/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py +++ b/surfsense_backend/tests/unit/agents/new_chat/tools/test_update_memory_scope.py @@ -1,24 +1,24 @@ -"""Unit tests for memory scope validation and bullet format validation.""" +"""Unit tests for heading-based memory validation.""" import pytest -from app.agents.new_chat.tools.update_memory import ( - _save_memory, - _validate_bullet_format, - _validate_memory_scope, +from app.services.memory import MemoryScope, save_memory +from app.services.memory.validation import ( + validate_bullet_format, + validate_memory_scope, ) pytestmark = pytest.mark.unit -class _Recorder: +class _FakeSession: def __init__(self) -> None: - self.applied_content: str | None = None + self.added = [] self.commit_calls = 0 self.rollback_calls = 0 - def apply(self, content: str) -> None: - self.applied_content = content + def add(self, obj) -> None: + self.added.append(obj) async def commit(self) -> None: self.commit_calls += 1 @@ -27,172 +27,148 @@ class _Recorder: self.rollback_calls += 1 -# --------------------------------------------------------------------------- -# _validate_memory_scope — marker-based -# --------------------------------------------------------------------------- - - -def test_validate_memory_scope_rejects_pref_marker_in_team_scope() -> None: - content = "- (2026-04-10) [pref] Prefers dark mode\n" - result = _validate_memory_scope(content, "team") +def test_validate_memory_scope_rejects_new_personal_heading_in_team() -> None: + content = "## Preferences\n- 2026-04-10: Prefers dark mode\n" + result, _warnings = validate_memory_scope(content, "team") assert result is not None assert result["status"] == "error" - assert "[pref]" in result["message"] + assert "preferences" in result["message"] -def test_validate_memory_scope_rejects_instr_marker_in_team_scope() -> None: - content = "- (2026-04-10) [instr] Always respond in Spanish\n" - result = _validate_memory_scope(content, "team") - assert result is not None - assert result["status"] == "error" - assert "[instr]" in result["message"] +def test_validate_memory_scope_allows_old_marker_payload_in_team_scope() -> None: + content = "- (2026-04-10) [pref] Legacy personal marker remains readable\n" + result, _warnings = validate_memory_scope(content, "team") + assert result is None -def test_validate_memory_scope_rejects_both_personal_markers_in_team() -> None: +def test_validate_memory_scope_allows_team_headings() -> None: + content = "## Engineering Conventions\n- 2026-04-10: Uses PostgreSQL\n" + result, _warnings = validate_memory_scope(content, "team") + assert result is None + + +def test_validate_bullet_format_accepts_new_and_legacy_bullets() -> None: content = ( - "- (2026-04-10) [pref] Prefers dark mode\n" - "- (2026-04-10) [instr] Always respond in Spanish\n" + "## Facts\n" + "- 2026-04-10: Senior Python developer\n" + "- (2026-04-10) [fact] Legacy fact is preserved\n" ) - result = _validate_memory_scope(content, "team") - assert result is not None - assert result["status"] == "error" - assert "[instr]" in result["message"] - assert "[pref]" in result["message"] - - -def test_validate_memory_scope_allows_fact_in_team_scope() -> None: - content = "- (2026-04-10) [fact] Office is in downtown Seattle\n" - result = _validate_memory_scope(content, "team") - assert result is None - - -def test_validate_memory_scope_allows_all_markers_in_user_scope() -> None: - content = ( - "- (2026-04-10) [fact] Python developer\n" - "- (2026-04-10) [pref] Prefers concise answers\n" - "- (2026-04-10) [instr] Always use bullet points\n" - ) - result = _validate_memory_scope(content, "user") - assert result is None - - -def test_validate_memory_scope_allows_any_heading_in_team() -> None: - content = "## Architecture\n- (2026-04-10) [fact] Uses PostgreSQL for persistence\n" - result = _validate_memory_scope(content, "team") - assert result is None - - -def test_validate_memory_scope_allows_any_heading_in_user() -> None: - content = "## My Projects\n- (2026-04-10) [fact] Working on SurfSense\n" - result = _validate_memory_scope(content, "user") - assert result is None - - -# --------------------------------------------------------------------------- -# _validate_bullet_format -# --------------------------------------------------------------------------- - - -def test_validate_bullet_format_passes_valid_bullets() -> None: - content = ( - "## Work\n" - "- (2026-04-10) [fact] Senior Python developer\n" - "- (2026-04-10) [pref] Prefers dark mode\n" - "- (2026-04-10) [instr] Always respond in bullet points\n" - ) - warnings = _validate_bullet_format(content) + warnings = validate_bullet_format(content) assert warnings == [] -def test_validate_bullet_format_warns_on_missing_marker() -> None: - content = "- (2026-04-10) Senior Python developer\n" - warnings = _validate_bullet_format(content) +def test_validate_bullet_format_warns_on_nonstandard_bullet() -> None: + content = "## Facts\n- Senior Python developer\n" + warnings = validate_bullet_format(content) assert len(warnings) == 1 - assert "Malformed bullet" in warnings[0] - - -def test_validate_bullet_format_warns_on_missing_date() -> None: - content = "- [fact] Senior Python developer\n" - warnings = _validate_bullet_format(content) - assert len(warnings) == 1 - assert "Malformed bullet" in warnings[0] - - -def test_validate_bullet_format_warns_on_unknown_marker() -> None: - content = "- (2026-04-10) [context] Working on project X\n" - warnings = _validate_bullet_format(content) - assert len(warnings) == 1 - assert "Malformed bullet" in warnings[0] - - -def test_validate_bullet_format_ignores_non_bullet_lines() -> None: - content = "## Some Heading\nSome paragraph text\n" - warnings = _validate_bullet_format(content) - assert warnings == [] - - -def test_validate_bullet_format_warns_on_old_format_without_marker() -> None: - content = "## About the user\n- (2026-04-10) Likes cats\n" - warnings = _validate_bullet_format(content) - assert len(warnings) == 1 - - -# --------------------------------------------------------------------------- -# _save_memory — end-to-end with marker scope check -# --------------------------------------------------------------------------- + assert "Non-standard memory bullet" in warnings[0] @pytest.mark.asyncio -async def test_save_memory_blocks_pref_in_team_before_commit() -> None: - recorder = _Recorder() - result = await _save_memory( - updated_memory="- (2026-04-10) [pref] Prefers dark mode\n", - old_memory=None, - llm=None, - apply_fn=recorder.apply, - commit_fn=recorder.commit, - rollback_fn=recorder.rollback, - label="team memory", - scope="team", +async def test_save_memory_normalizes_legacy_marker_bullets(monkeypatch) -> None: + target = type("Target", (), {"memory_md": ""})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="- (2026-04-10) [fact] Legacy fact is preserved\n", + session=session, ) - assert result["status"] == "error" - assert recorder.commit_calls == 0 - assert recorder.applied_content is None + + assert result.status == "saved" + assert target.memory_md == "## Memory\n- 2026-04-10: Legacy fact is preserved" @pytest.mark.asyncio -async def test_save_memory_allows_fact_in_team_and_commits() -> None: - recorder = _Recorder() - content = "- (2026-04-10) [fact] Weekly standup on Mondays\n" - result = await _save_memory( - updated_memory=content, - old_memory=None, - llm=None, - apply_fn=recorder.apply, - commit_fn=recorder.commit, - rollback_fn=recorder.rollback, - label="team memory", - scope="team", +async def test_save_memory_blocks_new_personal_heading_in_team_before_commit( + monkeypatch, +) -> None: + target = type("Target", (), {"shared_memory_md": ""})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=1, + content="## Preferences\n- 2026-04-10: Prefers dark mode\n", + session=session, ) - assert result["status"] == "saved" - assert recorder.commit_calls == 1 - assert recorder.applied_content == content + assert result.status == "error" + assert session.commit_calls == 0 + assert target.shared_memory_md == "" @pytest.mark.asyncio -async def test_save_memory_includes_format_warnings() -> None: - recorder = _Recorder() - content = "- (2026-04-10) Missing marker text\n" - result = await _save_memory( - updated_memory=content, - old_memory=None, - llm=None, - apply_fn=recorder.apply, - commit_fn=recorder.commit, - rollback_fn=recorder.rollback, - label="memory", - scope="user", +async def test_save_memory_allows_grandfathered_personal_heading_in_team( + monkeypatch, +) -> None: + content = "## Preferences\n- 2026-04-10: Prefers dark mode\n" + target = type("Target", (), {"shared_memory_md": content})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=1, + content=content, + session=session, ) - assert result["status"] == "saved" - assert "format_warnings" in result - assert len(result["format_warnings"]) == 1 + assert result.status == "saved" + assert session.commit_calls == 1 + assert target.shared_memory_md == content.strip() + assert result.warnings + + +@pytest.mark.asyncio +async def test_save_memory_strips_preamble_before_heading(monkeypatch) -> None: + target = type("Target", (), {"memory_md": ""})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="Sure, here is the update:\n\n## Facts\n- 2026-04-10: Likes cats\n", + session=session, + ) + assert result.status == "saved" + assert target.memory_md == "## Facts\n- 2026-04-10: Likes cats" + + +@pytest.mark.asyncio +async def test_save_memory_rejects_long_no_heading_payload(monkeypatch) -> None: + target = type("Target", (), {"memory_md": ""})() + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="NO_UPDATE because there is nothing durable to remember.", + session=session, + ) + assert result.status == "error" + assert "## heading" in result.message + assert session.commit_calls == 0 diff --git a/surfsense_backend/tests/unit/services/test_memory_service.py b/surfsense_backend/tests/unit/services/test_memory_service.py new file mode 100644 index 000000000..820e6aa28 --- /dev/null +++ b/surfsense_backend/tests/unit/services/test_memory_service.py @@ -0,0 +1,187 @@ +"""Unit tests for the first-class memory service.""" + +from types import SimpleNamespace + +import pytest + +from app.services.memory import ( + MemoryScope, + reset_memory, + save_memory, +) + +pytestmark = pytest.mark.unit + + +class _FakeSession: + def __init__(self) -> None: + self.commit_calls = 0 + self.rollback_calls = 0 + self.added = [] + + def add(self, obj) -> None: + self.added.append(obj) + + async def commit(self) -> None: + self.commit_calls += 1 + + async def rollback(self) -> None: + self.rollback_calls += 1 + + +@pytest.mark.asyncio +async def test_save_memory_saves_heading_based_memory(monkeypatch) -> None: + target = SimpleNamespace(memory_md="") + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="## Facts\n- 2026-05-19: Anish works on SurfSense\n", + session=session, + ) + + assert result.status == "saved" + assert target.memory_md.startswith("## Facts") + assert session.commit_calls == 1 + + +@pytest.mark.asyncio +async def test_save_memory_accepts_legacy_marker_payload(monkeypatch) -> None: + target = SimpleNamespace(memory_md="") + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="- (2026-05-19) [fact] Legacy marker memory\n", + session=session, + ) + + assert result.status == "saved" + assert target.memory_md == "## Memory\n- 2026-05-19: Legacy marker memory" + + +@pytest.mark.asyncio +async def test_save_memory_rejects_long_no_heading_payload(monkeypatch) -> None: + target = SimpleNamespace(memory_md="## Facts\n- 2026-05-19: Existing\n") + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="reasoning text before NO_UPDATE should not become saved memory", + session=session, + ) + + assert result.status == "error" + assert session.commit_calls == 0 + assert target.memory_md.startswith("## Facts") + + +@pytest.mark.asyncio +async def test_save_memory_no_update_sentinel_is_no_op(monkeypatch) -> None: + existing = "## Preferences\n- 2026-05-20: Existing preference\n" + target = SimpleNamespace(memory_md=existing) + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content="NO_UPDATE", + session=session, + ) + + assert result.status == "no_op" + assert result.memory_md == existing + assert target.memory_md == existing + assert session.commit_calls == 0 + + +@pytest.mark.asyncio +async def test_save_memory_no_update_sentinel_is_case_insensitive(monkeypatch) -> None: + existing = "## Preferences\n- 2026-05-20: Existing preference\n" + target = SimpleNamespace(memory_md=existing) + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + content=" no update ", + session=session, + ) + + assert result.status == "no_op" + assert result.memory_md == existing + assert target.memory_md == existing + assert session.commit_calls == 0 + + +@pytest.mark.asyncio +async def test_save_memory_grandfathers_existing_team_personal_heading( + monkeypatch, +) -> None: + content = "## Preferences\n- 2026-05-19: Existing legacy heading\n" + target = SimpleNamespace(shared_memory_md=content) + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await save_memory( + scope=MemoryScope.TEAM, + target_id=1, + content=content, + session=session, + ) + + assert result.status == "saved" + assert result.warnings + assert session.commit_calls == 1 + + +@pytest.mark.asyncio +async def test_reset_memory_clears_memory(monkeypatch) -> None: + target = SimpleNamespace(memory_md="## Facts\n- 2026-05-19: Existing\n") + session = _FakeSession() + + async def fake_load_target(**_kwargs): + return target + + monkeypatch.setattr("app.services.memory.service._load_target", fake_load_target) + + result = await reset_memory( + scope=MemoryScope.USER, + target_id="00000000-0000-0000-0000-000000000000", + session=session, + ) + + assert result.status == "saved" + assert target.memory_md == "" diff --git a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py index c0123b76d..c53dad5fb 100644 --- a/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py +++ b/surfsense_backend/tests/unit/tasks/chat/streaming/test_stream_output.py @@ -89,7 +89,6 @@ async def test_stream_output_emits_text_lifecycle_and_updates_result() -> None: "text_end:text-1", ] assert result.accumulated_text == "Hello world" - assert result.agent_called_update_memory is False async def test_stream_output_passes_runtime_context_to_agent() -> None: diff --git a/surfsense_backend/tests/unit/test_stream_new_chat_contract.py b/surfsense_backend/tests/unit/test_stream_new_chat_contract.py index 208204ca9..19b06201f 100644 --- a/surfsense_backend/tests/unit/test_stream_new_chat_contract.py +++ b/surfsense_backend/tests/unit/test_stream_new_chat_contract.py @@ -209,128 +209,6 @@ def test_stream_exception_classifies_openrouter_429_payload(): assert extra is None -@pytest.mark.asyncio -async def test_preflight_swallows_non_rate_limit_errors_and_re_raises_429(monkeypatch): - """``_preflight_llm`` is best-effort. - - - On rate-limit shaped exceptions (provider 429) it MUST re-raise so the - caller can drive the cooldown/repin branch. - - On any other transient failure it MUST swallow the error so the normal - stream path continues without surfacing preflight noise to the user. - """ - from types import SimpleNamespace - - from app.tasks.chat.stream_new_chat import _preflight_llm - - class _RateLimitedError(Exception): - """Class-name carries 'RateLimit' so _is_provider_rate_limited triggers.""" - - rate_calls: list[dict] = [] - other_calls: list[dict] = [] - - async def _fake_acompletion_429(**kwargs): - rate_calls.append(kwargs) - raise _RateLimitedError("simulated 429") - - async def _fake_acompletion_other(**kwargs): - other_calls.append(kwargs) - raise RuntimeError("some unrelated transient failure") - - fake_llm = SimpleNamespace( - model="openrouter/google/gemma-4-31b-it:free", - api_key="test", - api_base=None, - ) - - import litellm # type: ignore[import-not-found] - - monkeypatch.setattr(litellm, "acompletion", _fake_acompletion_429) - with pytest.raises(_RateLimitedError): - await _preflight_llm(fake_llm) - assert len(rate_calls) == 1 - assert rate_calls[0]["max_tokens"] == 1 - assert rate_calls[0]["stream"] is False - - monkeypatch.setattr(litellm, "acompletion", _fake_acompletion_other) - # MUST NOT raise: non-rate-limit failures are swallowed. - await _preflight_llm(fake_llm) - assert len(other_calls) == 1 - - -@pytest.mark.asyncio -async def test_preflight_skipped_for_auto_router_model(): - """Router-mode ``model='auto'`` has no single deployment to ping; the - LiteLLM router itself owns per-deployment rate-limit accounting, so the - preflight helper must short-circuit instead of issuing a probe.""" - from types import SimpleNamespace - - from app.tasks.chat.stream_new_chat import _preflight_llm - - fake_llm = SimpleNamespace(model="auto", api_key="x", api_base=None) - # Should return without raising or making any LiteLLM call. - await _preflight_llm(fake_llm) - - -@pytest.mark.asyncio -async def test_settle_speculative_agent_build_swallows_exceptions(): - """``_settle_speculative_agent_build`` MUST always return cleanly so the - caller can safely re-touch the request-scoped session afterwards. - - The helper guards the parallel preflight + agent-build path: when the - speculative build is being discarded (429 or non-429 preflight failure) - we await it solely to release any in-flight ``AsyncSession`` usage — - the build's outcome is irrelevant. Any exception (including - ``CancelledError``) leaking out would skip the caller's recovery flow - and re-introduce the very session-concurrency hazard the helper exists - to prevent. - """ - import asyncio - - from app.tasks.chat.stream_new_chat import _settle_speculative_agent_build - - async def _raises() -> None: - raise RuntimeError("speculative build crashed") - - async def _succeeds() -> str: - return "agent" - - async def _slow() -> None: - await asyncio.sleep(0.05) - - for coro in (_raises(), _succeeds(), _slow()): - task = asyncio.create_task(coro) - await _settle_speculative_agent_build(task) - assert task.done() - - -@pytest.mark.asyncio -async def test_settle_speculative_agent_build_handles_already_done_task(): - """Done tasks (success or failure) must still be settled without raising.""" - import asyncio - - from app.tasks.chat.stream_new_chat import _settle_speculative_agent_build - - async def _ok() -> str: - return "ok" - - async def _bad() -> None: - raise ValueError("nope") - - ok_task = asyncio.create_task(_ok()) - bad_task = asyncio.create_task(_bad()) - # Drive both to completion before settling. - await asyncio.sleep(0) - await asyncio.sleep(0) - - await _settle_speculative_agent_build(ok_task) - await _settle_speculative_agent_build(bad_task) - assert ok_task.result() == "ok" - # ``bad_task`` exception was consumed by the settle helper; calling - # ``.exception()`` after the fact must still return the original error - # (the helper observes it but doesn't clear it). - assert isinstance(bad_task.exception(), ValueError) - - def test_stream_exception_classifies_thread_busy(): exc = BusyError(request_id="thread-123") kind, code, severity, is_expected, user_message, extra = _classify_stream_exception( diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock index c4e6b5c89..953aebbef 100644 --- a/surfsense_backend/uv.lock +++ b/surfsense_backend/uv.lock @@ -7947,7 +7947,7 @@ wheels = [ [[package]] name = "surf-new-backend" -version = "0.0.24" +version = "0.0.25" source = { editable = "." } dependencies = [ { name = "alembic" }, diff --git a/surfsense_browser_extension/package.json b/surfsense_browser_extension/package.json index 028e653b3..2f17899a8 100644 --- a/surfsense_browser_extension/package.json +++ b/surfsense_browser_extension/package.json @@ -1,7 +1,7 @@ { "name": "surfsense_browser_extension", "displayName": "Surfsense Browser Extension", - "version": "0.0.24", + "version": "0.0.25", "description": "Extension to collect Browsing History for SurfSense.", "author": "https://github.com/MODSetter", "engines": { diff --git a/surfsense_desktop/package.json b/surfsense_desktop/package.json index 68032e9f4..0ad279ece 100644 --- a/surfsense_desktop/package.json +++ b/surfsense_desktop/package.json @@ -1,6 +1,6 @@ { "name": "surfsense-desktop", - "version": "0.0.24", + "version": "0.0.25", "description": "SurfSense Desktop App", "main": "dist/main.js", "scripts": { diff --git a/surfsense_web/app/(home)/login/GoogleLoginButton.tsx b/surfsense_web/app/(home)/login/GoogleLoginButton.tsx index 581bfe17f..7e703a70c 100644 --- a/surfsense_web/app/(home)/login/GoogleLoginButton.tsx +++ b/surfsense_web/app/(home)/login/GoogleLoginButton.tsx @@ -5,6 +5,7 @@ import { Logo } from "@/components/Logo"; import { Button } from "@/components/ui/button"; import { trackLoginAttempt } from "@/lib/posthog/events"; import { AmbientBackground } from "./AmbientBackground"; +import { BACKEND_URL } from "@/lib/env-config"; function GoogleGLogo({ className }: { className?: string }) { return ( @@ -50,7 +51,7 @@ export function GoogleLoginButton() { // cross-origin fetch requests may not be sent on subsequent redirects. // The authorize-redirect endpoint does a server-side redirect to Google // and sets the CSRF cookie properly for same-site context. - window.location.href = `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/auth/google/authorize-redirect`; + window.location.href = `${BACKEND_URL}/auth/google/authorize-redirect`; }; return (
diff --git a/surfsense_web/app/api/zero/query/route.ts b/surfsense_web/app/api/zero/query/route.ts index a91edcd6f..8caac9cd4 100644 --- a/surfsense_web/app/api/zero/query/route.ts +++ b/surfsense_web/app/api/zero/query/route.ts @@ -4,11 +4,9 @@ import { NextResponse } from "next/server"; import type { Context } from "@/types/zero"; import { queries } from "@/zero/queries"; import { schema } from "@/zero/schema"; +import { BACKEND_URL } from "@/lib/env-config"; -const backendURL = - process.env.FASTAPI_BACKEND_INTERNAL_URL || - process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || - "http://localhost:8000"; +const backendURL = BACKEND_URL; async function authenticateRequest( request: Request diff --git a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx index d439cbb67..ecd5ab6b1 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/new-chat/[[...chat_id]]/page.tsx @@ -118,7 +118,7 @@ import { trackChatResponseReceived, } from "@/lib/posthog/events"; import Loading from "../loading"; - +import { BACKEND_URL } from "@/lib/env-config"; const MobileEditorPanel = dynamic( () => import("@/components/editor-panel/editor-panel").then((m) => ({ @@ -777,7 +777,7 @@ export default function NewChatPage() { if (threadId) { const token = getBearerToken(); if (token) { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + const backendUrl = BACKEND_URL; try { const response = await fetch( `${backendUrl}/api/v1/threads/${threadId}/cancel-active-turn`, @@ -978,7 +978,7 @@ export default function NewChatPage() { let streamBatcher: FrameBatchedUpdater | null = null; try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + const backendUrl = BACKEND_URL; const selection = await getAgentFilesystemSelection(searchSpaceId, { localFilesystemEnabled, }); @@ -1520,7 +1520,7 @@ export default function NewChatPage() { } try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + const backendUrl = BACKEND_URL; const selection = await getAgentFilesystemSelection(searchSpaceId, { localFilesystemEnabled, }); diff --git a/surfsense_web/app/dashboard/[search_space_id]/search-space-settings/layout-shell.tsx b/surfsense_web/app/dashboard/[search_space_id]/search-space-settings/layout-shell.tsx index 96d77d131..22f68edab 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/search-space-settings/layout-shell.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/search-space-settings/layout-shell.tsx @@ -3,7 +3,6 @@ import { BookText, Bot, - Brain, CircleUser, Earth, ImageIcon, @@ -27,7 +26,6 @@ export type SearchSpaceSettingsTab = | "vision-models" | "team-roles" | "prompts" - | "team-memory" | "public-links"; const DEFAULT_TAB: SearchSpaceSettingsTab = "general"; @@ -89,11 +87,6 @@ export function SearchSpaceSettingsLayoutShell({ label: t("nav_system_instructions"), icon: , }, - { - value: "team-memory" as const, - label: "Team Memory", - icon: , - }, { value: "public-links" as const, label: t("nav_public_links"), diff --git a/surfsense_web/app/dashboard/[search_space_id]/search-space-settings/team-memory/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/search-space-settings/team-memory/page.tsx deleted file mode 100644 index 0652b012e..000000000 --- a/surfsense_web/app/dashboard/[search_space_id]/search-space-settings/team-memory/page.tsx +++ /dev/null @@ -1,6 +0,0 @@ -import { TeamMemoryManager } from "@/components/settings/team-memory-manager"; - -export default async function Page({ params }: { params: Promise<{ search_space_id: string }> }) { - const { search_space_id } = await params; - return ; -} diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx deleted file mode 100644 index 3542f0925..000000000 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/MemoryContent.tsx +++ /dev/null @@ -1,293 +0,0 @@ -"use client"; - -import { useAtomValue } from "jotai"; -import { ArrowUp, ChevronDown, ClipboardCopy, Download, Info, Pencil } from "lucide-react"; -import { useCallback, useEffect, useRef, useState } from "react"; -import { toast } from "sonner"; -import { z } from "zod"; -import { activeSearchSpaceIdAtom } from "@/atoms/search-spaces/search-space-query.atoms"; -import { PlateEditor } from "@/components/editor/plate-editor"; -import { Alert, AlertDescription } from "@/components/ui/alert"; -import { Button } from "@/components/ui/button"; -import { - DropdownMenu, - DropdownMenuContent, - DropdownMenuItem, - DropdownMenuTrigger, -} from "@/components/ui/dropdown-menu"; -import { Spinner } from "@/components/ui/spinner"; - -import { baseApiService } from "@/lib/apis/base-api.service"; - -const MEMORY_HARD_LIMIT = 25_000; - -const MemoryReadSchema = z.object({ - memory_md: z.string(), -}); - -export function MemoryContent() { - const activeSearchSpaceId = useAtomValue(activeSearchSpaceIdAtom); - const [memory, setMemory] = useState(""); - const [loading, setLoading] = useState(true); - const [saving, setSaving] = useState(false); - const [editQuery, setEditQuery] = useState(""); - const [editing, setEditing] = useState(false); - const [showInput, setShowInput] = useState(false); - const textareaRef = useRef(null); - const inputContainerRef = useRef(null); - - const fetchMemory = useCallback(async () => { - try { - setLoading(true); - const data = await baseApiService.get("/api/v1/users/me/memory", MemoryReadSchema); - setMemory(data.memory_md); - } catch { - toast.error("Failed to load memory"); - } finally { - setLoading(false); - } - }, []); - - useEffect(() => { - fetchMemory(); - }, [fetchMemory]); - - useEffect(() => { - if (!showInput) return; - - const handlePointerDownOutside = (event: MouseEvent | TouchEvent) => { - const target = event.target; - if (!(target instanceof Node)) return; - if (inputContainerRef.current?.contains(target)) return; - - setShowInput(false); - }; - - document.addEventListener("mousedown", handlePointerDownOutside); - document.addEventListener("touchstart", handlePointerDownOutside, { passive: true }); - - return () => { - document.removeEventListener("mousedown", handlePointerDownOutside); - document.removeEventListener("touchstart", handlePointerDownOutside); - }; - }, [showInput]); - - const handleClear = async () => { - try { - setSaving(true); - const data = await baseApiService.put("/api/v1/users/me/memory", MemoryReadSchema, { - body: { memory_md: "" }, - }); - setMemory(data.memory_md); - toast.success("Memory cleared"); - } catch { - toast.error("Failed to clear memory"); - } finally { - setSaving(false); - } - }; - - const handleEdit = async () => { - const query = editQuery.trim(); - if (!query) return; - - try { - setEditing(true); - const data = await baseApiService.post("/api/v1/users/me/memory/edit", MemoryReadSchema, { - body: { query, search_space_id: Number(activeSearchSpaceId) }, - }); - setMemory(data.memory_md); - setEditQuery(""); - setShowInput(false); - toast.success("Memory updated"); - } catch { - toast.error("Failed to edit memory"); - } finally { - setEditing(false); - } - }; - - const openInput = () => { - setShowInput(true); - requestAnimationFrame(() => textareaRef.current?.focus()); - }; - - const handleDownload = () => { - if (!memory) return; - try { - const blob = new Blob([memory], { type: "text/markdown;charset=utf-8" }); - const url = URL.createObjectURL(blob); - const a = document.createElement("a"); - a.href = url; - a.download = "personal-memory.md"; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(url); - } catch { - toast.error("Failed to download memory"); - } - }; - - const handleCopyMarkdown = async () => { - if (!memory) return; - try { - await navigator.clipboard.writeText(memory); - toast.success("Copied to clipboard"); - } catch { - toast.error("Failed to copy memory"); - } - }; - - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Enter" && !e.shiftKey) { - e.preventDefault(); - handleEdit(); - } - }; - - const displayMemory = memory.replace(/\(\d{4}-\d{2}-\d{2}\)\s*\[(fact|pref|instr)\]\s*/g, ""); - const charCount = memory.length; - - const getCounterColor = () => { - if (charCount > MEMORY_HARD_LIMIT) return "text-red-500"; - if (charCount > 15_000) return "text-orange-500"; - if (charCount > 10_000) return "text-yellow-500"; - return "text-muted-foreground"; - }; - - if (loading) { - return ( -
- -
- ); - } - - if (!memory) { - return ( -
-

What does SurfSense remember?

-

- Nothing yet. SurfSense picks up on your preferences and context as you chat. -

-
- ); - } - - return ( -
- - - -

- SurfSense uses this personal memory to personalize your responses across all - conversations. -

-
-
- -
-
- -
- - {showInput ? ( -
-
- setEditQuery(e.target.value)} - onKeyDown={handleKeyDown} - placeholder="Tell SurfSense what to remember or forget" - disabled={editing} - className="flex-1 bg-transparent text-sm outline-none placeholder:text-muted-foreground/70" - /> - -
-
- ) : ( - - )} -
- -
- - {charCount.toLocaleString()} / {MEMORY_HARD_LIMIT.toLocaleString()} - characters - chars - {charCount > 15_000 && charCount <= MEMORY_HARD_LIMIT && " - Approaching limit"} - {charCount > MEMORY_HARD_LIMIT && " - Exceeds limit"} - -
- - - - - - - - - Copy as Markdown - - - - Download as Markdown - - - -
-
-
- ); -} diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/layout-shell.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/layout-shell.tsx index 820021622..037568db3 100644 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/layout-shell.tsx +++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/layout-shell.tsx @@ -1,7 +1,6 @@ "use client"; import { - Brain, CircleUser, Keyboard, KeyRound, @@ -26,7 +25,6 @@ export type UserSettingsTab = | "api-key" | "prompts" | "community-prompts" - | "memory" | "agent-permissions" | "agent-status" | "purchases" @@ -75,11 +73,6 @@ export function UserSettingsLayoutShell({ searchSpaceId, children }: UserSetting label: "Community Prompts", icon: , }, - { - value: "memory" as const, - label: "Memory", - icon: , - }, { value: "agent-permissions" as const, label: "Agent Permissions", diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/memory/page.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/memory/page.tsx deleted file mode 100644 index b10c5bce5..000000000 --- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/memory/page.tsx +++ /dev/null @@ -1,5 +0,0 @@ -import { MemoryContent } from "../components/MemoryContent"; - -export default function Page() { - return ; -} diff --git a/surfsense_web/atoms/editor/editor-panel.atom.ts b/surfsense_web/atoms/editor/editor-panel.atom.ts index 28563e7d3..c302c66ee 100644 --- a/surfsense_web/atoms/editor/editor-panel.atom.ts +++ b/surfsense_web/atoms/editor/editor-panel.atom.ts @@ -3,10 +3,11 @@ import { rightPanelCollapsedAtom, rightPanelTabAtom } from "@/atoms/layout/right interface EditorPanelState { isOpen: boolean; - kind: "document" | "local_file"; + kind: "document" | "local_file" | "memory"; documentId: number | null; localFilePath: string | null; searchSpaceId: number | null; + memoryScope: "user" | "team" | null; title: string | null; } @@ -16,6 +17,7 @@ const initialState: EditorPanelState = { documentId: null, localFilePath: null, searchSpaceId: null, + memoryScope: null, title: null, }; @@ -38,6 +40,12 @@ export const openEditorPanelAtom = atom( title?: string; searchSpaceId?: number; } + | { + kind: "memory"; + memoryScope: "user" | "team"; + title?: string; + searchSpaceId?: number; + } ) => { if (!get(editorPanelAtom).isOpen) { set(preEditorCollapsedAtom, get(rightPanelCollapsedAtom)); @@ -49,6 +57,21 @@ export const openEditorPanelAtom = atom( documentId: null, localFilePath: payload.localFilePath, searchSpaceId: payload.searchSpaceId ?? null, + memoryScope: null, + title: payload.title ?? null, + }); + set(rightPanelTabAtom, "editor"); + set(rightPanelCollapsedAtom, false); + return; + } + if (payload.kind === "memory") { + set(editorPanelAtom, { + isOpen: true, + kind: "memory", + documentId: null, + localFilePath: null, + searchSpaceId: payload.searchSpaceId ?? null, + memoryScope: payload.memoryScope, title: payload.title ?? null, }); set(rightPanelTabAtom, "editor"); @@ -61,6 +84,7 @@ export const openEditorPanelAtom = atom( documentId: payload.documentId, localFilePath: null, searchSpaceId: payload.searchSpaceId, + memoryScope: null, title: payload.title ?? null, }); set(rightPanelTabAtom, "editor"); diff --git a/surfsense_web/components/assistant-ui/assistant-message.tsx b/surfsense_web/components/assistant-ui/assistant-message.tsx index ac1732441..f6c91e8bf 100644 --- a/surfsense_web/components/assistant-ui/assistant-message.tsx +++ b/surfsense_web/components/assistant-ui/assistant-message.tsx @@ -14,6 +14,7 @@ import { ClipboardPaste, CopyIcon, DownloadIcon, + Dot, ExternalLink, Globe, MessageCircleReply, @@ -330,9 +331,14 @@ const MessageInfoDropdown: FC<{ chatTurnId: string | null | undefined }> = ({ ch {icon} {name} - - {counts.total_tokens.toLocaleString()} tokens - {costMicros && costMicros > 0 ? ` · ${formatTurnCost(costMicros)}` : ""} + + {counts.total_tokens.toLocaleString()} tokens + {costMicros && costMicros > 0 ? ( + <> + ); @@ -342,11 +348,14 @@ const MessageInfoDropdown: FC<{ chatTurnId: string | null | undefined }> = ({ ch className="focus:bg-accent focus:text-accent-foreground relative flex cursor-default flex-col items-start gap-0.5 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none" onSelect={(e) => e.preventDefault()} > - - {usage.total_tokens.toLocaleString()} tokens - {usage.cost_micros && usage.cost_micros > 0 - ? ` · ${formatTurnCost(usage.cost_micros)}` - : ""} + + {usage.total_tokens.toLocaleString()} tokens + {usage.cost_micros && usage.cost_micros > 0 ? ( + <> + )} diff --git a/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json b/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json index b4e85eab0..466446da9 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json +++ b/surfsense_web/components/assistant-ui/connector-popup/config/connector-status-config.json @@ -19,6 +19,11 @@ "enabled": false, "status": "maintenance", "statusMessage": "Rework in progress." + }, + "COMPOSIO_GOOGLE_DRIVE_CONNECTOR": { + "enabled": false, + "status": "maintenance", + "statusMessage": "Temporarily unavailable due to an upstream Composio bug (ComposioHQ/composio#3471) that returns malformed presigned URLs for Drive file downloads. Use the native Google Drive connector in the meantime." } }, "globalSettings": { diff --git a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx index 695e97d7b..01b86a538 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connect-forms/components/obsidian-connect-form.tsx @@ -8,10 +8,12 @@ import { EnumConnectorName } from "@/contracts/enums/connector"; import { useApiKey } from "@/hooks/use-api-key"; import { getConnectorBenefits } from "../connector-benefits"; import type { ConnectFormProps } from "../index"; +import { BACKEND_URL } from "@/lib/env-config"; const PLUGIN_RELEASES_URL = "https://github.com/MODSetter/SurfSense/releases?q=obsidian&expanded=true"; + /** * Obsidian connect form for the plugin-only architecture. * diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx index af818671a..fe6724ed8 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/components/circleback-config.tsx @@ -10,7 +10,7 @@ import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; import { authenticatedFetch } from "@/lib/auth-utils"; import type { ConnectorConfigProps } from "../index"; - +import { BACKEND_URL } from "@/lib/env-config"; export interface CirclebackConfigProps extends ConnectorConfigProps { onNameChange?: (name: string) => void; } @@ -42,7 +42,7 @@ export const CirclebackConfig: FC = ({ connector, onNameC const doFetch = async () => { if (!connector.search_space_id) return; - const baseUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL; + const baseUrl = BACKEND_URL; if (!baseUrl) { console.error("NEXT_PUBLIC_FASTAPI_BACKEND_URL is not configured"); setIsLoading(false); diff --git a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx index b64f97414..90b28dd1a 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/connector-configs/views/connector-edit-view.tsx @@ -21,7 +21,7 @@ import { getReauthEndpoint, LIVE_CONNECTOR_TYPES } from "../../constants/connect import { getConnectorDisplayName } from "../../tabs/all-connectors-tab"; import { MCPServiceConfig } from "../components/mcp-service-config"; import { getConnectorConfigComponent } from "../index"; - +import { BACKEND_URL } from "@/lib/env-config"; const VISION_LLM_CONNECTOR_TYPES = new Set([ EnumConnectorName.GOOGLE_DRIVE_CONNECTOR, EnumConnectorName.COMPOSIO_GOOGLE_DRIVE_CONNECTOR, @@ -94,7 +94,7 @@ export const ConnectorEditView: FC = ({ if (!spaceId || !reauthEndpoint) return; setReauthing(true); try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + const backendUrl = BACKEND_URL; const url = new URL(`${backendUrl}${reauthEndpoint}`); url.searchParams.set("connector_id", String(connector.id)); url.searchParams.set("space_id", String(spaceId)); diff --git a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts index b49bfda96..d1d675ad1 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts +++ b/surfsense_web/components/assistant-ui/connector-popup/hooks/use-connector-dialog.ts @@ -43,7 +43,7 @@ import { parseOAuthAuthResponse, validateIndexingConfigState, } from "../constants/connector-popup.schemas"; - +import { BACKEND_URL } from "@/lib/env-config"; const OAUTH_RESULT_COOKIE = "connector_oauth_result"; function readOAuthResultCookie(): string | null { @@ -364,7 +364,7 @@ export const useConnectorDialog = () => { try { // Check if authEndpoint already has query parameters const separator = connector.authEndpoint.includes("?") ? "&" : "?"; - const url = `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}${connector.authEndpoint}${separator}space_id=${searchSpaceId}`; + const url = `${BACKEND_URL}${connector.authEndpoint}${separator}space_id=${searchSpaceId}`; const response = await authenticatedFetch(url, { method: "GET" }); diff --git a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx index 65960db22..41dae221e 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx @@ -16,7 +16,7 @@ import { cn } from "@/lib/utils"; import { getReauthEndpoint, LIVE_CONNECTOR_TYPES } from "../constants/connector-constants"; import { useConnectorStatus } from "../hooks/use-connector-status"; import { getConnectorDisplayName } from "../tabs/all-connectors-tab"; - +import { BACKEND_URL } from "@/lib/env-config"; interface ConnectorAccountsListViewProps { connectorType: string; connectorTitle: string; @@ -59,7 +59,7 @@ export const ConnectorAccountsListView: FC = ({ if (!searchSpaceId || !endpoint) return; setReauthingId(connector.id); try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; + const backendUrl = BACKEND_URL; const url = new URL(`${backendUrl}${endpoint}`); url.searchParams.set("connector_id", String(connector.id)); url.searchParams.set("space_id", String(searchSpaceId)); diff --git a/surfsense_web/components/documents/DocumentNode.tsx b/surfsense_web/components/documents/DocumentNode.tsx index 0f3cd4a19..a13bd0079 100644 --- a/surfsense_web/components/documents/DocumentNode.tsx +++ b/surfsense_web/components/documents/DocumentNode.tsx @@ -9,6 +9,7 @@ import { MoreHorizontal, Move, Pencil, + RotateCcw, Trash2, } from "lucide-react"; import React, { useCallback, useRef, useState } from "react"; @@ -61,8 +62,13 @@ interface DocumentNodeProps { onEdit: (doc: DocumentNodeDoc) => void; onDelete: (doc: DocumentNodeDoc) => void; onMove: (doc: DocumentNodeDoc) => void; + onReset?: (doc: DocumentNodeDoc) => void; onExport?: (doc: DocumentNodeDoc, format: string) => void; onVersionHistory?: (doc: DocumentNodeDoc) => void; + canDelete?: boolean; + canMove?: boolean; + canMention?: boolean; + canEdit?: boolean; contextMenuOpen?: boolean; onContextMenuOpenChange?: (open: boolean) => void; } @@ -76,8 +82,13 @@ export const DocumentNode = React.memo(function DocumentNode({ onEdit, onDelete, onMove, + onReset, onExport, onVersionHistory, + canDelete = true, + canMove = true, + canMention = true, + canEdit = true, contextMenuOpen, onContextMenuOpenChange, }: DocumentNodeProps) { @@ -85,8 +96,13 @@ export const DocumentNode = React.memo(function DocumentNode({ const isFailed = statusState === "failed"; const isProcessing = statusState === "pending" || statusState === "processing"; const isUnavailable = isProcessing || isFailed; - const isSelectable = !isUnavailable; - const isEditable = EDITABLE_DOCUMENT_TYPES.has(doc.document_type) && !isUnavailable; + const isMemoryDocument = + doc.document_type === "USER_MEMORY" || doc.document_type === "TEAM_MEMORY"; + const isSelectable = canMention && !isUnavailable; + const isEditable = + canEdit && + (isMemoryDocument || EDITABLE_DOCUMENT_TYPES.has(doc.document_type)) && + !isUnavailable; const handleCheckChange = useCallback(() => { if (isSelectable) { @@ -94,13 +110,22 @@ export const DocumentNode = React.memo(function DocumentNode({ } }, [doc, isMentioned, isSelectable, onToggleChatMention]); + const handlePrimaryClick = useCallback(() => { + if (canMention) { + handleCheckChange(); + return; + } + onPreview(doc); + }, [canMention, doc, handleCheckChange, onPreview]); + const [{ isDragging }, drag] = useDrag( () => ({ type: DND_TYPES.DOCUMENT, item: { id: doc.id }, + canDrag: canMove, collect: (monitor) => ({ isDragging: monitor.isDragging() }), }), - [doc.id] + [canMove, doc.id] ); const [dropdownOpen, setDropdownOpen] = useState(false); @@ -130,9 +155,11 @@ export const DocumentNode = React.memo(function DocumentNode({ const attachRef = useCallback( (node: HTMLDivElement | null) => { (rowRef as React.MutableRefObject).current = node; - drag(node); + if (canMove) { + drag(node); + } }, - [drag] + [canMove, drag] ); return ( @@ -187,12 +214,32 @@ export const DocumentNode = React.memo(function DocumentNode({ ); } return ( - e.stopPropagation()} - className="h-3.5 w-3.5 shrink-0" - /> + <> + {isMemoryDocument ? ( + + + + ) : canMention ? ( + e.stopPropagation()} + className="h-3.5 w-3.5 shrink-0" + /> + ) : ( + + {getDocumentTypeIcon( + doc.document_type as DocumentTypeEnum, + "h-3.5 w-3.5 text-muted-foreground" + )} + + )} + ); })()} @@ -205,8 +252,8 @@ export const DocumentNode = React.memo(function DocumentNode({ -
- - ) : ( - - )} - - -
- - {charCount.toLocaleString()} / {MEMORY_HARD_LIMIT.toLocaleString()} - characters - chars - {charCount > 15_000 && charCount <= MEMORY_HARD_LIMIT && " - Approaching limit"} - {charCount > MEMORY_HARD_LIMIT && " - Exceeds limit"} - -
- - - - - - - - - Copy as Markdown - - - - Download as Markdown - - - -
-
- - ); -} diff --git a/surfsense_web/components/tool-ui/generate-podcast.tsx b/surfsense_web/components/tool-ui/generate-podcast.tsx index b690530a1..866c0082d 100644 --- a/surfsense_web/components/tool-ui/generate-podcast.tsx +++ b/surfsense_web/components/tool-ui/generate-podcast.tsx @@ -15,7 +15,7 @@ import { import { baseApiService } from "@/lib/apis/base-api.service"; import { authenticatedFetch } from "@/lib/auth-utils"; import { clearActivePodcastTaskId, setActivePodcastTaskId } from "@/lib/chat/podcast-state"; - +import { BACKEND_URL } from "@/lib/env-config"; /** * Zod schemas for runtime validation */ @@ -194,7 +194,7 @@ function PodcastPlayer({ // Authenticated view - fetch audio and details in parallel const [audioResponse, details] = await Promise.all([ authenticatedFetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/podcasts/${podcastId}/audio`, + `${BACKEND_URL}/api/v1/podcasts/${podcastId}/audio`, { method: "GET", signal: controller.signal } ), baseApiService.get(`/api/v1/podcasts/${podcastId}`), diff --git a/surfsense_web/components/tool-ui/generate-resume.tsx b/surfsense_web/components/tool-ui/generate-resume.tsx index 448e8c531..5533674bf 100644 --- a/surfsense_web/components/tool-ui/generate-resume.tsx +++ b/surfsense_web/components/tool-ui/generate-resume.tsx @@ -13,6 +13,7 @@ import { Button } from "@/components/ui/button"; import { useMediaQuery } from "@/hooks/use-media-query"; import { baseApiService } from "@/lib/apis/base-api.service"; import { getAuthHeaders } from "@/lib/auth-utils"; +import { BACKEND_URL } from "@/lib/env-config"; pdfjsLib.GlobalWorkerOptions.workerSrc = new URL( "pdfjs-dist/build/pdf.worker.min.mjs", @@ -222,7 +223,7 @@ function ResumeCard({ const previewPath = shareToken ? `/api/v1/public/${shareToken}/reports/${reportId}/preview` : `/api/v1/reports/${reportId}/preview`; - setPdfUrl(`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}${previewPath}`); + setPdfUrl(`${BACKEND_URL}${previewPath}`); if (autoOpen && isDesktop && !autoOpenedRef.current) { autoOpenedRef.current = true; diff --git a/surfsense_web/components/tool-ui/video-presentation/generate-video-presentation.tsx b/surfsense_web/components/tool-ui/video-presentation/generate-video-presentation.tsx index 6d8bfa5a5..00f9db23f 100644 --- a/surfsense_web/components/tool-ui/video-presentation/generate-video-presentation.tsx +++ b/surfsense_web/components/tool-ui/video-presentation/generate-video-presentation.tsx @@ -19,6 +19,7 @@ import { type CompiledSlide, } from "./combined-player"; import { getPptxExportErrorToast, getVideoDownloadErrorToast } from "./errors"; +import { BACKEND_URL } from "@/lib/env-config"; const GenerateVideoPresentationArgsSchema = z.object({ source_content: z.string(), @@ -136,7 +137,7 @@ function VideoPresentationPlayer({ const [isPptxExporting, setIsPptxExporting] = useState(false); const [pptxProgress, setPptxProgress] = useState(null); - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL ?? ""; + const backendUrl = BACKEND_URL ?? ""; const audioBlobUrlsRef = useRef([]); const loadPresentation = useCallback(async () => { diff --git a/surfsense_web/components/ui/fixed-toolbar.tsx b/surfsense_web/components/ui/fixed-toolbar.tsx index 540088ea5..591d2a5d3 100644 --- a/surfsense_web/components/ui/fixed-toolbar.tsx +++ b/surfsense_web/components/ui/fixed-toolbar.tsx @@ -14,7 +14,7 @@ export function FixedToolbar({ return ( ; case "SURFSENSE_DOCS": return ; + case "USER_MEMORY": + case "TEAM_MEMORY": + return ; case "DEEP": return ; case "DEEPER": diff --git a/surfsense_web/contracts/types/document.types.ts b/surfsense_web/contracts/types/document.types.ts index 7b8784568..ccc15fa62 100644 --- a/surfsense_web/contracts/types/document.types.ts +++ b/surfsense_web/contracts/types/document.types.ts @@ -29,6 +29,8 @@ export const documentTypeEnum = z.enum([ "LOCAL_FOLDER_FILE", "SURFSENSE_DOCS", "NOTE", + "USER_MEMORY", + "TEAM_MEMORY", "COMPOSIO_GOOGLE_DRIVE_CONNECTOR", "COMPOSIO_GMAIL_CONNECTOR", "COMPOSIO_GOOGLE_CALENDAR_CONNECTOR", diff --git a/surfsense_web/contracts/types/search-space.types.ts b/surfsense_web/contracts/types/search-space.types.ts index 7449f82b1..08918e2af 100644 --- a/surfsense_web/contracts/types/search-space.types.ts +++ b/surfsense_web/contracts/types/search-space.types.ts @@ -56,7 +56,6 @@ export const updateSearchSpaceRequest = z.object({ description: true, citations_enabled: true, qna_custom_instructions: true, - shared_memory_md: true, ai_file_sort_enabled: true, }) .partial(), diff --git a/surfsense_web/hooks/use-search-source-connectors.ts b/surfsense_web/hooks/use-search-source-connectors.ts index e53629235..bc1ec49b5 100644 --- a/surfsense_web/hooks/use-search-source-connectors.ts +++ b/surfsense_web/hooks/use-search-source-connectors.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useState } from "react"; import { authenticatedFetch } from "@/lib/auth-utils"; - +import { BACKEND_URL } from "@/lib/env-config"; export interface SearchSourceConnector { id: number; name: string; @@ -108,7 +108,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: // Build URL with optional search_space_id query parameter const url = new URL( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-source-connectors` + `${BACKEND_URL}/api/v1/search-source-connectors` ); if (spaceId !== undefined) { url.searchParams.append("search_space_id", spaceId.toString()); @@ -170,7 +170,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: try { // Add search_space_id as a query parameter const url = new URL( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-source-connectors` + `${BACKEND_URL}/api/v1/search-source-connectors` ); url.searchParams.append("search_space_id", spaceId.toString()); @@ -208,7 +208,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: ) => { try { const response = await authenticatedFetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, + `${BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, { method: "PUT", headers: { "Content-Type": "application/json" }, @@ -239,7 +239,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: const deleteConnector = async (connectorId: number) => { try { const response = await authenticatedFetch( - `${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, + `${BACKEND_URL}/api/v1/search-source-connectors/${connectorId}`, { method: "DELETE", headers: { "Content-Type": "application/json" }, @@ -284,7 +284,7 @@ export const useSearchSourceConnectors = (lazy: boolean = false, searchSpaceId?: const response = await authenticatedFetch( `${ - process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL + BACKEND_URL }/api/v1/search-source-connectors/${connectorId}/index?${params.toString()}`, { method: "POST", diff --git a/surfsense_web/lib/apis/base-api.service.ts b/surfsense_web/lib/apis/base-api.service.ts index 269fd916c..0819cbc7c 100644 --- a/surfsense_web/lib/apis/base-api.service.ts +++ b/surfsense_web/lib/apis/base-api.service.ts @@ -9,7 +9,7 @@ import { NetworkError, NotFoundError, } from "../error"; - +import { BACKEND_URL } from "@/lib/env-config"; enum ResponseType { JSON = "json", TEXT = "text", @@ -390,4 +390,4 @@ class BaseApiService { } } -export const baseApiService = new BaseApiService(process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || ""); +export const baseApiService = new BaseApiService(BACKEND_URL); diff --git a/surfsense_web/lib/auth-utils.ts b/surfsense_web/lib/auth-utils.ts index 4848f4ab7..645a6d1ba 100644 --- a/surfsense_web/lib/auth-utils.ts +++ b/surfsense_web/lib/auth-utils.ts @@ -1,7 +1,7 @@ /** * Authentication utilities for handling token expiration and redirects */ - +import { BACKEND_URL } from "@/lib/env-config"; const REDIRECT_PATH_KEY = "surfsense_redirect_path"; const BEARER_TOKEN_KEY = "surfsense_bearer_token"; const REFRESH_TOKEN_KEY = "surfsense_refresh_token"; @@ -194,8 +194,7 @@ export async function logout(): Promise { // Call backend to revoke the refresh token if (refreshToken) { try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; - const response = await fetch(`${backendUrl}/auth/jwt/revoke`, { + const response = await fetch(`${BACKEND_URL}/auth/jwt/revoke`, { method: "POST", headers: { "Content-Type": "application/json", @@ -273,8 +272,7 @@ export async function refreshAccessToken(): Promise { isRefreshing = true; refreshPromise = (async () => { try { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; - const response = await fetch(`${backendUrl}/auth/jwt/refresh`, { + const response = await fetch(`${BACKEND_URL}/auth/jwt/refresh`, { method: "POST", headers: { "Content-Type": "application/json", diff --git a/surfsense_web/lib/chat/thread-persistence.ts b/surfsense_web/lib/chat/thread-persistence.ts index 4cf9b8e68..abe6bc02c 100644 --- a/surfsense_web/lib/chat/thread-persistence.ts +++ b/surfsense_web/lib/chat/thread-persistence.ts @@ -4,7 +4,7 @@ */ import { baseApiService } from "@/lib/apis/base-api.service"; - +import { BACKEND_URL } from "@/lib/env-config"; // ============================================================================= // Types matching backend schemas // ============================================================================= @@ -228,6 +228,5 @@ export interface RegenerateParams { * Get the URL for the regenerate endpoint (for streaming fetch) */ export function getRegenerateUrl(threadId: number): string { - const backendUrl = process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL || "http://localhost:8000"; - return `${backendUrl}/api/v1/threads/${threadId}/regenerate`; + return `${BACKEND_URL}/api/v1/threads/${threadId}/regenerate`; } diff --git a/surfsense_web/package.json b/surfsense_web/package.json index 640d5c207..213adbaad 100644 --- a/surfsense_web/package.json +++ b/surfsense_web/package.json @@ -1,6 +1,6 @@ { "name": "surfsense_web", - "version": "0.0.24", + "version": "0.0.25", "private": true, "packageManager": "pnpm@10.26.0", "description": "SurfSense Frontend",