mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-03 21:02:40 +02:00
feat: various UI fixes, prompt optimizations, and allowing duplicate docs
- Updated `content_hash` in the `Document` model to remove global uniqueness, allowing identical content across different paths. - Enhanced `_create_document` function to handle path uniqueness and prevent session-poisoning from `IntegrityError`. - Added detailed comments for clarity on the changes and their implications. - Introduced new citation handling in the editor for improved user experience with citation jumps. - Updated package dependencies in the frontend for better functionality.
This commit is contained in:
parent
e6433f78c4
commit
b9a66cb417
26 changed files with 1540 additions and 852 deletions
|
|
@ -25,17 +25,33 @@ class TestProviderVariantDetection:
|
|||
@pytest.mark.parametrize(
|
||||
"model_name,expected",
|
||||
[
|
||||
# GPT-4 family routes to "classic" (autonomous-persistence style)
|
||||
("openai:gpt-4o-mini", "openai_classic"),
|
||||
("openai:gpt-4-turbo", "openai_classic"),
|
||||
# GPT-5 / o-series route to "reasoning" (channel-aware pragmatic)
|
||||
("openai:gpt-5", "openai_reasoning"),
|
||||
("openai:gpt-5-codex", "openai_reasoning"),
|
||||
("openai:o1-preview", "openai_reasoning"),
|
||||
("openai:o3-mini", "openai_reasoning"),
|
||||
# Codex family beats reasoning (more specific). Mirrors OpenCode
|
||||
# ``system.ts`` — ``gpt-*-codex`` gets the code-purist prompt.
|
||||
("openai:gpt-5-codex", "openai_codex"),
|
||||
("openai:gpt-codex", "openai_codex"),
|
||||
("openai:codex-mini", "openai_codex"),
|
||||
# Anthropic + Google
|
||||
("anthropic:claude-3-5-sonnet", "anthropic"),
|
||||
("anthropic/claude-opus-4", "anthropic"),
|
||||
("google:gemini-2.0-flash", "google"),
|
||||
("vertex:gemini-1.5-pro", "google"),
|
||||
# Newly-covered families
|
||||
("moonshot:kimi-k2", "kimi"),
|
||||
("openrouter:moonshot/kimi-k2.5", "kimi"),
|
||||
("xai:grok-2", "grok"),
|
||||
("openrouter:x-ai/grok-3", "grok"),
|
||||
("openai:deepseek-v3", "deepseek"),
|
||||
("deepseek:deepseek-r1", "deepseek"),
|
||||
# Unknown families fall back to default (no provider block emitted)
|
||||
("groq:mixtral-8x7b", "default"),
|
||||
("together:llama-3.1-70b", "default"),
|
||||
(None, "default"),
|
||||
("", "default"),
|
||||
],
|
||||
|
|
@ -43,6 +59,16 @@ class TestProviderVariantDetection:
|
|||
def test_detection(self, model_name: str | None, expected: str) -> None:
    """Each parametrized model id must resolve to its expected variant."""
    actual = detect_provider_variant(model_name)
    assert actual == expected
|
||||
|
||||
def test_codex_takes_precedence_over_reasoning(self) -> None:
    """Regression guard: ``gpt-5-codex`` must NOT match the generic
    ``gpt-5`` reasoning regex first. Codex is the more specialised
    prompt and mirrors OpenCode's dispatch order.
    """
    # ``detect_provider_variant`` is already available at module scope
    # (``test_detection`` above calls it unqualified), so the previous
    # function-local re-import was redundant and has been removed.
    assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
    assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
|
||||
|
||||
|
||||
class TestCompose:
|
||||
def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None:
|
||||
|
|
@ -149,6 +175,52 @@ class TestCompose:
|
|||
prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo")
|
||||
assert "<provider_hints>" not in prompt
|
||||
|
||||
@pytest.mark.parametrize(
    "model_name,expected_marker",
    [
        # Each marker is a unique-ish phrase from the corresponding fragment.
        # If a fragment is renamed/rewritten such that the marker is gone,
        # update both the fragment and this test deliberately.
        ("openai:gpt-5-codex", "Codex-class"),
        ("openai:gpt-5", "OpenAI reasoning model"),
        ("openai:gpt-4o", "classic OpenAI chat model"),
        ("anthropic:claude-3-5-sonnet", "Anthropic Claude"),
        ("google:gemini-2.0-flash", "Google Gemini"),
        ("moonshot:kimi-k2", "Moonshot Kimi"),
        ("xai:grok-2", "xAI Grok"),
        ("deepseek:deepseek-r1", "DeepSeek"),
    ],
)
def test_each_known_variant_renders_with_its_marker(
    self,
    fixed_today: datetime,
    model_name: str,
    expected_marker: str,
) -> None:
    """Every supported variant must produce a ``<provider_hints>`` block
    containing its identifying marker. This pins the dispatch + the
    on-disk fragments together so a missing/renamed file is caught
    immediately.
    """
    rendered = compose_system_prompt(today=fixed_today, model_name=model_name)

    # First: the variant must have emitted a hints block at all.
    missing_block_msg = (
        f"variant for {model_name!r} did not emit a provider_hints block; "
        "the corresponding providers/<variant>.md may be missing"
    )
    assert "<provider_hints>" in rendered, missing_block_msg

    # Second: the block must be the *right* fragment for this model.
    missing_marker_msg = (
        f"variant for {model_name!r} emitted hints but lacked the "
        f"expected marker {expected_marker!r} — the fragment may have "
        "drifted from the dispatch table"
    )
    assert expected_marker in rendered, missing_marker_msg
|
||||
|
||||
def test_provider_blocks_are_byte_stable_across_calls(
    self, fixed_today: datetime
) -> None:
    """Cache-stability guard: same model id → byte-identical prompt."""
    model_id = "moonshot:kimi-k2"
    first = compose_system_prompt(today=fixed_today, model_name=model_id)
    second = compose_system_prompt(today=fixed_today, model_name=model_id)
    assert first == second
|
||||
|
||||
def test_custom_system_instructions_override_default(
|
||||
self, fixed_today: datetime
|
||||
) -> None:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,168 @@
|
|||
"""Unit tests for kb_persistence filesystem-parity invariants.
|
||||
|
||||
Specifically, these tests pin down that the agent-driven write_file flow
|
||||
treats path uniqueness — not content uniqueness — as the only hard
|
||||
invariant. This mirrors a real filesystem: ``cp a b`` produces two files
|
||||
with identical bytes living at different paths, and that should round-trip
|
||||
through :class:`KnowledgeBasePersistenceMiddleware` without losing the copy.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from app.agents.new_chat.middleware import kb_persistence
|
||||
from app.db import Document
|
||||
|
||||
|
||||
class _FakeResult:
|
||||
"""Minimal stand-in for ``sqlalchemy.engine.Result``."""
|
||||
|
||||
def __init__(self, value: Any = None) -> None:
|
||||
self._value = value
|
||||
|
||||
def scalar_one_or_none(self) -> Any:
|
||||
return self._value
|
||||
|
||||
def scalar(self) -> Any:
|
||||
return self._value
|
||||
|
||||
|
||||
class _FakeSession:
    """Minimal AsyncSession stand-in scoped to ``_create_document`` needs.

    Records every ``add`` so we can assert against the resulting Documents
    and Chunks. ``execute`` always returns "no row" by default — i.e. no
    folder hierarchy preexists and no path collision exists. Tests that
    want a path collision can override that on a per-call basis.
    """

    def __init__(self) -> None:
        # Everything handed to ``add``/``add_all``, in insertion order.
        self.added: list[Any] = []
        # Default: every SELECT comes back empty ("no row").
        self.execute = AsyncMock(return_value=_FakeResult(None))
        # Counter backing the fake primary-key assignment on flush.
        self._id_counter = 1
        # ``await session.flush()`` assigns ids, like a real flush would.
        self.flush = AsyncMock(side_effect=self._assign_missing_ids)

    async def _assign_missing_ids(self) -> None:
        """Give each pending object without an ``id`` the next sequential id."""
        for pending in self.added:
            if getattr(pending, "id", None) is None:
                pending.id = self._id_counter
                self._id_counter += 1

    def add(self, obj: Any) -> None:
        """Record a single pending ORM object."""
        self.added.append(obj)

    def add_all(self, objs: list[Any]) -> None:
        """Record a batch of pending ORM objects."""
        self.added.extend(objs)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _stub_embeddings_and_chunks(monkeypatch: pytest.MonkeyPatch) -> None:
    """Avoid loading the embedding model in unit tests."""

    def _fake_embed(texts):
        # One zero-vector per input text; dimensionality is irrelevant here.
        return [np.zeros(8, dtype=np.float32) for _ in texts]

    monkeypatch.setattr(kb_persistence, "embed_texts", _fake_embed)
    # Treat every document as a single chunk so chunk counts stay predictable.
    monkeypatch.setattr(kb_persistence, "chunk_text", lambda content: [content])
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_document_allows_identical_content_at_different_paths() -> None:
    """The core regression: ``cp /a/notes.md /b/notes-copy.md``.

    Both create calls must succeed even though the bytes are byte-for-byte
    identical, because path is the only filesystem-style unique key.
    """
    session = _FakeSession()
    content = "# Same body\n\nIdentical content used by two different paths.\n"

    async def _write(path: str) -> Any:
        # Both create calls share every argument except the path.
        return await kb_persistence._create_document(
            session,  # type: ignore[arg-type]
            virtual_path=path,
            content=content,
            search_space_id=42,
            created_by_id="user-1",
        )

    original = await _write("/documents/a/notes.md")
    assert isinstance(original, Document)
    assert original.title == "notes.md"

    # The byte-identical copy at a different path must also succeed —
    # that's the whole point of the filesystem-parity fix.
    duplicate = await _write("/documents/b/notes-copy.md")
    assert isinstance(duplicate, Document)
    assert duplicate.title == "notes-copy.md"

    # Same content hash, distinct path hashes (``unique_identifier_hash``):
    # that's the desired contract.
    assert original.content_hash == duplicate.content_hash
    assert original.unique_identifier_hash != duplicate.unique_identifier_hash
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_document_still_rejects_path_collision() -> None:
    """Path uniqueness remains the hard invariant.

    If ``unique_identifier_hash`` already points at an existing row in
    the same search space, the create call must raise ``ValueError``
    with a clear message — matching the behavior the commit loop relies
    on to upsert via the existing-row code path.
    """
    fake_session = _FakeSession()

    # A folder-less path keeps ``_ensure_folder_hierarchy`` a no-op, so
    # the only SELECT issued is the path-collision check; returning an
    # existing doc id from it trips the guard.
    fake_session.execute = AsyncMock(return_value=_FakeResult(value=99))

    with pytest.raises(ValueError, match="already exists at path"):
        await kb_persistence._create_document(
            fake_session,  # type: ignore[arg-type]
            virtual_path="/documents/notes.md",
            content="anything",
            search_space_id=42,
            created_by_id="user-1",
        )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_document_does_not_query_for_content_hash_collision(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Regression guard: the legacy second SELECT (content_hash collision
    pre-check) must be gone. Counting ``execute`` calls is a brittle but
    effective way to lock that in.

    The current flow runs exactly one ``execute`` for the path-collision
    SELECT (no folder parts in this path → ``_ensure_folder_hierarchy``
    short-circuits). If a future refactor reintroduces a content-hash
    SELECT, this test will fail loud.
    """
    fake_session = _FakeSession()

    await kb_persistence._create_document(
        fake_session,  # type: ignore[arg-type]
        virtual_path="/documents/notes.md",
        content="hello",
        search_space_id=42,
        created_by_id="user-1",
    )

    # Path-collision SELECT only. No content_hash SELECT.
    executed = fake_session.execute.await_count
    assert executed == 1, (
        f"Unexpected execute count {executed}; "
        "did the legacy content_hash collision pre-check get re-added?"
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue