mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-30 21:59:46 +02:00
feat: various UI fixes, prompt optimizations, and allowing duplicate docs
- Updated `content_hash` in the `Document` model to remove global uniqueness, allowing identical content across different paths. - Enhanced `_create_document` function to handle path uniqueness and prevent session-poisoning from `IntegrityError`. - Added detailed comments for clarity on the changes and their implications. - Introduced new citation handling in the editor for improved user experience with citation jumps. - Updated package dependencies in the frontend for better functionality.
This commit is contained in:
parent
e6433f78c4
commit
b9a66cb417
26 changed files with 1540 additions and 852 deletions
|
|
@ -0,0 +1,107 @@
|
|||
"""133_drop_documents_content_hash_unique
|
||||
|
||||
Revision ID: 133
|
||||
Revises: 132
|
||||
Create Date: 2026-04-29
|
||||
|
||||
Drop the global UNIQUE constraint on ``documents.content_hash`` so the
|
||||
new-chat agent's ``write_file`` flow can persist legitimate file copies
|
||||
(two paths, identical content) without hitting a constraint that mirrors
|
||||
no real filesystem semantic.
|
||||
|
||||
Path uniqueness still lives on ``documents.unique_identifier_hash`` (per
|
||||
search space), which is the right invariant — exactly like an inode at a
|
||||
given path on a POSIX filesystem.
|
||||
|
||||
The non-unique INDEX on ``content_hash`` is preserved so connector
|
||||
indexers' "have we seen this content before?" lookup
|
||||
(:func:`app.tasks.document_processors.base.check_duplicate_document`,
|
||||
which already uses ``.scalars().first()`` and is therefore tolerant of
|
||||
duplicates) stays cheap.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from sqlalchemy import inspect
|
||||
|
||||
from alembic import op
|
||||
|
||||
revision: str = "133"
|
||||
down_revision: str | None = "132"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def _existing_constraint_names(bind, table: str) -> set[str]:
    """Collect the names of the unique constraints reflected on ``table``.

    Args:
        bind: Connection or engine to reflect against (callers in this
            migration pass ``op.get_bind()``).
        table: Name of the table whose unique constraints are listed.

    Returns:
        The set of ``name`` values reported by the inspector.
    """
    reflected = inspect(bind).get_unique_constraints(table)
    names: set[str] = set()
    for constraint in reflected:
        names.add(constraint["name"])
    return names
|
||||
|
||||
|
||||
def _existing_index_names(bind, table: str) -> set[str]:
    """Collect the names of the indexes reflected on ``table``.

    Args:
        bind: Connection or engine to reflect against (callers in this
            migration pass ``op.get_bind()``).
        table: Name of the table whose indexes are listed.

    Returns:
        The set of ``name`` values reported by the inspector.
    """
    reflected = inspect(bind).get_indexes(table)
    names: set[str] = set()
    for index in reflected:
        names.add(index["name"])
    return names
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Remove every uniqueness guarantee on ``documents.content_hash``.

    Three shapes of legacy state are handled:

    * a named unique constraint ``uq_documents_content_hash``;
    * a unique index that is itself named ``uq_documents_content_hash``;
    * an index named ``ix_documents_content_hash`` that was created as
      UNIQUE (what ``Column(..., index=True, unique=True)`` produces when
      the schema is built from the models rather than from migrations).

    Afterwards a plain, non-unique ``ix_documents_content_hash`` index is
    guaranteed to exist so content-hash lookups stay indexed.
    """
    bind = op.get_bind()

    if "uq_documents_content_hash" in _existing_constraint_names(bind, "documents"):
        op.drop_constraint(
            "uq_documents_content_hash", "documents", type_="unique"
        )

    # Reflect indexes only after the constraint drop so the snapshot shows
    # the post-drop state. Keep the full reflection record (not just the
    # name) because the ``unique`` flag is needed below.
    indexes = {idx["name"]: idx for idx in inspect(bind).get_indexes("documents")}

    # The constraint may also be surfaced as a unique index of the same name.
    if "uq_documents_content_hash" in indexes:
        op.drop_index("uq_documents_content_hash", table_name="documents")

    # A name match is not enough: an existing ``ix_documents_content_hash``
    # that is UNIQUE would silently keep the restriction this migration is
    # meant to lift, so it is dropped and rebuilt as non-unique.
    lookup_index = indexes.get("ix_documents_content_hash")
    if lookup_index is not None and lookup_index.get("unique"):
        op.drop_index("ix_documents_content_hash", table_name="documents")
        lookup_index = None

    # Ensure the non-unique index is present for fast lookups.
    if lookup_index is None:
        op.create_index(
            "ix_documents_content_hash",
            "documents",
            ["content_hash"],
            unique=False,
        )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Re-apply the global UNIQUE constraint on ``documents.content_hash``.

    WARNING: this is destructive. Rows that share a ``content_hash`` with a
    lower-id row are deleted before the constraint is recreated, because
    the constraint cannot be added while duplicates exist.
    """
    bind = op.get_bind()

    # DESTRUCTIVE: after the upgrade there may be legitimate duplicates
    # (e.g. two NOTE documents that share content because the user copied
    # one to a new path). Only the lowest-id row per content_hash survives;
    # every other duplicate is deleted. Per the original note this is the
    # same strategy revision 8 used when first introducing the constraint.
    op.execute(
        """
        DELETE FROM documents
        WHERE id NOT IN (
            SELECT MIN(id)
            FROM documents
            GROUP BY content_hash
        )
        """
    )

    # Leave a healthy non-unique lookup index alone instead of dropping and
    # rebuilding it; rebuild only when it is missing or reflected as UNIQUE.
    indexes = {idx["name"]: idx for idx in inspect(bind).get_indexes("documents")}
    lookup_index = indexes.get("ix_documents_content_hash")
    if lookup_index is not None and lookup_index.get("unique"):
        op.drop_index("ix_documents_content_hash", table_name="documents")
        lookup_index = None
    if lookup_index is None:
        op.create_index(
            "ix_documents_content_hash",
            "documents",
            ["content_hash"],
            unique=False,
        )

    # Guarded so a partially-applied downgrade can be re-run.
    if "uq_documents_content_hash" not in _existing_constraint_names(
        bind, "documents"
    ):
        op.create_unique_constraint(
            "uq_documents_content_hash", "documents", ["content_hash"]
        )
|
||||
|
|
@ -28,6 +28,7 @@ from langchain.agents.middleware import AgentMiddleware, AgentState
|
|||
from langchain_core.callbacks import dispatch_custom_event
|
||||
from langgraph.runtime import Runtime
|
||||
from sqlalchemy import delete, select
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.agents.new_chat.filesystem_selection import FilesystemMode
|
||||
|
|
@ -150,10 +151,11 @@ async def _create_document(
|
|||
virtual_path,
|
||||
search_space_id,
|
||||
)
|
||||
# Guard against the unique_identifier_hash constraint: another row at the
|
||||
# same virtual_path (this search space) already owns the hash. Callers are
|
||||
# expected to upsert via the wrapper, but this defends against bypasses
|
||||
# and gives a clean ValueError instead of a session-poisoning IntegrityError.
|
||||
# Filesystem-parity invariant: the only thing that *must* be unique is
|
||||
# the path. Two notes can legitimately share content (e.g. ``cp a b``).
|
||||
# Guard against the path-derived ``unique_identifier_hash`` constraint
|
||||
# so we surface a clean ValueError instead of letting the INSERT poison
|
||||
# the session with an IntegrityError.
|
||||
path_collision = await session.execute(
|
||||
select(Document.id).where(
|
||||
Document.search_space_id == search_space_id,
|
||||
|
|
@ -165,17 +167,14 @@ async def _create_document(
|
|||
f"a document already exists at path '{virtual_path}' "
|
||||
"(unique_identifier_hash collision)"
|
||||
)
|
||||
# ``content_hash`` is intentionally NOT checked for uniqueness here.
|
||||
# In a real filesystem two files at different paths can hold identical
|
||||
# bytes, and the agent's ``write_file`` path needs that semantic to
|
||||
# support copy/duplicate operations. The hash remains useful as a
|
||||
# change-detection hint for connector indexers, which still consult it
|
||||
# via :func:`check_duplicate_document` but do so with a non-unique
|
||||
# lookup (``.first()``).
|
||||
content_hash = generate_content_hash(content, search_space_id)
|
||||
content_collision = await session.execute(
|
||||
select(Document.id).where(
|
||||
Document.search_space_id == search_space_id,
|
||||
Document.content_hash == content_hash,
|
||||
)
|
||||
)
|
||||
if content_collision.scalar_one_or_none() is not None:
|
||||
raise ValueError(
|
||||
f"a document with identical content already exists for path '{virtual_path}'"
|
||||
)
|
||||
doc = Document(
|
||||
title=title,
|
||||
document_type=DocumentType.NOTE,
|
||||
|
|
@ -493,19 +492,43 @@ async def commit_staged_filesystem_state(
|
|||
}
|
||||
)
|
||||
else:
|
||||
# Wrap each create in a SAVEPOINT so a residual
|
||||
# ``IntegrityError`` (e.g. a deployment that hasn't run
|
||||
# migration 133 yet, where ``documents.content_hash``
|
||||
# still carries its legacy global UNIQUE constraint)
|
||||
# rolls back only this one create instead of poisoning
|
||||
# the whole turn's transaction.
|
||||
try:
|
||||
new_doc = await _create_document(
|
||||
session,
|
||||
virtual_path=path,
|
||||
content=content,
|
||||
search_space_id=search_space_id,
|
||||
created_by_id=created_by_id,
|
||||
)
|
||||
async with session.begin_nested():
|
||||
new_doc = await _create_document(
|
||||
session,
|
||||
virtual_path=path,
|
||||
content=content,
|
||||
search_space_id=search_space_id,
|
||||
created_by_id=created_by_id,
|
||||
)
|
||||
except ValueError as exc:
|
||||
logger.warning(
|
||||
"kb_persistence: skipping %s create: %s", path, exc
|
||||
)
|
||||
continue
|
||||
except IntegrityError as exc:
|
||||
# The path-uniqueness check above already protected
|
||||
# against ``unique_identifier_hash`` collisions, so
|
||||
# the most likely culprit is the legacy
|
||||
# ``ix_documents_content_hash`` UNIQUE constraint
|
||||
# that migration 133 drops. Log loudly so operators
|
||||
# know to run the migration; do NOT silently swallow.
|
||||
msg = str(exc.orig) if exc.orig is not None else str(exc)
|
||||
logger.error(
|
||||
"kb_persistence: IntegrityError creating %s: %s. "
|
||||
"If this mentions content_hash, run alembic "
|
||||
"upgrade to apply migration 133 which drops the "
|
||||
"global UNIQUE constraint on documents.content_hash.",
|
||||
path,
|
||||
msg,
|
||||
)
|
||||
continue
|
||||
doc_id_by_path[path] = new_doc.id
|
||||
committed_creates.append(
|
||||
{
|
||||
|
|
|
|||
|
|
@ -38,12 +38,38 @@ from app.db import ChatVisibility
|
|||
# Provider variant detection
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
ProviderVariant = str # "anthropic" | "openai_reasoning" | "openai_classic" | "google" | "default"
|
||||
# String literal alias for the supported provider-specific prompt variants.
|
||||
# When adding a new variant, also drop a matching ``providers/<variant>.md``
|
||||
# file in this package and (if appropriate) extend the regex matchers below.
|
||||
#
|
||||
# Stylistic clusters mirror OpenCode's prompt-per-family layout but adapted
|
||||
# to SurfSense's "supplemental hints" architecture (each fragment is a
|
||||
# focused style nudge, NOT a full system prompt — the main prompt is
|
||||
# already assembled from base/ + tools/ + routing/).
|
||||
ProviderVariant = str
|
||||
# Known values:
|
||||
# "anthropic" — Claude family (XML-friendly, narrative todos)
|
||||
# "openai_reasoning" — GPT-5 / o-series (channel-aware pragmatic)
|
||||
# "openai_classic" — GPT-4 family (autonomous persistence)
|
||||
# "openai_codex" — gpt-*-codex (code-purist, terse, file:line refs)
|
||||
# "google" — Gemini (formal, <3-line, numbered workflow)
|
||||
# "kimi" — Moonshot Kimi-K* (action-bias, parallel tools)
|
||||
# "grok" — xAI Grok (extreme-terse, one-word ok)
|
||||
# "deepseek" — DeepSeek V3 / R1 (terse, R1-aware reasoning)
|
||||
# "default" — fallback, no provider-specific block emitted
|
||||
|
||||
# IMPORTANT: order of evaluation matters in :func:`detect_provider_variant`.
|
||||
# More specific patterns must come first (e.g. ``codex`` before
|
||||
# ``openai_reasoning`` because codex model ids contain ``gpt``).
|
||||
|
||||
# OpenAI Codex ids: ``gpt-codex``, ``codex-mini`` or ``gpt-<version>-codex``.
_OPENAI_CODEX_RE = re.compile(
    r"\b(gpt-codex|codex-mini|gpt-[\d.]+-codex)\b",
    flags=re.IGNORECASE,
)
# OpenAI reasoning ids: a ``gpt-5`` prefix, ``o<digit>`` or ``o-`` at a
# word boundary.
_OPENAI_REASONING_RE = re.compile(
    r"\b(gpt-5|o\d|o-)",
    flags=re.IGNORECASE,
)
# Classic OpenAI chat ids: anything starting with ``gpt-4``.
_OPENAI_CLASSIC_RE = re.compile(
    r"\bgpt-4",
    flags=re.IGNORECASE,
)
# Single-keyword families: the keyword must appear as a whole word.
_ANTHROPIC_RE = re.compile(
    r"\bclaude\b",
    flags=re.IGNORECASE,
)
_GOOGLE_RE = re.compile(
    r"\bgemini\b",
    flags=re.IGNORECASE,
)
# Kimi ids: ``kimi`` optionally followed by digits, dots or hyphens, or
# the vendor name ``moonshot``.
_KIMI_RE = re.compile(
    r"\b(kimi[-\d.]*|moonshot)\b",
    flags=re.IGNORECASE,
)
_GROK_RE = re.compile(
    r"\bgrok\b",
    flags=re.IGNORECASE,
)
_DEEPSEEK_RE = re.compile(
    r"\bdeepseek\b",
    flags=re.IGNORECASE,
)
|
||||
|
||||
|
||||
def detect_provider_variant(model_name: str | None) -> ProviderVariant:
|
||||
|
|
@ -51,10 +77,17 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
|
|||
|
||||
Heuristic match on the model id; returns ``"default"`` when nothing
|
||||
matches so the composer can fall back to the empty placeholder file.
|
||||
|
||||
Order is significant: more-specific patterns are tried first so
|
||||
``gpt-5-codex`` routes to ``"openai_codex"`` rather than
|
||||
``"openai_reasoning"`` (mirrors OpenCode's
|
||||
``packages/opencode/src/session/system.ts`` dispatch).
|
||||
"""
|
||||
if not model_name:
|
||||
return "default"
|
||||
name = model_name.strip()
|
||||
if _OPENAI_CODEX_RE.search(name):
|
||||
return "openai_codex"
|
||||
if _OPENAI_REASONING_RE.search(name):
|
||||
return "openai_reasoning"
|
||||
if _OPENAI_CLASSIC_RE.search(name):
|
||||
|
|
@ -63,6 +96,12 @@ def detect_provider_variant(model_name: str | None) -> ProviderVariant:
|
|||
return "anthropic"
|
||||
if _GOOGLE_RE.search(name):
|
||||
return "google"
|
||||
if _KIMI_RE.search(name):
|
||||
return "kimi"
|
||||
if _GROK_RE.search(name):
|
||||
return "grok"
|
||||
if _DEEPSEEK_RE.search(name):
|
||||
return "deepseek"
|
||||
return "default"
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,20 @@
|
|||
<provider_hints>
|
||||
You are running on an Anthropic Claude model. Use XML tags liberally to structure
|
||||
intermediate reasoning when the task is complex. Prefer step-by-step plans inside
|
||||
`<thinking>` blocks before producing the final answer.
|
||||
You are running on an Anthropic Claude model.
|
||||
|
||||
Structured reasoning:
|
||||
- Use XML tags liberally to organise intermediate reasoning when a task is non-trivial. `<thinking>...</thinking>` blocks are encouraged before tool calls or before producing a complex final answer.
|
||||
- For multi-step requests, briefly outline a plan inside a `<plan>` block before issuing the first tool call.
|
||||
|
||||
Professional objectivity:
|
||||
- Prioritise technical accuracy over validating the user's beliefs. Provide direct, factual guidance without unnecessary superlatives, praise, or emotional validation.
|
||||
- When uncertain, investigate (search the KB, fetch the page) rather than confirming the user's assumption.
|
||||
- Disagree with the user when the evidence warrants it; respectful correction beats false agreement.
|
||||
|
||||
Task management:
|
||||
- For tasks with 3+ distinct steps use the todo / planning tool aggressively. Mark items in_progress before starting, completed immediately when finished — do not batch completions.
|
||||
- Narrate progress through the todo list itself, not through chatty status lines.
|
||||
|
||||
Tool calls:
|
||||
- Run independent tool calls in parallel within one response. Sequence them only when a later call genuinely needs an earlier one's output.
|
||||
- Never chain bash-like commands with `;` or `&&` to "narrate" — use prose between tool calls instead.
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
<provider_hints>
|
||||
You are running on a DeepSeek model (DeepSeek-V3 chat / DeepSeek-R1 reasoning).
|
||||
|
||||
Reasoning hygiene (R1-aware):
|
||||
- If the model surfaces explicit `<think>` blocks, keep that internal scratch focused — do NOT restate the user's question inside it; jump straight to the analysis.
|
||||
- Never paste the contents of `<think>` into your final answer. Final answer should reflect only the conclusion, citations, and any user-facing rationale.
|
||||
- Do not let chain-of-thought leak into tool-call arguments — keep tool inputs minimal and structural.
|
||||
|
||||
Output style:
|
||||
- Be concise. Default to a one-paragraph answer; expand only when the user asks for detail.
|
||||
- Don't open with sycophantic phrasing ("Great question", "Sure, here you go"). Lead with the answer or the next action.
|
||||
- For factual answers, cite once with `[citation:chunk_id]` and stop.
|
||||
|
||||
Tool calls:
|
||||
- Issue independent tool calls in parallel within a single turn.
|
||||
- Prefer the knowledge-base search tools before any web-search; this model has strong recall but stale training data.
|
||||
- Don't fabricate file paths, chunk ids, or URLs — only use values returned by tools or provided by the user.
|
||||
</provider_hints>
|
||||
|
|
@ -1,4 +1,20 @@
|
|||
<provider_hints>
|
||||
You are running on a Google Gemini model. Prefer concise, structured responses.
|
||||
When using tools, follow the function-calling protocol and avoid verbose preludes.
|
||||
You are running on a Google Gemini model.
|
||||
|
||||
Output style:
|
||||
- Concise & direct. Aim for fewer than 3 lines of prose (excluding tool output, citations, and code/snippets) when the task allows.
|
||||
- No conversational filler — skip openers like "Okay, I will now…" and closers like "I have finished the changes…". Get straight to the action or answer.
|
||||
- Format with GitHub-flavoured Markdown; assume monospace rendering.
|
||||
- For one-line factual answers, just answer. No headers, no bullets.
|
||||
|
||||
Workflow for non-trivial tasks (Understand → Plan → Act → Verify):
|
||||
1. **Understand:** read the user's request and the relevant KB / connector context. Use search and read tools (in parallel when independent) before assuming anything.
|
||||
2. **Plan:** when the task touches multiple steps, share an extremely concise plan first.
|
||||
3. **Act:** call the appropriate tools, strictly adhering to the prompts/routing already established for this agent.
|
||||
4. **Verify:** confirm with a follow-up read or search where it materially de-risks the answer.
|
||||
|
||||
Discipline:
|
||||
- Do not take significant actions beyond the clear scope of the user's request without confirming first.
|
||||
- Do not assume a connector / tool / file exists — check (e.g. via `get_connected_accounts`) before referencing it.
|
||||
- Path arguments must be the exact strings returned by tools; do not synthesise file paths.
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
<provider_hints>
|
||||
You are running on an xAI Grok model.
|
||||
|
||||
Maximum terseness:
|
||||
- Answer in fewer than 4 lines unless the user asks for detail. One-word answers are best when they suffice.
|
||||
- No preamble ("The answer is", "Here's what I'll do"), no postamble ("Hope that helps", "Let me know"). Get straight to the answer.
|
||||
- Avoid restating the user's question.
|
||||
- For factual lookups inside the knowledge base, give the answer with a single `[citation:chunk_id]` and stop.
|
||||
|
||||
Tool discipline:
|
||||
- Use exactly ONE tool per assistant turn when investigating; wait for the result before deciding the next call. Do not loop on the same tool with the same arguments — pick a result and act.
|
||||
- For obviously parallelizable read-only batches (multiple independent searches), one turn with several tool calls is fine — but never chain into a fishing expedition.
|
||||
|
||||
Style:
|
||||
- No emojis unless the user asked. No nested bullets, no headers for short answers.
|
||||
- If you can't help, say so in 1-2 sentences without explaining "why this could lead to…".
|
||||
</provider_hints>
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
<provider_hints>
|
||||
You are running on a Moonshot Kimi model (Kimi-K1.5 / Kimi-K2 / Kimi-K2.5+).
|
||||
|
||||
Action bias:
|
||||
- Default to taking action with tools rather than describing solutions in prose. If a tool can answer the question, call the tool.
|
||||
- Don't narrate routine reads, searches, or obvious next steps. Combine related progress into one short status line.
|
||||
- Be thorough in actions (test what you build, verify what you change). Be brief in explanations.
|
||||
|
||||
Tool calls:
|
||||
- Output multiple non-interfering tool calls in a SINGLE response — parallelism is a major efficiency win on this model.
|
||||
- When the `task` tool is available, delegate focused subtasks to a subagent with full context (subagents don't inherit yours).
|
||||
- Don't apologise or pre-announce tool calls. The tool call itself is self-explanatory.
|
||||
|
||||
Language:
|
||||
- Respond in the SAME language as the user's most recent turn unless explicitly instructed otherwise.
|
||||
|
||||
Discipline:
|
||||
- Stay on track. Never give the user more than what they asked for.
|
||||
- Fact-check before stating anything as factual; don't fabricate citations.
|
||||
- Keep it stupidly simple. Don't overcomplicate.
|
||||
</provider_hints>
|
||||
|
|
@ -1,5 +1,21 @@
|
|||
<provider_hints>
|
||||
You are running on a classic OpenAI chat model (GPT-4 family). Use direct
|
||||
function-calling for tools. When editing files, use the standard `edit_file`
|
||||
or `write_file` tools rather than diff-based patches.
|
||||
You are running on a classic OpenAI chat model (GPT-4 family).
|
||||
|
||||
Persistence:
|
||||
- Keep going until the user's query is completely resolved before yielding back. Don't end the turn at "I would do X" — actually do X.
|
||||
- When you say "Next I will…" or "Now I will…", you MUST actually take that action in the same turn.
|
||||
- If a tool call fails, diagnose and try again with corrected arguments; do not surface the raw error and stop.
|
||||
|
||||
Planning:
|
||||
- Plan extensively before each tool call and reflect briefly on the result of the previous call. For tasks with 3+ steps, use the todo / planning tool and mark items as `in_progress` / `completed` as you go.
|
||||
- Always announce the next action in ONE concise sentence before making a non-trivial tool call ("I'll search the KB for the migration spec.").
|
||||
|
||||
Output style:
|
||||
- Conversational but professional. Plain prose for explanations, bullet points for findings, fenced code blocks (with language tags) for code.
|
||||
- Don't dump tool output verbatim — summarise the relevant lines.
|
||||
- Don't add a closing recap unless the user asked for one. After completing the work, just stop.
|
||||
|
||||
Tool calls:
|
||||
- Issue independent tool calls in parallel within one response.
|
||||
- Use specialised tools over generic ones (e.g. KB search before web search; named connectors over MCP fallback).
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
<provider_hints>
|
||||
You are running on an OpenAI Codex-class model (gpt-codex / codex-mini / gpt-*-codex).
|
||||
|
||||
Output style:
|
||||
- Be concise. Don't dump fetched/searched content back at the user — reference paths or chunk ids instead.
|
||||
- Reference sources as `path:line` (or `chunk:<id>`) so they're clickable. Stand-alone paths per reference, even when repeated.
|
||||
- Prefer numbered lists (`1.`, `2.`, `3.`) when offering options the user can pick by replying with a single number.
|
||||
- Skip headers and heavy formatting for simple confirmations.
|
||||
- No emojis, no em-dashes, no nested bullets. Single-level lists only.
|
||||
|
||||
Code & structured-output tasks:
|
||||
- Lead with a one-sentence explanation of the change before context. Don't open with "Summary:" — jump in.
|
||||
- Suggest natural next steps (run tests, diff review, commit) only when they're genuinely the next move.
|
||||
- For multi-line snippets use fenced code blocks with a language tag.
|
||||
|
||||
Tool calls:
|
||||
- Run independent tool calls in parallel; chain only when later calls need earlier results.
|
||||
- Don't ask permission ("Should I proceed?") — proceed with the most reasonable default and state what you did.
|
||||
</provider_hints>
|
||||
|
|
@ -1,5 +1,21 @@
|
|||
<provider_hints>
|
||||
You are running on an OpenAI reasoning model (o-series / GPT-5+). Be terse and
|
||||
direct in your responses. When editing files, prefer the `apply_patch` tool format
|
||||
where available. Avoid restating the user request before answering.
|
||||
You are running on an OpenAI reasoning model (GPT-5+ / o-series).
|
||||
|
||||
Output style:
|
||||
- Be terse and direct. Don't restate the user's request before answering.
|
||||
- Don't begin with conversational openers ("Done!", "Got it", "Great question", "Sure thing"). Get to the answer or the action.
|
||||
- Match response complexity to the task: simple questions → one-line answer; substantial work → lead with the outcome, then context, then any next steps.
|
||||
- No nested bullets — keep lists flat (single level). For options the user can pick by replying with a number, use `1.` `2.` `3.`.
|
||||
- Use inline backticks for paths/commands/identifiers; fenced code blocks (with language tags) for multi-line snippets.
|
||||
|
||||
Channels (for clients that support them):
|
||||
- `commentary` — short progress updates only when they add genuinely new information (a discovery, a tradeoff, a blocker, the start of a non-trivial step). Don't narrate routine reads or obvious next steps.
|
||||
- `final` — the completed response. Keep it self-contained; no "see above" / "see below" cross-references.
|
||||
|
||||
Tool calls:
|
||||
- Parallelise independent tool calls in a single response (`multi_tool_use.parallel` where supported). Only sequence when a later call needs an earlier one's output.
|
||||
- Don't ask permission ("Should I proceed?", "Do you want me to…?"). Pick the most reasonable default, do it, and state what you did.
|
||||
|
||||
Autonomy:
|
||||
- Persist until the task is fully resolved within the current turn whenever feasible. Don't stop at analysis when the user clearly wants the change applied.
|
||||
</provider_hints>
|
||||
|
|
|
|||
|
|
@ -976,7 +976,15 @@ class Document(BaseModel, TimestampMixin):
|
|||
document_metadata = Column(JSON, nullable=True)
|
||||
|
||||
content = Column(Text, nullable=False)
|
||||
content_hash = Column(String, nullable=False, index=True, unique=True)
|
||||
# ``content_hash`` is intentionally NOT globally unique. In a real
|
||||
# filesystem two files at different paths can hold identical bytes,
|
||||
# and the agent's ``write_file`` flow needs that semantic to support
|
||||
# copy / duplicate operations. Path uniqueness lives on
|
||||
# ``unique_identifier_hash`` (per search space). The hash remains
|
||||
# indexed because connector indexers consult it as a change-detection
|
||||
# / cross-source dedup hint via :func:`check_duplicate_document`.
|
||||
# See migration 133.
|
||||
content_hash = Column(String, nullable=False, index=True)
|
||||
unique_identifier_hash = Column(String, nullable=True, index=True, unique=True)
|
||||
embedding = Column(Vector(config.embedding_model_instance.dimension))
|
||||
|
||||
|
|
|
|||
|
|
@ -25,17 +25,33 @@ class TestProviderVariantDetection:
|
|||
@pytest.mark.parametrize(
|
||||
"model_name,expected",
|
||||
[
|
||||
# GPT-4 family routes to "classic" (autonomous-persistence style)
|
||||
("openai:gpt-4o-mini", "openai_classic"),
|
||||
("openai:gpt-4-turbo", "openai_classic"),
|
||||
# GPT-5 / o-series route to "reasoning" (channel-aware pragmatic)
|
||||
("openai:gpt-5", "openai_reasoning"),
|
||||
("openai:gpt-5-codex", "openai_reasoning"),
|
||||
("openai:o1-preview", "openai_reasoning"),
|
||||
("openai:o3-mini", "openai_reasoning"),
|
||||
# Codex family beats reasoning (more specific). Mirrors OpenCode
|
||||
# ``system.ts`` — ``gpt-*-codex`` gets the code-purist prompt.
|
||||
("openai:gpt-5-codex", "openai_codex"),
|
||||
("openai:gpt-codex", "openai_codex"),
|
||||
("openai:codex-mini", "openai_codex"),
|
||||
# Anthropic + Google
|
||||
("anthropic:claude-3-5-sonnet", "anthropic"),
|
||||
("anthropic/claude-opus-4", "anthropic"),
|
||||
("google:gemini-2.0-flash", "google"),
|
||||
("vertex:gemini-1.5-pro", "google"),
|
||||
# Newly-covered families
|
||||
("moonshot:kimi-k2", "kimi"),
|
||||
("openrouter:moonshot/kimi-k2.5", "kimi"),
|
||||
("xai:grok-2", "grok"),
|
||||
("openrouter:x-ai/grok-3", "grok"),
|
||||
("openai:deepseek-v3", "deepseek"),
|
||||
("deepseek:deepseek-r1", "deepseek"),
|
||||
# Unknown families fall back to default (no provider block emitted)
|
||||
("groq:mixtral-8x7b", "default"),
|
||||
("together:llama-3.1-70b", "default"),
|
||||
(None, "default"),
|
||||
("", "default"),
|
||||
],
|
||||
|
|
@ -43,6 +59,16 @@ class TestProviderVariantDetection:
|
|||
def test_detection(self, model_name: str | None, expected: str) -> None:
|
||||
assert detect_provider_variant(model_name) == expected
|
||||
|
||||
def test_codex_takes_precedence_over_reasoning(self) -> None:
    """Regression guard for the Codex-before-reasoning dispatch order.

    ``gpt-5-codex`` must resolve to ``"openai_codex"`` even though the id
    also contains ``gpt-5``, while plain ``gpt-5`` must still resolve to
    ``"openai_reasoning"``.
    """
    # Relies on the module-level ``detect_provider_variant`` import, the
    # same name ``test_detection`` in this class already uses; a second
    # function-local import of it was redundant.
    assert detect_provider_variant("openai:gpt-5-codex") == "openai_codex"
    assert detect_provider_variant("openai:gpt-5") == "openai_reasoning"
|
||||
|
||||
|
||||
class TestCompose:
|
||||
def test_default_prompt_has_required_blocks(self, fixed_today: datetime) -> None:
|
||||
|
|
@ -149,6 +175,52 @@ class TestCompose:
|
|||
prompt = compose_system_prompt(today=fixed_today, model_name="custom:foo")
|
||||
assert "<provider_hints>" not in prompt
|
||||
|
||||
@pytest.mark.parametrize(
    "model_name,expected_marker",
    [
        # Each marker is a phrase taken from the matching provider fragment.
        # If a fragment is reworded so the marker disappears, change the
        # fragment and this table together, on purpose.
        ("openai:gpt-5-codex", "Codex-class"),
        ("openai:gpt-5", "OpenAI reasoning model"),
        ("openai:gpt-4o", "classic OpenAI chat model"),
        ("anthropic:claude-3-5-sonnet", "Anthropic Claude"),
        ("google:gemini-2.0-flash", "Google Gemini"),
        ("moonshot:kimi-k2", "Moonshot Kimi"),
        ("xai:grok-2", "xAI Grok"),
        ("deepseek:deepseek-r1", "DeepSeek"),
    ],
)
def test_each_known_variant_renders_with_its_marker(
    self,
    fixed_today: datetime,
    model_name: str,
    expected_marker: str,
) -> None:
    """Check that each supported model id yields its own hints block.

    The composed prompt must contain a ``<provider_hints>`` block and the
    marker phrase for that variant, which ties the dispatch to the on-disk
    fragments so a missing or renamed file fails here.
    """
    rendered = compose_system_prompt(today=fixed_today, model_name=model_name)

    missing_block_msg = (
        f"variant for {model_name!r} did not emit a provider_hints block; "
        "the corresponding providers/<variant>.md may be missing"
    )
    assert "<provider_hints>" in rendered, missing_block_msg

    missing_marker_msg = (
        f"variant for {model_name!r} emitted hints but lacked the "
        f"expected marker {expected_marker!r} — the fragment may have "
        "drifted from the dispatch table"
    )
    assert expected_marker in rendered, missing_marker_msg
|
||||
|
||||
def test_provider_blocks_are_byte_stable_across_calls(
    self, fixed_today: datetime
) -> None:
    """Composing twice for one model id must give identical prompts."""
    model_id = "moonshot:kimi-k2"
    first = compose_system_prompt(today=fixed_today, model_name=model_id)
    second = compose_system_prompt(today=fixed_today, model_name=model_id)
    assert first == second
|
||||
|
||||
def test_custom_system_instructions_override_default(
|
||||
self, fixed_today: datetime
|
||||
) -> None:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,168 @@
|
|||
"""Unit tests for kb_persistence filesystem-parity invariants.
|
||||
|
||||
Specifically, these tests pin down that the agent-driven write_file flow
|
||||
treats path uniqueness — not content uniqueness — as the only hard
|
||||
invariant. This mirrors a real filesystem: ``cp a b`` produces two files
|
||||
with identical bytes living at different paths, and that should round-trip
|
||||
through :class:`KnowledgeBasePersistenceMiddleware` without losing the copy.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from app.agents.new_chat.middleware import kb_persistence
|
||||
from app.db import Document
|
||||
|
||||
|
||||
class _FakeResult:
    """Tiny substitute for ``sqlalchemy.engine.Result`` holding one value."""

    def __init__(self, value: Any = None) -> None:
        # The single canned value handed back by both accessors.
        self._payload = value

    def scalar_one_or_none(self) -> Any:
        """Return the canned value (``None`` when none was supplied)."""
        return self._payload

    def scalar(self) -> Any:
        """Return the canned value (``None`` when none was supplied)."""
        return self._payload
|
||||
|
||||
|
||||
class _FakeSession:
    """Lightweight AsyncSession double covering what ``_create_document`` uses.

    Every object passed to ``add`` / ``add_all`` is appended to ``added`` so
    tests can inspect what would have been persisted. ``execute`` is an
    ``AsyncMock`` that resolves to an empty ``_FakeResult`` unless a test
    swaps it out (e.g. to simulate a path collision). ``flush`` is an
    ``AsyncMock`` that hands out sequential integer ids to recorded objects
    that do not have one yet.
    """

    def __init__(self) -> None:
        self.added: list[Any] = []
        # Counter backing the fake primary keys assigned during ``flush``.
        self._next_id = 1
        self.execute = AsyncMock(return_value=_FakeResult(None))
        self.flush = AsyncMock(side_effect=self._assign_missing_ids)

    async def _assign_missing_ids(self) -> None:
        """Give each recorded object lacking an ``id`` the next counter value."""
        without_id = (
            item for item in self.added if getattr(item, "id", None) is None
        )
        for item in without_id:
            item.id = self._next_id
            self._next_id += 1

    def add(self, obj: Any) -> None:
        """Record a single object."""
        self.added.append(obj)

    def add_all(self, objs: list[Any]) -> None:
        """Record several objects, preserving their order."""
        self.added.extend(objs)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _stub_embeddings_and_chunks(monkeypatch: pytest.MonkeyPatch) -> None:
    """Swap ``kb_persistence``'s embedding and chunking helpers for stubs.

    Keeps unit tests from loading the real embedding model: ``embed_texts``
    yields one 8-element float32 zero vector per input text, and
    ``chunk_text`` returns the whole content as a single chunk.
    """

    def _zero_embeddings(texts):
        return [np.zeros(8, dtype=np.float32) for _ in texts]

    def _single_chunk(content):
        return [content]

    monkeypatch.setattr(kb_persistence, "embed_texts", _zero_embeddings)
    monkeypatch.setattr(kb_persistence, "chunk_text", _single_chunk)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_document_allows_identical_content_at_different_paths() -> None:
    """Core regression, modelled on ``cp /a/notes.md /b/notes-copy.md``.

    Two documents with byte-identical content are created at two different
    virtual paths through the same session. Both calls must return a
    ``Document``; the pair must share ``content_hash`` while differing in
    ``unique_identifier_hash``.
    """
    session = _FakeSession()
    content = "# Same body\n\nIdentical content used by two different paths.\n"
    # Everything except the path is shared between the two create calls.
    shared_kwargs = {
        "content": content,
        "search_space_id": 42,
        "created_by_id": "user-1",
    }

    first = await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/a/notes.md",
        **shared_kwargs,
    )
    assert isinstance(first, Document)
    assert first.title == "notes.md"

    # The same bytes at a different path must not raise: path, not content,
    # is the uniqueness key under filesystem-parity semantics.
    second = await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        virtual_path="/documents/b/notes-copy.md",
        **shared_kwargs,
    )
    assert isinstance(second, Document)
    assert second.title == "notes-copy.md"

    # Desired contract: equal content hashes, distinct path hashes.
    assert first.content_hash == second.content_hash
    assert first.unique_identifier_hash != second.unique_identifier_hash
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_document_still_rejects_path_collision() -> None:
    """Path uniqueness is still enforced as the hard invariant.

    When the lookup performed by ``_create_document`` reports an existing
    row, the call must raise ``ValueError`` whose message matches
    ``"already exists at path"``.
    """
    session = _FakeSession()

    # The path is chosen so that folder-hierarchy creation is expected to be
    # a no-op, leaving the path-collision check as the only SELECT. Every
    # ``execute`` therefore resolves to an existing doc id (99), which
    # should trip the guard.
    existing_row = _FakeResult(value=99)
    session.execute = AsyncMock(return_value=existing_row)

    create_kwargs = {
        "virtual_path": "/documents/notes.md",
        "content": "anything",
        "search_space_id": 42,
        "created_by_id": "user-1",
    }
    with pytest.raises(ValueError, match="already exists at path"):
        await kb_persistence._create_document(
            session,  # type: ignore[arg-type]
            **create_kwargs,
        )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_document_does_not_query_for_content_hash_collision(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Guard against the legacy content_hash pre-check coming back.

    Counts awaited ``execute`` calls during a single ``_create_document``
    invocation. Exactly one is expected — the path-collision SELECT — for a
    path that is meant to need no folder lookups. The count is deliberately
    brittle: any additional SELECT (such as a content-hash collision check)
    makes the test fail loudly.
    """
    session = _FakeSession()
    create_kwargs = {
        "virtual_path": "/documents/notes.md",
        "content": "hello",
        "search_space_id": 42,
        "created_by_id": "user-1",
    }
    await kb_persistence._create_document(
        session,  # type: ignore[arg-type]
        **create_kwargs,
    )

    # Only the path-collision SELECT should have run; no content_hash SELECT.
    expected_selects = 1
    assert session.execute.await_count == expected_selects, (
        f"Unexpected execute count {session.execute.await_count}; "
        "did the legacy content_hash collision pre-check get re-added?"
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue