mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-07 07:55:16 +02:00
360 lines
11 KiB
Python
360 lines
11 KiB
Python
|
|
"""Unit tests for the MCP docs discovery tools."""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import os
|
||
|
|
from pathlib import Path
|
||
|
|
from unittest.mock import AsyncMock, patch
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
from fastapi import HTTPException
|
||
|
|
|
||
|
|
from api.mcp_server.tools import docs_search as docs_search_module
|
||
|
|
from api.mcp_server.tools.docs_search import (
|
||
|
|
_docs_index,
|
||
|
|
_extract_page_title,
|
||
|
|
_resolve_docs_root,
|
||
|
|
_score_page,
|
||
|
|
_strip_frontmatter,
|
||
|
|
_tokenize_query,
|
||
|
|
list_docs,
|
||
|
|
read_doc,
|
||
|
|
search_docs,
|
||
|
|
)
|
||
|
|
|
||
|
|
|
||
|
|
def _clear_docs_caches() -> None:
    """Drop whatever the docs_search module's ``_docs_index`` has cached."""
    cached_index_fn = docs_search_module._docs_index
    cached_index_fn.cache_clear()
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.fixture
def fake_docs_root(tmp_path: Path) -> Path:
    """Build a throwaway docs tree and point ``DOGRAH_DOCS_PATH`` at it.

    The tree under ``tmp_path / "docs"`` contains four pages that are listed
    in ``docs.json`` (getting-started/index, voice-agent/introduction,
    voice-agent/tools/mcp-tool, deployment/docker), one page that is on disk
    but absent from ``docs.json`` (internal-only), an ``AGENTS.md`` file, and
    the ``docs.json`` navigation file itself.

    The docs index cache is cleared before the environment is patched and
    again after the test finishes. Yields the docs root directory.
    """
    # NOTE(review): this is a generator fixture (it yields), so the precise
    # return annotation would be Iterator[Path]; left as-is to avoid adding
    # an import here.
    docs_root = tmp_path / "docs"
    docs_root.mkdir()

    # Page with only title/description frontmatter and a single H1.
    (docs_root / "getting-started").mkdir()
    (docs_root / "getting-started" / "index.mdx").write_text(
        "---\n"
        'title: "Getting started"\n'
        'description: "Start using Dograh."\n'
        "---\n\n"
        "# Getting started\n\n"
        "Welcome to Dograh.\n",
        encoding="utf-8",
    )

    (docs_root / "voice-agent").mkdir()
    (docs_root / "voice-agent" / "introduction.mdx").write_text(
        "---\n"
        'title: "Voice Agent Builder"\n'
        'description: "Build conversational workflows."\n'
        "---\n\n"
        "# Voice Agent Builder\n\n"
        "Build workflows with nodes and tools.\n",
        encoding="utf-8",
    )

    # Nested page carrying llm_hint and aliases frontmatter plus an H2 section.
    (docs_root / "voice-agent" / "tools").mkdir()
    (docs_root / "voice-agent" / "tools" / "mcp-tool.mdx").write_text(
        "---\n"
        'title: "MCP Tool"\n'
        'description: "Connect external MCP servers."\n'
        'llm_hint: "Use for MCP server setup, remote tools, or model context protocol questions."\n'
        "aliases:\n"
        ' - "model context protocol"\n'
        "---\n\n"
        "# MCP Tool\n\n"
        "Connect an external MCP server to your voice agent.\n\n"
        "## Authentication\n\n"
        "Provide the MCP endpoint URL and headers.\n",
        encoding="utf-8",
    )

    # Page the search tests target: TURN/coturn appear in the llm_hint, the
    # aliases, and the "Troubleshooting WebRTC Connectivity" section body.
    (docs_root / "deployment").mkdir()
    (docs_root / "deployment" / "docker.mdx").write_text(
        "---\n"
        'title: "Docker"\n'
        'description: "Deploy Dograh with Docker."\n'
        'llm_hint: "Use for Docker deployment, local setup, remote setup, TURN server, coturn, or WebRTC connectivity questions."\n'
        "aliases:\n"
        ' - "coturn"\n'
        ' - "turn server"\n'
        "---\n\n"
        "# Docker\n\n"
        "Run Dograh with Docker.\n\n"
        "## Troubleshooting WebRTC Connectivity\n\n"
        "If audio fails or ICE fails, configure a TURN server. Coturn is the recommended choice.\n",
        encoding="utf-8",
    )

    # Hidden/orphaned docs page: present on disk but not in docs.json, so it
    # must not be indexed by the MCP tools.
    (docs_root / "internal-only.mdx").write_text(
        "---\n"
        'title: "Internal TURN Notes"\n'
        "---\n\n"
        "# Internal TURN Notes\n\n"
        "This page mentions zyxinternalturntoken but is not user-facing.\n",
        encoding="utf-8",
    )

    # Extra file at the docs root that is likewise not listed in docs.json.
    (docs_root / "AGENTS.md").write_text("# Internal instructions\n", encoding="utf-8")

    # Navigation: two tabs ("Guides", "Developer"); the "Voice Agent Builder"
    # group nests a "Tools" sub-group alongside a plain page entry.
    (docs_root / "docs.json").write_text(
        """{
  "navigation": {
    "tabs": [
      {
        "tab": "Guides",
        "groups": [
          {
            "group": "Getting started",
            "pages": [
              "getting-started/index"
            ]
          },
          {
            "group": "Voice Agent Builder",
            "pages": [
              "voice-agent/introduction",
              {
                "group": "Tools",
                "pages": [
                  "voice-agent/tools/mcp-tool"
                ]
              }
            ]
          }
        ]
      },
      {
        "tab": "Developer",
        "groups": [
          {
            "group": "Deployment",
            "pages": [
              "deployment/docker"
            ]
          }
        ]
      }
    ]
  }
}
""",
        encoding="utf-8",
    )

    # Clear before patching so nothing cached by an earlier test is reused,
    # and again afterwards so this tree does not stay cached for later tests.
    _clear_docs_caches()
    with patch.dict(os.environ, {"DOGRAH_DOCS_PATH": str(docs_root)}):
        yield docs_root
    _clear_docs_caches()
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.fixture
def authed_user():
    """Patch MCP request authentication to return a stub user.

    While the fixture is active,
    ``api.mcp_server.tools.docs_search.authenticate_mcp_request`` is replaced
    by an ``AsyncMock`` that returns a fake user. The same object is yielded
    to the test, so identity checks against what the tool received hold.
    """

    class _FakeUser:
        selected_organization_id = 1
        id = 42

    # One shared instance: the value the test sees is the value the patched
    # authenticator hands back.
    user = _FakeUser()
    with patch(
        "api.mcp_server.tools.docs_search.authenticate_mcp_request",
        new=AsyncMock(return_value=user),
    ):
        yield user
|
||
|
|
|
||
|
|
|
||
|
|
def test_tokenize_query_dedupes_and_drops_stopwords():
    """A question with a repeated word tokenizes to three unique lowercase terms."""
    tokens = _tokenize_query("How do I configure a TURN server TURN?")
    expected_tokens = ["configure", "turn", "server"]
    assert tokens == expected_tokens
|
||
|
|
|
||
|
|
|
||
|
|
def test_tokenize_query_empty_input_returns_empty():
    """An empty string and a punctuation-only string both produce no tokens."""
    for raw_query in ("", "?? // !!"):
        assert _tokenize_query(raw_query) == []
|
||
|
|
|
||
|
|
|
||
|
|
def test_strip_frontmatter_removes_yaml_block():
    """After stripping, the text begins at the first heading."""
    source = '---\ntitle: "X"\n---\n\n# Heading\n'
    stripped = _strip_frontmatter(source)
    assert stripped.startswith("# Heading")
|
||
|
|
|
||
|
|
|
||
|
|
def test_extract_page_title_prefers_frontmatter():
    """The frontmatter title is returned even when an H1 heading is present."""
    source = '---\ntitle: "Front Title"\n---\n\n# Heading Title\n'
    title = _extract_page_title(source, fallback="x.mdx")
    assert title == "Front Title"
|
||
|
|
|
||
|
|
|
||
|
|
def test_extract_page_title_falls_back_to_first_heading():
    """Without frontmatter, the first heading supplies the title."""
    source = "# Heading Title\nbody\n"
    title = _extract_page_title(source, fallback="x.mdx")
    assert title == "Heading Title"
|
||
|
|
|
||
|
|
|
||
|
|
def test_score_page_uses_llm_hint_and_aliases():
    """Scoring a page for "coturn" gives a positive score and picks its section."""
    webrtc_section = docs_search_module.DocSection(
        title="Troubleshooting WebRTC Connectivity",
        slug="troubleshooting-webrtc-connectivity",
        level=2,
        content="Configure a TURN server with coturn.",
    )
    docker_page = docs_search_module.DocPage(
        path="deployment/docker",
        file_path="deployment/docker.mdx",
        title="Docker",
        description="Deploy Dograh with Docker.",
        llm_hint="Use for TURN server and coturn setup.",
        aliases=("coturn",),
        breadcrumb=("Developer", "Deployment"),
        content="Docker deployment.",
        sections=(webrtc_section,),
        order=0,
    )

    score, section = _score_page(docker_page, ["coturn"])

    assert score > 0
    assert section is not None
    assert section.slug == "troubleshooting-webrtc-connectivity"
|
||
|
|
|
||
|
|
|
||
|
|
def test_resolve_docs_root_honors_env_override(tmp_path: Path):
    """With DOGRAH_DOCS_PATH set, the resolved root is that directory."""
    custom_root = tmp_path / "custom_docs"
    custom_root.mkdir()
    (custom_root / "docs.json").write_text("{}", encoding="utf-8")

    env_override = {"DOGRAH_DOCS_PATH": str(custom_root)}
    with patch.dict(os.environ, env_override):
        resolved = _resolve_docs_root()
        assert resolved == custom_root.resolve()
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_search_docs_ranks_turn_doc_and_uses_route_path(
    fake_docs_root, authed_user
):
    """A coturn/WebRTC question ranks the Docker page first, keyed by route path."""
    results = await search_docs("How do I configure coturn for WebRTC?")
    assert results

    best = results[0]
    assert best["path"] == "deployment/docker"
    assert best["section_slug"] == "troubleshooting-webrtc-connectivity"
    assert "TURN server" in best["llm_hint"]

    # These fields must not be present in a search hit.
    for omitted_key in ("snippet", "score", "url"):
        assert omitted_key not in best
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_search_docs_indexes_only_docs_json_pages(fake_docs_root, authed_user):
    """A token that appears only in the page missing from docs.json finds nothing."""
    hits = await search_docs("zyxinternalturntoken")
    assert hits == []
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_search_docs_respects_limit(fake_docs_root, authed_user):
    """With limit=1, exactly one hit comes back."""
    hits = await search_docs("dograh", limit=1)
    hit_count = len(hits)
    assert hit_count == 1
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_search_docs_returns_empty_when_no_match(fake_docs_root, authed_user):
    """A query with no matching terms yields an empty result list."""
    hits = await search_docs("xyzzy unrelated zzz")
    assert hits == []
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_search_docs_returns_empty_when_no_corpus(
    tmp_path, authed_user, monkeypatch
):
    """search_docs returns an empty list when no docs root can be resolved.

    ``_resolve_docs_root`` is patched to return ``None``; the environment
    variable is additionally pointed at a directory that does not exist.
    """
    nonexistent = tmp_path / "no-docs-here"
    monkeypatch.setenv("DOGRAH_DOCS_PATH", str(nonexistent))
    # Start from a cold cache so the index is built under the patched resolver.
    _clear_docs_caches()
    try:
        with patch(
            "api.mcp_server.tools.docs_search._resolve_docs_root", return_value=None
        ):
            assert await search_docs("anything") == []
    finally:
        # Whatever index was built while the root was patched out may have
        # been cached; drop it so it cannot leak into later tests.
        _clear_docs_caches()
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_search_docs_rejects_empty_query(fake_docs_root, authed_user):
    """An empty query raises ValueError mentioning "non-empty string"."""
    expect_rejection = pytest.raises(ValueError, match="non-empty string")
    with expect_rejection:
        await search_docs("")
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_search_docs_rejects_query_with_only_stopwords(
    fake_docs_root, authed_user
):
    """A query made only of stopwords raises ValueError mentioning "non-stopword"."""
    expect_rejection = pytest.raises(ValueError, match="non-stopword")
    with expect_rejection:
        await search_docs("how do I")
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_search_docs_rejects_zero_limit(fake_docs_root, authed_user):
    """limit=0 raises ValueError mentioning "at least 1"."""
    expect_rejection = pytest.raises(ValueError, match="at least 1")
    with expect_rejection:
        await search_docs("Dograh", limit=0)
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_list_docs_returns_top_level_sections(fake_docs_root, authed_user):
    """With no arguments, list_docs starts with the two "Guides" sections in order."""
    entries = await list_docs()

    leading = entries[0]
    assert leading["kind"] == "section"
    assert leading["path"] == "guides/getting-started"
    assert entries[1]["path"] == "guides/voice-agent-builder"
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_list_docs_depth_expands_children(fake_docs_root, authed_user):
    """depth=2 lists the section's page, its nested sub-section, and that sub-section's page."""
    entries = await list_docs("guides/voice-agent-builder", depth=2)
    listed_paths = [entry["path"] for entry in entries]

    expected_paths = (
        "voice-agent/introduction",
        "guides/voice-agent-builder/tools",
        "voice-agent/tools/mcp-tool",
    )
    for expected in expected_paths:
        assert expected in listed_paths
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_list_docs_rejects_unknown_section(fake_docs_root, authed_user):
    """An unrecognized section path raises HTTPException ("Unknown docs section")."""
    expect_rejection = pytest.raises(HTTPException, match="Unknown docs section")
    with expect_rejection:
        await list_docs("nope")
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_read_doc_returns_full_page_and_sections(fake_docs_root, authed_user):
    """Reading a page returns its path, title, section slugs and body, with no "url"."""
    page = await read_doc("deployment/docker")

    assert page["path"] == "deployment/docker"
    assert page["title"] == "Docker"
    assert "url" not in page

    slugs = [entry["slug"] for entry in page["sections"]]
    for expected_slug in ("docker", "troubleshooting-webrtc-connectivity"):
        assert expected_slug in slugs

    body = page["content"]
    assert "Coturn" in body or "coturn" in body.lower()
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_read_doc_can_target_section(fake_docs_root, authed_user):
    """Requesting a section returns that section's text, not the page intro."""
    target_slug = "troubleshooting-webrtc-connectivity"
    result = await read_doc("deployment/docker", section=target_slug)

    assert result["section_slug"] == "troubleshooting-webrtc-connectivity"

    excerpt = result["content"]
    assert "ICE fails" in excerpt or "TURN server" in excerpt
    # The page's introductory paragraph sits outside the requested section.
    assert "Run Dograh with Docker." not in excerpt
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_read_doc_rejects_unknown_page(fake_docs_root, authed_user):
    """An unrecognized page path raises HTTPException ("Unknown docs page")."""
    expect_rejection = pytest.raises(HTTPException, match="Unknown docs page")
    with expect_rejection:
        await read_doc("missing/page")
|
||
|
|
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
async def test_read_doc_rejects_unknown_section(fake_docs_root, authed_user):
    """A valid page with an unrecognized section raises HTTPException ("Unknown section")."""
    expect_rejection = pytest.raises(HTTPException, match="Unknown section")
    with expect_rejection:
        await read_doc("deployment/docker", section="missing-section")
|
||
|
|
|
||
|
|
|
||
|
|
def test_docs_index_uses_docs_json_navigation(fake_docs_root):
    """The index follows docs.json: orphan page absent, nested group and breadcrumb present."""
    index = _docs_index()

    assert "internal-only" not in index.pages_by_path
    assert "guides/voice-agent-builder/tools" in index.sections_by_path

    mcp_page = index.pages_by_path["voice-agent/tools/mcp-tool"]
    expected_breadcrumb = ("Guides", "Voice Agent Builder", "Tools")
    assert mcp_page.breadcrumb == expected_breadcrumb
|