mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-06 20:15:17 +02:00
refactor(agents): colocate main_agent-only kernel into main_agent/
Move modules out of agents/shared/ that are consumed by a single package (main_agent), placing each next to its only consumer instead of in a "shared" grab-bag:
- agent_cache.py -> main_agent/runtime/agent_cache_store.py
- connector_searchable_types.py -> main_agent/runtime/
- plugin_loader.py + plugins/ -> main_agent/plugins/
- skills/ + skills_backends.py -> main_agent/skills/
- tools/invalid_tool.py -> main_agent/tools/

Drop the skills_backends re-export from the shared middleware barrel and repoint all consumers + tests. No behavior change; import-all, error-contract, and the moved tests stay green.
This commit is contained in:
parent
c51aca6ccc
commit
a7d7155039
24 changed files with 33 additions and 46 deletions
|
|
@ -45,12 +45,6 @@ from app.agents.shared.middleware.noop_injection import NoopInjectionMiddleware
|
|||
from app.agents.shared.middleware.otel_span import OtelSpanMiddleware
|
||||
from app.agents.shared.middleware.permission import PermissionMiddleware
|
||||
from app.agents.shared.middleware.retry_after import RetryAfterMiddleware
|
||||
from app.agents.shared.middleware.skills_backends import (
|
||||
BuiltinSkillsBackend,
|
||||
SearchSpaceSkillsBackend,
|
||||
build_skills_backend_factory,
|
||||
default_skills_sources,
|
||||
)
|
||||
from app.agents.shared.middleware.tool_call_repair import (
|
||||
ToolCallNameRepairMiddleware,
|
||||
)
|
||||
|
|
@ -58,7 +52,6 @@ from app.agents.shared.middleware.tool_call_repair import (
|
|||
__all__ = [
|
||||
"ActionLogMiddleware",
|
||||
"AnonymousDocumentMiddleware",
|
||||
"BuiltinSkillsBackend",
|
||||
"BusyMutexMiddleware",
|
||||
"ClearToolUsesEdit",
|
||||
"DedupHITLToolCallsMiddleware",
|
||||
|
|
@ -74,14 +67,11 @@ __all__ = [
|
|||
"OtelSpanMiddleware",
|
||||
"PermissionMiddleware",
|
||||
"RetryAfterMiddleware",
|
||||
"SearchSpaceSkillsBackend",
|
||||
"SpillToBackendEdit",
|
||||
"SpillingContextEditingMiddleware",
|
||||
"SurfSenseCompactionMiddleware",
|
||||
"ToolCallNameRepairMiddleware",
|
||||
"ToolDefinition",
|
||||
"build_skills_backend_factory",
|
||||
"commit_staged_filesystem_state",
|
||||
"create_surfsense_compaction_middleware",
|
||||
"default_skills_sources",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,340 +0,0 @@
|
|||
"""Skills backends for SurfSense.
|
||||
|
||||
Implements two minimal :class:`deepagents.backends.protocol.BackendProtocol`
|
||||
subclasses tailored for use with :class:`deepagents.middleware.skills.SkillsMiddleware`.
|
||||
|
||||
The middleware only needs four methods to load skills from a backend:
|
||||
|
||||
* ``ls_info`` / ``als_info`` — list directories under a source path.
|
||||
* ``download_files`` / ``adownload_files`` — fetch ``SKILL.md`` bytes.
|
||||
|
||||
Other ``BackendProtocol`` methods (``read``/``write``/``edit``/``grep_raw`` …)
|
||||
default to ``NotImplementedError`` from the base class. They are never reached
|
||||
by the skills middleware because skill content is rendered into the system
|
||||
prompt at agent build time, not edited at runtime.
|
||||
|
||||
Two backends are provided:
|
||||
|
||||
* :class:`BuiltinSkillsBackend` — disk-backed read of bundled skills from
|
||||
``app/agents/shared/skills/builtin/``.
|
||||
* :class:`SearchSpaceSkillsBackend` — a thin read-only wrapper over
|
||||
:class:`KBPostgresBackend` that filters notes under the privileged folder
|
||||
``/documents/_skills/``.
|
||||
|
||||
Both backends are intentionally read-only: skill authoring happens out of band
|
||||
(via filesystem or a search-space-admin route), so we never expose
|
||||
``write`` / ``edit`` / ``upload_files``. The base class' ``NotImplementedError``
|
||||
gives a clean failure mode if anything tries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
from dataclasses import replace
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from deepagents.backends.composite import CompositeBackend
|
||||
from deepagents.backends.protocol import (
|
||||
BackendProtocol,
|
||||
FileDownloadResponse,
|
||||
FileInfo,
|
||||
)
|
||||
from deepagents.backends.state import StateBackend
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.tools import ToolRuntime
|
||||
|
||||
from app.agents.shared.middleware.kb_postgres_backend import KBPostgresBackend
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Upper bound (10 MiB) on how many bytes of a single skill file are read.
# Limit per Agent Skills spec; matches deepagents.middleware.skills.MAX_SKILL_FILE_SIZE.
_MAX_SKILL_FILE_SIZE = 10 * 1024 * 1024
|
||||
|
||||
|
||||
def _default_builtin_root() -> Path:
    """Return the absolute path to the bundled builtin skills directory.

    The directory is ``skills/builtin/`` inside the package two levels above
    this file, i.e. ``app/agents/shared/skills/builtin/`` while this module
    lives at ``app/agents/shared/middleware/skills_backends.py``.

    Returns:
        The resolved (absolute, symlink-free) path. The directory is not
        required to exist; callers decide how to treat a missing root.
    """
    # Resolve first so the parent hops are taken on the real file location,
    # then resolve again so the joined result is normalised as well.
    package_dir = Path(__file__).resolve().parent.parent
    return (package_dir / "skills" / "builtin").resolve()
|
||||
|
||||
|
||||
class BuiltinSkillsBackend(BackendProtocol):
    """Read-only disk-backed skills source.

    Maps a virtual ``/skills/builtin/`` namespace onto a directory on local disk,
    where each skill is its own subdirectory containing a ``SKILL.md`` file::

        <root>/<skill-name>/SKILL.md

    The middleware calls :meth:`als_info` with the source path and expects a
    ``list[FileInfo]`` whose ``is_dir=True`` entries are descended into. Then it
    calls :meth:`adownload_files` with the synthesized ``SKILL.md`` paths and
    parses YAML frontmatter from the returned ``content`` bytes.

    Mounting under :class:`~deepagents.backends.composite.CompositeBackend` at
    prefix ``/skills/builtin/`` means the middleware can issue paths like
    ``/skills/builtin/kb-research/SKILL.md`` which the composite strips down to
    ``/kb-research/SKILL.md`` before forwarding here. We treat any leading
    slash as anchoring at :attr:`root`.
    """

    def __init__(self, root: Path | str | None = None) -> None:
        """Bind the backend to a directory on disk.

        Args:
            root: Directory holding one sub-directory per skill. A falsy value
                (``None`` or an empty string) selects the bundled default from
                :func:`_default_builtin_root`.
        """
        self.root: Path = Path(root).resolve() if root else _default_builtin_root()
        if not self.root.exists():
            # A missing root is not an error: listings simply come back empty.
            logger.info(
                "BuiltinSkillsBackend root %s does not exist; skills will be empty.",
                self.root,
            )

    def _resolve(self, path: str) -> Path:
        """Resolve a virtual posix path under :attr:`root`, refusing escapes.

        Args:
            path: Virtual path; leading slashes are ignored so that ``/`` and
                the empty string both mean :attr:`root` itself.

        Returns:
            The resolved on-disk path, guaranteed to be :attr:`root` or below it.

        Raises:
            ValueError: If the resolved path lies outside :attr:`root`
                (for example via ``..`` segments or a symlink).
        """
        bare = path.lstrip("/")
        candidate = (self.root / bare).resolve() if bare else self.root
        # Refuse symlink/.. traversal that escapes the root.
        try:
            candidate.relative_to(self.root)
        except ValueError as exc:
            raise ValueError(f"path {path!r} escapes builtin skills root") from exc
        return candidate

    def ls_info(self, path: str) -> list[FileInfo]:
        """List the direct children of a virtual directory.

        Args:
            path: Virtual directory path relative to the mount point.

        Returns:
            One ``FileInfo`` per child, sorted by on-disk path, with ``path``
            expressed in the virtual namespace and ``size`` set for regular
            files. Returns an empty list when the path escapes the root, does
            not exist, or is not a directory. ``__pycache__`` and dot-prefixed
            entries are skipped.
        """
        try:
            target = self._resolve(path)
        except ValueError as exc:
            logger.warning("BuiltinSkillsBackend.ls_info refused: %s", exc)
            return []
        if not target.exists() or not target.is_dir():
            return []

        # Build virtual paths anchored at "/" because CompositeBackend already
        # stripped the route prefix before calling us.
        if target == self.root:
            target_virtual = "/"
        else:
            target_virtual = "/" + str(target.relative_to(self.root)).replace("\\", "/")
        # Stripping the trailing slash turns "/" into "" so that a single join
        # expression yields "/<name>" at the root and "/<dir>/<name>" below it.
        parent_prefix = target_virtual.rstrip("/")

        infos: list[FileInfo] = []
        for child in sorted(target.iterdir()):
            if child.name == "__pycache__" or child.name.startswith("."):
                continue
            info: FileInfo = {
                "path": f"{parent_prefix}/{child.name}",
                "is_dir": child.is_dir(),
            }
            if child.is_file():
                # Size is best-effort; a failing stat() leaves the key unset.
                with contextlib.suppress(OSError):  # pragma: no cover - defensive
                    info["size"] = child.stat().st_size
            infos.append(info)
        return infos

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Read the raw bytes of each requested virtual file.

        Args:
            paths: Virtual file paths relative to the mount point.

        Returns:
            One ``FileDownloadResponse`` per input path, in input order. On
            failure ``error`` is one of ``"invalid_path"`` (escapes the root),
            ``"file_not_found"`` (missing, or any other ``OSError`` while
            reading), ``"is_directory"`` or ``"permission_denied"``. On success
            ``content`` holds at most ``_MAX_SKILL_FILE_SIZE`` bytes.
        """
        responses: list[FileDownloadResponse] = []
        for p in paths:
            try:
                target = self._resolve(p)
            except ValueError:
                responses.append(FileDownloadResponse(path=p, error="invalid_path"))
                continue
            if not target.exists():
                responses.append(FileDownloadResponse(path=p, error="file_not_found"))
                continue
            if target.is_dir():
                responses.append(FileDownloadResponse(path=p, error="is_directory"))
                continue
            try:
                # Hard cap to avoid loading rogue mega-files into memory.
                size = target.stat().st_size
                if size > _MAX_SKILL_FILE_SIZE:
                    logger.warning(
                        "Builtin skill file %s exceeds %d bytes; truncating.",
                        target,
                        _MAX_SKILL_FILE_SIZE,
                    )
                    with target.open("rb") as fh:
                        content = fh.read(_MAX_SKILL_FILE_SIZE)
                else:
                    content = target.read_bytes()
            except PermissionError:
                responses.append(
                    FileDownloadResponse(path=p, error="permission_denied")
                )
                continue
            except OSError as exc:  # pragma: no cover - defensive
                logger.warning("Builtin skill read failed %s: %s", target, exc)
                responses.append(FileDownloadResponse(path=p, error="file_not_found"))
                continue
            responses.append(FileDownloadResponse(path=p, content=content, error=None))
        return responses
|
||||
|
||||
|
||||
class SearchSpaceSkillsBackend(BackendProtocol):
    """Read-only view of search-space-authored skills.

    Wraps a :class:`KBPostgresBackend` and only ever reads under the privileged
    folder ``/documents/_skills/`` (configurable). The folder is intended to be
    writable only by search-space admins; this backend never writes.

    The skills middleware expects a layout like::

        /<source_root>/<skill-name>/SKILL.md

    But the KB stores documents like ``/documents/_skills/<name>/SKILL.md``.
    We expose the inner namespace by remapping each path. When mounted under
    :class:`CompositeBackend` at prefix ``/skills/space/`` the paths the
    middleware sees become ``/skills/space/<name>/SKILL.md``; the composite
    strips ``/skills/space/`` and hands us ``/<name>/SKILL.md``, which we
    rewrite to ``/documents/_skills/<name>/SKILL.md`` before forwarding to the
    KB.

    No new database table is needed: the privileged folder convention is
    enforced server-side outside of this class. Write/edit methods are
    intentionally not implemented here, so such attempts fall through to the
    base class (which raises ``NotImplementedError``).
    """

    DEFAULT_KB_ROOT: str = "/documents/_skills"

    def __init__(
        self,
        kb_backend: KBPostgresBackend,
        *,
        kb_root: str = DEFAULT_KB_ROOT,
    ) -> None:
        """Wrap ``kb_backend`` and expose only the subtree under ``kb_root``.

        Args:
            kb_backend: Backend that all reads are forwarded to.
            kb_root: KB folder that acts as the root of the virtual namespace.
                Trailing slashes are ignored; an empty or all-slash value
                normalises to ``"/"``.
        """
        self._kb = kb_backend
        # Normalize trailing slash off so we can join cleanly.
        self._kb_root = kb_root.rstrip("/") or "/"

    def _is_under_root(self, kb_path: str) -> bool:
        """Return whether ``kb_path`` is the KB root itself or lies below it.

        The comparison is made on whole path segments so that a sibling folder
        sharing a textual prefix (``/documents/_skills_other``) is rejected.
        """
        if self._kb_root == "/":
            return kb_path.startswith("/")
        return kb_path == self._kb_root or kb_path.startswith(self._kb_root + "/")

    def _to_kb(self, path: str) -> str:
        """Rewrite a virtual path into the underlying KB namespace.

        Args:
            path: Virtual path; leading slashes are ignored.

        Returns:
            The KB root for an empty path, otherwise the root joined with the
            path by exactly one slash.
        """
        # NOTE(review): ".." segments are forwarded verbatim; this relies on
        # the wrapped backend not resolving them outside the root - confirm.
        bare = path.lstrip("/")
        if not bare:
            return self._kb_root
        if self._kb_root == "/":
            # Avoid emitting a doubled slash ("//name") for a root of "/".
            return "/" + bare
        return f"{self._kb_root}/{bare}"

    def _from_kb(self, kb_path: str) -> str:
        """Rewrite a KB path back into our virtual namespace.

        Paths outside the KB root are returned unchanged.
        """
        if not self._is_under_root(kb_path):
            return kb_path  # pragma: no cover - defensive
        rel = kb_path if self._kb_root == "/" else kb_path[len(self._kb_root) :]
        return rel if rel.startswith("/") else "/" + rel

    def ls_info(self, path: str) -> list[FileInfo]:
        """Unsupported synchronous listing; use :meth:`als_info` instead.

        Raises:
            NotImplementedError: Always. This backend is async-only.
        """
        # Kept as an explicit stub to satisfy abstract resolution; the
        # middleware calls ``als_info``.
        raise NotImplementedError("SearchSpaceSkillsBackend is async-only")

    async def als_info(self, path: str) -> list[FileInfo]:
        """List a virtual directory by delegating to the wrapped KB backend.

        Args:
            path: Virtual directory path relative to the mount point.

        Returns:
            The KB entries under the KB root with ``path`` rewritten into the
            virtual namespace. Entries outside the root are dropped. Any
            exception from the KB backend is logged and yields an empty list.
        """
        kb_path = self._to_kb(path)
        try:
            infos = await self._kb.als_info(kb_path)
        except Exception as exc:  # pragma: no cover - defensive
            logger.warning("SearchSpaceSkillsBackend.als_info failed: %s", exc)
            return []
        remapped: list[FileInfo] = []
        for info in infos:
            kb_p = info.get("path", "")
            if not self._is_under_root(kb_p):
                continue
            remapped.append({**info, "path": self._from_kb(kb_p)})
        return remapped

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Unsupported synchronous download; use :meth:`adownload_files`.

        Raises:
            NotImplementedError: Always. This backend is async-only.
        """
        raise NotImplementedError("SearchSpaceSkillsBackend is async-only")

    async def adownload_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Fetch files through the wrapped KB backend.

        Args:
            paths: Virtual file paths relative to the mount point.

        Returns:
            The KB backend's responses, in input order, with each ``path``
            replaced by the caller's original virtual path.

        Raises:
            ValueError: If the KB backend returns a different number of
                responses than paths requested (``zip(..., strict=True)``).
        """
        kb_paths = [self._to_kb(p) for p in paths]
        responses = await self._kb.adownload_files(kb_paths)
        # Re-map response paths back to the virtual namespace so the middleware
        # correlates them to the input list correctly.
        return [
            replace(resp, path=original)
            for original, resp in zip(paths, responses, strict=True)
        ]
|
||||
|
||||
|
||||
# CompositeBackend route prefixes under which the two skills backends are mounted.
SKILLS_BUILTIN_PREFIX = "/skills/builtin/"
SKILLS_SPACE_PREFIX = "/skills/space/"
|
||||
|
||||
|
||||
def build_skills_backend_factory(
    *,
    builtin_root: Path | str | None = None,
    search_space_id: int | None = None,
) -> Callable[[ToolRuntime], BackendProtocol]:
    """Return a runtime-aware factory for the skills :class:`CompositeBackend`.

    When ``search_space_id`` is provided the composite includes a
    :class:`SearchSpaceSkillsBackend` route at ``/skills/space/`` over a fresh
    per-runtime :class:`KBPostgresBackend`, mirroring how
    :func:`build_backend_resolver` constructs the main filesystem backend.

    When ``search_space_id`` is ``None`` (e.g., desktop-local mode or unit
    tests) only the bundled :class:`BuiltinSkillsBackend` is exposed.

    Returning a factory rather than a fixed instance is intentional: the
    underlying KB backend depends on per-call ``ToolRuntime`` state
    (``staged_dirs``, ``files`` cache, runtime config), so a single shared
    instance cannot serve multiple concurrent agent runs.

    Args:
        builtin_root: Optional override for the builtin skills directory,
            passed straight to :class:`BuiltinSkillsBackend`.
        search_space_id: Search space whose skills should be mounted, or
            ``None`` to expose builtin skills only.

    Returns:
        A callable that takes a ``ToolRuntime`` and builds a new
        :class:`CompositeBackend` for it. The single ``BuiltinSkillsBackend``
        created here is shared by every backend the factory produces.
    """
    builtin = BuiltinSkillsBackend(builtin_root)

    if search_space_id is None:

        def _factory_builtin_only(runtime: ToolRuntime) -> BackendProtocol:
            # Default StateBackend is intentionally inert: any path outside the
            # ``/skills/builtin/`` route resolves to an empty per-runtime state
            # so the SkillsMiddleware can iterate sources without raising.
            return CompositeBackend(
                default=StateBackend(runtime),
                routes={SKILLS_BUILTIN_PREFIX: builtin},
            )

        return _factory_builtin_only

    def _factory_with_space(runtime: ToolRuntime) -> BackendProtocol:
        # Imported lazily to avoid a hard dependency at module import time:
        # ``KBPostgresBackend`` pulls in DB models, which are unnecessary for
        # the unit-tested builtin path.
        from app.agents.shared.middleware.kb_postgres_backend import (
            KBPostgresBackend,
        )

        kb = KBPostgresBackend(search_space_id, runtime)
        space = SearchSpaceSkillsBackend(kb)
        return CompositeBackend(
            default=StateBackend(runtime),
            routes={
                SKILLS_BUILTIN_PREFIX: builtin,
                SKILLS_SPACE_PREFIX: space,
            },
        )

    return _factory_with_space
|
||||
|
||||
|
||||
def default_skills_sources() -> list[str]:
    """Return the canonical source list for SkillsMiddleware (built-in then space).

    Returns:
        A new list on every call holding ``SKILLS_BUILTIN_PREFIX`` followed by
        ``SKILLS_SPACE_PREFIX``, so callers may mutate it freely.
    """
    return [SKILLS_BUILTIN_PREFIX, SKILLS_SPACE_PREFIX]
|
||||
|
||||
|
||||
# Public API of this module.
__all__ = [
    "SKILLS_BUILTIN_PREFIX",
    "SKILLS_SPACE_PREFIX",
    "BuiltinSkillsBackend",
    "SearchSpaceSkillsBackend",
    "build_skills_backend_factory",
    "default_skills_sources",
]
|
||||
|
|
@ -120,7 +120,9 @@ class ToolCallNameRepairMiddleware(
|
|||
# Stage 2 — invalid fallback
|
||||
# Local import keeps the middleware module import-light and avoids any
|
||||
# tools <-> middleware import-order coupling at module scope.
|
||||
from app.agents.shared.tools.invalid_tool import INVALID_TOOL_NAME
|
||||
from app.agents.multi_agent_chat.main_agent.tools.invalid_tool import (
|
||||
INVALID_TOOL_NAME,
|
||||
)
|
||||
|
||||
if INVALID_TOOL_NAME in registered:
|
||||
original_args = call.get("args") or {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue