mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-30 21:59:46 +02:00
chore: ran linting
This commit is contained in:
parent
bc1f31b481
commit
ac0f2fa2eb
33 changed files with 688 additions and 661 deletions
|
|
@ -262,7 +262,11 @@ def _normalize_connectors(
|
|||
valid_set -= _LIVE_SEARCH_CONNECTORS
|
||||
|
||||
if not connectors_to_search:
|
||||
base = list(available_connectors) if available_connectors else list(_ALL_CONNECTORS)
|
||||
base = (
|
||||
list(available_connectors)
|
||||
if available_connectors
|
||||
else list(_ALL_CONNECTORS)
|
||||
)
|
||||
return [c for c in base if c not in _LIVE_SEARCH_CONNECTORS]
|
||||
|
||||
normalized: list[str] = []
|
||||
|
|
@ -291,7 +295,11 @@ def _normalize_connectors(
|
|||
|
||||
# Fallback to all available if nothing matched
|
||||
if not out:
|
||||
base = list(available_connectors) if available_connectors else list(_ALL_CONNECTORS)
|
||||
base = (
|
||||
list(available_connectors)
|
||||
if available_connectors
|
||||
else list(_ALL_CONNECTORS)
|
||||
)
|
||||
return [c for c in base if c not in _LIVE_SEARCH_CONNECTORS]
|
||||
return out
|
||||
|
||||
|
|
|
|||
|
|
@ -68,12 +68,12 @@ from .podcast import create_generate_podcast_tool
|
|||
from .report import create_generate_report_tool
|
||||
from .scrape_webpage import create_scrape_webpage_tool
|
||||
from .search_surfsense_docs import create_search_surfsense_docs_tool
|
||||
from .web_search import create_web_search_tool
|
||||
from .shared_memory import (
|
||||
create_recall_shared_memory_tool,
|
||||
create_save_shared_memory_tool,
|
||||
)
|
||||
from .user_memory import create_recall_memory_tool, create_save_memory_tool
|
||||
from .web_search import create_web_search_tool
|
||||
|
||||
# =============================================================================
|
||||
# Tool Definition
|
||||
|
|
|
|||
|
|
@ -72,20 +72,22 @@ def _format_web_results(
|
|||
continue
|
||||
|
||||
metadata_json = json.dumps(metadata, ensure_ascii=False)
|
||||
doc_xml = "\n".join([
|
||||
"<document>",
|
||||
"<document_metadata>",
|
||||
f" <document_type>{source}</document_type>",
|
||||
f" <title><![CDATA[{title}]]></title>",
|
||||
f" <url><![CDATA[{url}]]></url>",
|
||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
|
||||
"</document_metadata>",
|
||||
"<document_content>",
|
||||
f" <chunk id='{url}'><![CDATA[{content}]]></chunk>",
|
||||
"</document_content>",
|
||||
"</document>",
|
||||
"",
|
||||
])
|
||||
doc_xml = "\n".join(
|
||||
[
|
||||
"<document>",
|
||||
"<document_metadata>",
|
||||
f" <document_type>{source}</document_type>",
|
||||
f" <title><![CDATA[{title}]]></title>",
|
||||
f" <url><![CDATA[{url}]]></url>",
|
||||
f" <metadata_json><![CDATA[{metadata_json}]]></metadata_json>",
|
||||
"</document_metadata>",
|
||||
"<document_content>",
|
||||
f" <chunk id='{url}'><![CDATA[{content}]]></chunk>",
|
||||
"</document_content>",
|
||||
"</document>",
|
||||
"",
|
||||
]
|
||||
)
|
||||
|
||||
if total_chars + len(doc_xml) > max_chars:
|
||||
parts.append("<!-- Output truncated to fit context window -->")
|
||||
|
|
@ -152,9 +154,7 @@ def create_web_search_tool(
|
|||
]
|
||||
|
||||
engine_names = ["SearXNG (platform default)"]
|
||||
engine_names.extend(
|
||||
_CONNECTOR_LABELS.get(c, c) for c in active_live_connectors
|
||||
)
|
||||
engine_names.extend(_CONNECTOR_LABELS.get(c, c) for c in active_live_connectors)
|
||||
engines_summary = ", ".join(engine_names)
|
||||
|
||||
description = (
|
||||
|
|
@ -179,10 +179,12 @@ def create_web_search_tool(
|
|||
tasks: list[asyncio.Task[list[dict[str, Any]]]] = []
|
||||
|
||||
if web_search_service.is_available():
|
||||
|
||||
async def _searxng() -> list[dict[str, Any]]:
|
||||
async with semaphore:
|
||||
_result_obj, docs = await web_search_service.search(
|
||||
query=query, top_k=clamped_top_k,
|
||||
query=query,
|
||||
top_k=clamped_top_k,
|
||||
)
|
||||
return docs
|
||||
|
||||
|
|
|
|||
|
|
@ -428,4 +428,4 @@ class ChucksHybridSearchRetriever:
|
|||
search_space_id,
|
||||
document_type,
|
||||
)
|
||||
return final_docs
|
||||
return final_docs
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ import asyncio
|
|||
import time
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import httpx
|
||||
from linkup import LinkupClient
|
||||
|
|
|
|||
|
|
@ -278,7 +278,7 @@ async def get_search_space_llm_instance(
|
|||
"ALIBABA_QWEN": "openai",
|
||||
"MOONSHOT": "openai",
|
||||
"ZHIPU": "openai",
|
||||
"MINIMAX": "openai",
|
||||
"MINIMAX": "openai",
|
||||
}
|
||||
provider_prefix = provider_map.get(
|
||||
global_config["provider"], global_config["provider"].lower()
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ latency overhead.
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
|
|
@ -112,10 +113,8 @@ def _cache_get(key: str) -> dict | None:
|
|||
|
||||
|
||||
def _cache_set(key: str, value: dict) -> None:
|
||||
try:
|
||||
with contextlib.suppress(redis.RedisError):
|
||||
_get_redis().setex(key, _CACHE_TTL_SECONDS, json.dumps(value))
|
||||
except redis.RedisError:
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -208,7 +207,9 @@ async def search(
|
|||
try:
|
||||
async with httpx.AsyncClient(timeout=15.0, verify=False) as client:
|
||||
response = await client.get(
|
||||
searx_endpoint, params=params, headers=headers,
|
||||
searx_endpoint,
|
||||
params=params,
|
||||
headers=headers,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
|
@ -217,7 +218,10 @@ async def search(
|
|||
last_error = exc
|
||||
if attempt == 0 and (
|
||||
isinstance(exc, httpx.TimeoutException)
|
||||
or (isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code >= 500)
|
||||
or (
|
||||
isinstance(exc, httpx.HTTPStatusError)
|
||||
and exc.response.status_code >= 500
|
||||
)
|
||||
):
|
||||
continue
|
||||
break
|
||||
|
|
@ -246,29 +250,33 @@ async def search(
|
|||
source_id = 200_000 + idx
|
||||
description = result.get("content") or result.get("snippet") or ""
|
||||
|
||||
sources_list.append({
|
||||
"id": source_id,
|
||||
"title": result.get("title", "Web Search Result"),
|
||||
"description": description,
|
||||
"url": result.get("url", ""),
|
||||
})
|
||||
|
||||
documents.append({
|
||||
"chunk_id": source_id,
|
||||
"content": description or result.get("content", ""),
|
||||
"score": result.get("score", 0.0),
|
||||
"document": {
|
||||
sources_list.append(
|
||||
{
|
||||
"id": source_id,
|
||||
"title": result.get("title", "Web Search Result"),
|
||||
"document_type": "SEARXNG_API",
|
||||
"metadata": {
|
||||
"url": result.get("url", ""),
|
||||
"engines": result.get("engines", []),
|
||||
"category": result.get("category"),
|
||||
"source": "SEARXNG_API",
|
||||
"description": description,
|
||||
"url": result.get("url", ""),
|
||||
}
|
||||
)
|
||||
|
||||
documents.append(
|
||||
{
|
||||
"chunk_id": source_id,
|
||||
"content": description or result.get("content", ""),
|
||||
"score": result.get("score", 0.0),
|
||||
"document": {
|
||||
"id": source_id,
|
||||
"title": result.get("title", "Web Search Result"),
|
||||
"document_type": "SEARXNG_API",
|
||||
"metadata": {
|
||||
"url": result.get("url", ""),
|
||||
"engines": result.get("engines", []),
|
||||
"category": result.get("category"),
|
||||
"source": "SEARXNG_API",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
)
|
||||
|
||||
result_object: dict[str, Any] = {
|
||||
"id": 11,
|
||||
|
|
|
|||
|
|
@ -51,9 +51,7 @@ async def safe_set_chunks(
|
|||
from app.db import Chunk
|
||||
|
||||
if document.id is not None:
|
||||
await session.execute(
|
||||
delete(Chunk).where(Chunk.document_id == document.id)
|
||||
)
|
||||
await session.execute(delete(Chunk).where(Chunk.document_id == document.id))
|
||||
for chunk in chunks:
|
||||
chunk.document_id = document.id
|
||||
|
||||
|
|
|
|||
|
|
@ -37,9 +37,7 @@ async def safe_set_chunks(
|
|||
from app.db import Chunk
|
||||
|
||||
if document.id is not None:
|
||||
await session.execute(
|
||||
delete(Chunk).where(Chunk.document_id == document.id)
|
||||
)
|
||||
await session.execute(delete(Chunk).where(Chunk.document_id == document.id))
|
||||
for chunk in chunks:
|
||||
chunk.document_id = document.id
|
||||
|
||||
|
|
|
|||
|
|
@ -9,11 +9,9 @@ import re
|
|||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import delete as sa_delete, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
from sqlalchemy import delete as sa_delete
|
||||
from sqlalchemy.orm.attributes import set_committed_value
|
||||
|
||||
from app.config import config
|
||||
|
|
@ -39,6 +37,7 @@ async def _safe_set_docs_chunks(
|
|||
set_committed_value(document, "chunks", chunks)
|
||||
session.add_all(chunks)
|
||||
|
||||
|
||||
# Path to docs relative to project root
|
||||
DOCS_DIR = (
|
||||
Path(__file__).resolve().parent.parent.parent.parent
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue