mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-08 23:32:40 +02:00
Merge remote-tracking branch 'upstream/dev' into fix/memory-extraction
This commit is contained in:
commit
b981b51ab1
176 changed files with 20407 additions and 6258 deletions
2
VERSION
2
VERSION
|
|
@ -1 +1 @@
|
||||||
0.0.19
|
0.0.20
|
||||||
|
|
|
||||||
|
|
@ -159,10 +159,13 @@ STRIPE_PAGE_BUYING_ENABLED=FALSE
|
||||||
# STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10
|
# STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10
|
||||||
# STRIPE_RECONCILIATION_BATCH_SIZE=100
|
# STRIPE_RECONCILIATION_BATCH_SIZE=100
|
||||||
|
|
||||||
# Premium token purchases ($1 per 1M tokens for premium-tier models)
|
# Premium credit purchases via Stripe ($1 buys 1_000_000 micro-USD of
|
||||||
|
# credit; premium turns debit the actual per-call provider cost
|
||||||
|
# reported by LiteLLM, so cheap and expensive models bill proportionally)
|
||||||
# STRIPE_TOKEN_BUYING_ENABLED=FALSE
|
# STRIPE_TOKEN_BUYING_ENABLED=FALSE
|
||||||
# STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
|
# STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
|
||||||
# STRIPE_TOKENS_PER_UNIT=1000000
|
# STRIPE_CREDIT_MICROS_PER_UNIT=1000000
|
||||||
|
# DEPRECATED — STRIPE_TOKENS_PER_UNIT=1000000
|
||||||
|
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
# TTS & STT (Text-to-Speech / Speech-to-Text)
|
# TTS & STT (Text-to-Speech / Speech-to-Text)
|
||||||
|
|
@ -305,6 +308,24 @@ STT_SERVICE=local/base
|
||||||
# Advanced (optional)
|
# Advanced (optional)
|
||||||
# ------------------------------------------------------------------------------
|
# ------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# New-chat agent feature flags
|
||||||
|
SURFSENSE_ENABLE_CONTEXT_EDITING=true
|
||||||
|
SURFSENSE_ENABLE_COMPACTION_V2=true
|
||||||
|
SURFSENSE_ENABLE_RETRY_AFTER=true
|
||||||
|
SURFSENSE_ENABLE_MODEL_FALLBACK=false
|
||||||
|
SURFSENSE_ENABLE_MODEL_CALL_LIMIT=true
|
||||||
|
SURFSENSE_ENABLE_TOOL_CALL_LIMIT=true
|
||||||
|
SURFSENSE_ENABLE_TOOL_CALL_REPAIR=true
|
||||||
|
SURFSENSE_ENABLE_BUSY_MUTEX=true
|
||||||
|
SURFSENSE_ENABLE_SKILLS=true
|
||||||
|
SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS=true
|
||||||
|
SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE=true
|
||||||
|
SURFSENSE_ENABLE_ACTION_LOG=true
|
||||||
|
SURFSENSE_ENABLE_REVERT_ROUTE=true
|
||||||
|
SURFSENSE_ENABLE_PERMISSION=true
|
||||||
|
SURFSENSE_ENABLE_DOOM_LOOP=true
|
||||||
|
SURFSENSE_ENABLE_STREAM_PARITY_V2=true
|
||||||
|
|
||||||
# Periodic connector sync interval (default: 5m)
|
# Periodic connector sync interval (default: 5m)
|
||||||
# SCHEDULE_CHECKER_INTERVAL=5m
|
# SCHEDULE_CHECKER_INTERVAL=5m
|
||||||
|
|
||||||
|
|
@ -315,9 +336,24 @@ STT_SERVICE=local/base
|
||||||
# Pages limit per user for ETL (default: unlimited)
|
# Pages limit per user for ETL (default: unlimited)
|
||||||
# PAGES_LIMIT=500
|
# PAGES_LIMIT=500
|
||||||
|
|
||||||
# Premium token quota per registered user (default: 5M)
|
# Premium credit quota per registered user, in micro-USD (default: $5).
|
||||||
# Only applies to models with billing_tier=premium in global_llm_config.yaml
|
# Premium turns are debited at the actual per-call provider cost reported
|
||||||
# PREMIUM_TOKEN_LIMIT=5000000
|
# by LiteLLM. Only applies to models with billing_tier=premium.
|
||||||
|
# PREMIUM_CREDIT_MICROS_LIMIT=5000000
|
||||||
|
# DEPRECATED — PREMIUM_TOKEN_LIMIT=5000000
|
||||||
|
|
||||||
|
# Safety ceiling on per-call premium reservation, in micro-USD ($1.00 default).
|
||||||
|
# QUOTA_MAX_RESERVE_MICROS=1000000
|
||||||
|
|
||||||
|
# Per-image reservation for POST /image-generations, in micro-USD ($0.05 default).
|
||||||
|
# QUOTA_DEFAULT_IMAGE_RESERVE_MICROS=50000
|
||||||
|
|
||||||
|
# Per-podcast reservation for the podcast Celery task ($0.20 default).
|
||||||
|
# QUOTA_DEFAULT_PODCAST_RESERVE_MICROS=200000
|
||||||
|
|
||||||
|
# Per-video-presentation reservation for the video Celery task ($1.00 default).
|
||||||
|
# Override path bypasses QUOTA_MAX_RESERVE_MICROS clamp — raise with care.
|
||||||
|
# QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS=1000000
|
||||||
|
|
||||||
# No-login (anonymous) mode — public users can chat without an account
|
# No-login (anonymous) mode — public users can chat without an account
|
||||||
# Set TRUE to enable /free pages and anonymous chat API
|
# Set TRUE to enable /free pages and anonymous chat API
|
||||||
|
|
|
||||||
|
|
@ -54,11 +54,15 @@ STRIPE_PAGES_PER_UNIT=1000
|
||||||
# Set FALSE to disable new checkout session creation temporarily
|
# Set FALSE to disable new checkout session creation temporarily
|
||||||
STRIPE_PAGE_BUYING_ENABLED=TRUE
|
STRIPE_PAGE_BUYING_ENABLED=TRUE
|
||||||
|
|
||||||
# Premium token purchases via Stripe (for premium-tier model usage)
|
# Premium credit purchases via Stripe (for premium-tier model usage).
|
||||||
# Set TRUE to allow users to buy premium token packs ($1 per 1M tokens)
|
# Each pack grants STRIPE_CREDIT_MICROS_PER_UNIT micro-USD of credit
|
||||||
|
# (default 1_000_000 = $1.00). Premium turns are billed at the actual
|
||||||
|
# per-call provider cost reported by LiteLLM.
|
||||||
STRIPE_TOKEN_BUYING_ENABLED=FALSE
|
STRIPE_TOKEN_BUYING_ENABLED=FALSE
|
||||||
STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
|
STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
|
||||||
STRIPE_TOKENS_PER_UNIT=1000000
|
STRIPE_CREDIT_MICROS_PER_UNIT=1000000
|
||||||
|
# DEPRECATED — use STRIPE_CREDIT_MICROS_PER_UNIT (1:1 numerical mapping):
|
||||||
|
# STRIPE_TOKENS_PER_UNIT=1000000
|
||||||
|
|
||||||
# Periodic Stripe safety net for purchases left in PENDING (minutes old)
|
# Periodic Stripe safety net for purchases left in PENDING (minutes old)
|
||||||
STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10
|
STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10
|
||||||
|
|
@ -184,9 +188,35 @@ VIDEO_PRESENTATION_DEFAULT_DURATION_IN_FRAMES=300
|
||||||
# (Optional) Maximum pages limit per user for ETL services (default: `999999999` for unlimited in OSS version)
|
# (Optional) Maximum pages limit per user for ETL services (default: `999999999` for unlimited in OSS version)
|
||||||
PAGES_LIMIT=500
|
PAGES_LIMIT=500
|
||||||
|
|
||||||
# Premium token quota per registered user (default: 3,000,000)
|
# Premium credit quota per registered user, in micro-USD
|
||||||
# Applies only to models with billing_tier=premium in global_llm_config.yaml
|
# (default: 5,000,000 == $5.00 of credit). Premium turns are debited at the
|
||||||
PREMIUM_TOKEN_LIMIT=3000000
|
# actual per-call provider cost reported by LiteLLM, so cheap and expensive
|
||||||
|
# models bill proportionally. Applies only to models with
|
||||||
|
# billing_tier=premium in global_llm_config.yaml.
|
||||||
|
PREMIUM_CREDIT_MICROS_LIMIT=5000000
|
||||||
|
# DEPRECATED — use PREMIUM_CREDIT_MICROS_LIMIT (1:1 numerical mapping):
|
||||||
|
# PREMIUM_TOKEN_LIMIT=5000000
|
||||||
|
|
||||||
|
# Safety ceiling on per-call premium reservation, in micro-USD.
|
||||||
|
# stream_new_chat estimates an upper-bound cost from the model's
|
||||||
|
# litellm-published per-token rates × the config's quota_reserve_tokens
|
||||||
|
# and clamps to this value so a misconfigured model can't lock the
|
||||||
|
# user's whole balance on one call. Default $1.00.
|
||||||
|
QUOTA_MAX_RESERVE_MICROS=1000000
|
||||||
|
|
||||||
|
# Per-image reservation (in micro-USD) for the POST /image-generations
|
||||||
|
# endpoint. Bypassed for free configs. Default $0.05.
|
||||||
|
QUOTA_DEFAULT_IMAGE_RESERVE_MICROS=50000
|
||||||
|
|
||||||
|
# Per-podcast reservation (in micro-USD) used by the podcast Celery task.
|
||||||
|
# Single envelope covers one transcript-generation LLM call. Default $0.20.
|
||||||
|
QUOTA_DEFAULT_PODCAST_RESERVE_MICROS=200000
|
||||||
|
|
||||||
|
# Per-video-presentation reservation (in micro-USD) used by the video
|
||||||
|
# presentation Celery task. Covers worst-case fan-out of N slide-scene
|
||||||
|
# generations + refines. Default $1.00. NOTE: tasks using the override
|
||||||
|
# path bypass the QUOTA_MAX_RESERVE_MICROS clamp — raise with care.
|
||||||
|
QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS=1000000
|
||||||
|
|
||||||
# No-login (anonymous) mode — allows public users to chat without an account
|
# No-login (anonymous) mode — allows public users to chat without an account
|
||||||
# Set TRUE to enable /free pages and anonymous chat API
|
# Set TRUE to enable /free pages and anonymous chat API
|
||||||
|
|
@ -294,3 +324,30 @@ LANGSMITH_PROJECT=surfsense
|
||||||
# SURFSENSE_ENABLE_PLUGIN_LOADER=false
|
# SURFSENSE_ENABLE_PLUGIN_LOADER=false
|
||||||
# Comma-separated allowlist of plugin entry-point names
|
# Comma-separated allowlist of plugin entry-point names
|
||||||
# SURFSENSE_ALLOWED_PLUGINS=year_substituter
|
# SURFSENSE_ALLOWED_PLUGINS=year_substituter
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Compiled-agent cache (Phase 1 + 2 perf optimization, default ON)
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# When ON, the per-turn LangGraph + middleware compile result (~3-5s of CPU
|
||||||
|
# on a cold turn) is reused across subsequent turns on the same thread,
|
||||||
|
# collapsing it to a microsecond hash lookup. All connector tools acquire
|
||||||
|
# their own short-lived DB session per call (Phase 2 refactor) so a cached
|
||||||
|
# closure is safe to share across requests. Flip OFF only as a last-resort
|
||||||
|
# rollback if you suspect cache-related staleness.
|
||||||
|
# SURFSENSE_ENABLE_AGENT_CACHE=true
|
||||||
|
|
||||||
|
# Cache capacity (max number of compiled-agent entries kept in memory)
|
||||||
|
# and TTL per entry (seconds). Working set is typically one entry per
|
||||||
|
# active thread on this replica; tune up for very large deployments.
|
||||||
|
# SURFSENSE_AGENT_CACHE_MAXSIZE=256
|
||||||
|
# SURFSENSE_AGENT_CACHE_TTL_SECONDS=1800
|
||||||
|
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Connector discovery TTL cache (Phase 1.4 perf optimization)
|
||||||
|
# -----------------------------------------------------------------------------
|
||||||
|
# Caches the per-search-space "available connectors" + "available document
|
||||||
|
# types" lookups that ``create_surfsense_deep_agent`` hits on every turn.
|
||||||
|
# ORM event listeners auto-invalidate on connector / document inserts,
|
||||||
|
# updates and deletes — the TTL only bounds staleness for bulk-import
|
||||||
|
# paths that bypass the ORM. Set to 0 to disable the cache.
|
||||||
|
# SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS=30
|
||||||
|
|
|
||||||
|
|
@ -38,16 +38,26 @@ RUN pip install --upgrade certifi pip-system-certs
|
||||||
COPY pyproject.toml .
|
COPY pyproject.toml .
|
||||||
COPY uv.lock .
|
COPY uv.lock .
|
||||||
|
|
||||||
# Install PyTorch based on architecture
|
# Install all Python dependencies from uv.lock for deterministic builds.
|
||||||
RUN if [ "$(uname -m)" = "x86_64" ]; then \
|
#
|
||||||
pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121; \
|
# `uv pip install -e .` re-resolves from pyproject.toml and ignores uv.lock,
|
||||||
else \
|
# which lets prod silently drift to newer upstream versions on every rebuild
|
||||||
pip install --no-cache-dir torch torchvision torchaudio; \
|
# (e.g. deepagents 0.4.x -> 0.5.x breaking the FilesystemMiddleware imports).
|
||||||
fi
|
# Exporting the lock to requirements.txt and feeding it to `uv pip install`
|
||||||
|
# pins every transitive package to the exact version captured in uv.lock.
|
||||||
# Install python dependencies
|
#
|
||||||
|
# Note on torch/CUDA: we do NOT install torch from a separate cu* index here.
|
||||||
|
# PyPI's torch wheels for Linux x86_64 already ship CUDA-enabled and pull
|
||||||
|
# nvidia-cudnn-cu13, nvidia-nccl-cu13, triton, etc. as install deps (all
|
||||||
|
# captured in uv.lock). Installing from cu121 first only wasted ~2GB of
|
||||||
|
# downloads that the lock-based install immediately replaced. If a specific
|
||||||
|
# CUDA version is needed (driver compatibility, etc.), wire it through
|
||||||
|
# [tool.uv.sources] in pyproject.toml so the lock stays the source of truth.
|
||||||
RUN pip install --no-cache-dir uv && \
|
RUN pip install --no-cache-dir uv && \
|
||||||
uv pip install --system --no-cache-dir -e .
|
uv export --frozen --no-dev --no-hashes --no-emit-project \
|
||||||
|
--format requirements-txt -o /tmp/requirements.txt && \
|
||||||
|
uv pip install --system --no-cache-dir -r /tmp/requirements.txt && \
|
||||||
|
rm /tmp/requirements.txt
|
||||||
|
|
||||||
# Set SSL environment variables dynamically
|
# Set SSL environment variables dynamically
|
||||||
RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") && \
|
RUN CERTIFI_PATH=$(python -c "import certifi; print(certifi.where())") && \
|
||||||
|
|
@ -66,13 +76,18 @@ RUN cd /root/.EasyOCR/model && (unzip -o english_g2.zip || true) && (unzip -o cr
|
||||||
# Pre-download Docling models
|
# Pre-download Docling models
|
||||||
RUN python -c "try:\n from docling.document_converter import DocumentConverter\n conv = DocumentConverter()\nexcept:\n pass" || true
|
RUN python -c "try:\n from docling.document_converter import DocumentConverter\n conv = DocumentConverter()\nexcept:\n pass" || true
|
||||||
|
|
||||||
# Install Playwright browsers for web scraping if needed
|
# Install Playwright browsers for web scraping (the playwright package itself
|
||||||
RUN pip install playwright && \
|
# is already installed via uv.lock above)
|
||||||
playwright install chromium --with-deps
|
RUN playwright install chromium --with-deps
|
||||||
|
|
||||||
# Copy source code
|
# Copy source code
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
# Install the project itself in editable mode. Dependencies were already
|
||||||
|
# installed deterministically from uv.lock above, so --no-deps prevents any
|
||||||
|
# re-resolution that could pull newer versions.
|
||||||
|
RUN uv pip install --system --no-cache-dir --no-deps -e .
|
||||||
|
|
||||||
# Copy and set permissions for entrypoint script
|
# Copy and set permissions for entrypoint script
|
||||||
# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
|
# Use dos2unix to ensure LF line endings (fixes CRLF issues from Windows checkouts)
|
||||||
COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
|
COPY scripts/docker/entrypoint.sh /app/scripts/docker/entrypoint.sh
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,291 @@
|
||||||
|
"""rename premium token columns to credit micros and add cost_micros to token_usage
|
||||||
|
|
||||||
|
Migrates the premium quota system from a flat token counter to a USD-cost
|
||||||
|
based credit system, where 1 credit = 1 micro-USD ($0.000001).
|
||||||
|
|
||||||
|
Column renames (1:1 numerical mapping — the prior $1 per 1M tokens Stripe
|
||||||
|
price means every existing value is already correct in the new unit, no
|
||||||
|
data transformation needed):
|
||||||
|
|
||||||
|
user.premium_tokens_limit -> premium_credit_micros_limit
|
||||||
|
user.premium_tokens_used -> premium_credit_micros_used
|
||||||
|
user.premium_tokens_reserved -> premium_credit_micros_reserved
|
||||||
|
|
||||||
|
premium_token_purchases.tokens_granted -> credit_micros_granted
|
||||||
|
|
||||||
|
New column for cost auditing per turn:
|
||||||
|
|
||||||
|
token_usage.cost_micros (BigInteger NOT NULL DEFAULT 0)
|
||||||
|
|
||||||
|
The "user" table is in zero_publication's column list (added in 139), so
|
||||||
|
this migration must drop and recreate the publication with the renamed
|
||||||
|
column names, otherwise zero-cache will replicate stale column names and
|
||||||
|
the FE Zero schema will fail to bind.
|
||||||
|
|
||||||
|
IMPORTANT - before AND after running this migration:
|
||||||
|
1. Stop zero-cache (it holds replication locks that will deadlock DDL)
|
||||||
|
2. Run: alembic upgrade head
|
||||||
|
3. Delete / reset the zero-cache data volume
|
||||||
|
4. Restart zero-cache (it will do a fresh initial sync)
|
||||||
|
|
||||||
|
Skipping the zero-cache stop will deadlock at the ACCESS EXCLUSIVE LOCK on
|
||||||
|
"user". Skipping the data-volume reset will leave IndexedDB clients seeing
|
||||||
|
column-not-found errors from a stale catalog snapshot.
|
||||||
|
|
||||||
|
Revision ID: 140
|
||||||
|
Revises: 139
|
||||||
|
"""
|
||||||
|
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# Alembic revision identifiers: this migration is "140" and applies on top
# of "139".
revision: str = "140"
down_revision: str | None = "139"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Name of the logical-replication publication that this migration drops and
# recreates around the column renames.
PUBLICATION_NAME = "zero_publication"

# Replicates 139's document column list verbatim — must stay in sync.
# These are the "documents" columns listed in the recreated publication.
DOCUMENT_COLS = [
    "id",
    "title",
    "document_type",
    "search_space_id",
    "folder_id",
    "created_by_id",
    "status",
    "created_at",
    "updated_at",
]

# Same five live-meter fields as 139, with the renamed column names.
# These are the "user" columns listed in the publication after the upgrade;
# ``premium_credit_micros_reserved`` is renamed by the migration but is not
# part of this list.
USER_COLS = [
    "id",
    "pages_limit",
    "pages_used",
    "premium_credit_micros_limit",
    "premium_credit_micros_used",
]
|
||||||
|
|
||||||
|
|
||||||
|
def _terminate_blocked_pids(conn, table: str) -> None:
    """Kill backends whose locks on *table* would block our AccessExclusiveLock.

    Every backend other than the current one that holds (or is waiting on) a
    relation-level lock on a relation named *table* in the CURRENT database
    is terminated with ``pg_terminate_backend``.

    Args:
        conn: Connection the migration runs on; must expose ``execute``.
        table: Unqualified relation name to match against ``pg_class.relname``.
    """
    conn.execute(
        sa.text(
            "SELECT pg_terminate_backend(l.pid) "
            "FROM pg_locks l "
            "JOIN pg_class c ON c.oid = l.relation "
            # pg_locks is cluster-wide while pg_class only describes the
            # current database. Without the database join, a relation OID
            # from another database that happens to collide with our
            # table's OID would get an unrelated backend killed.
            "JOIN pg_database d ON d.oid = l.database "
            "WHERE c.relname = :tbl "
            "  AND l.locktype = 'relation' "
            "  AND d.datname = current_database() "
            "  AND l.pid != pg_backend_pid()"
        ),
        {"tbl": table},
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _has_zero_version(conn, table: str) -> bool:
    """Return True when *table* has a ``_0_version`` column.

    Delegates to ``_column_exists`` so the ``information_schema`` lookup
    lives in exactly one place instead of being duplicated here with a
    hard-coded column name.

    Args:
        conn: Connection the migration runs on.
        table: Unqualified table name to inspect.
    """
    return _column_exists(conn, table, "_0_version")
|
||||||
|
|
||||||
|
|
||||||
|
def _column_exists(conn, table: str, column: str) -> bool:
    """Report whether ``information_schema.columns`` lists *column* on *table*.

    The lookup matches on table name and column name only.

    Args:
        conn: Connection the migration runs on; must expose ``execute``.
        table: Unqualified table name.
        column: Column name to look for.

    Returns:
        True if at least one matching row exists, otherwise False.
    """
    lookup = sa.text(
        "SELECT 1 FROM information_schema.columns "
        "WHERE table_name = :tbl AND column_name = :col"
    )
    first_row = conn.execute(lookup, {"tbl": table, "col": column}).fetchone()
    return first_row is not None
|
||||||
|
|
||||||
|
|
||||||
|
def _build_publication_ddl(
    user_cols: list[str],
    *,
    documents_has_zero_ver: bool,
    user_has_zero_ver: bool,
) -> str:
    """Render the ``CREATE PUBLICATION`` statement for ``PUBLICATION_NAME``.

    The publication covers a fixed set of tables. ``documents`` and
    ``"user"`` are published with explicit column lists; the quoted
    ``"_0_version"`` column is appended to a list when the matching flag
    says the table has it.

    Args:
        user_cols: Column names to publish for the ``"user"`` table.
        documents_has_zero_ver: Whether to append ``"_0_version"`` to the
            ``documents`` column list.
        user_has_zero_ver: Whether to append ``"_0_version"`` to the
            ``"user"`` column list.

    Returns:
        The complete DDL statement as a single string.
    """
    zero_version_col = '"_0_version"'

    document_columns = DOCUMENT_COLS + (
        [zero_version_col] if documents_has_zero_ver else []
    )
    user_columns = user_cols + ([zero_version_col] if user_has_zero_ver else [])

    documents_spec = "documents (" + ", ".join(document_columns) + ")"
    user_spec = '"user" (' + ", ".join(user_columns) + ")"

    # Order matters only for reproducing the same statement text.
    published_tables = [
        "notifications",
        documents_spec,
        "folders",
        "search_source_connectors",
        "new_chat_messages",
        "chat_comments",
        "chat_session_state",
        user_spec,
    ]
    return f"CREATE PUBLICATION {PUBLICATION_NAME} FOR TABLE " + ", ".join(
        published_tables
    )
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Apply revision 140.

    Steps, in order:

    1. Add ``token_usage.cost_micros`` if it is not already present.
    2. Rename ``premium_token_purchases.tokens_granted`` to
       ``credit_micros_granted`` (only when the old column exists and the
       new one does not).
    3. Inside a nested/new transaction: terminate other lock holders on
       ``"user"``, lock the table, drop the publication, rename the three
       ``premium_tokens_*`` columns, set the new server default on the
       limit column, and recreate the publication with ``USER_COLS``.
    """
    conn = op.get_bind()

    # ------------------------------------------------------------------
    # 1. Add cost_micros to token_usage. Idempotent guard so re-runs in
    #    dev environments are safe.
    # ------------------------------------------------------------------
    if not _column_exists(conn, "token_usage", "cost_micros"):
        op.add_column(
            "token_usage",
            sa.Column(
                "cost_micros",
                sa.BigInteger(),
                nullable=False,
                server_default="0",
            ),
        )

    # ------------------------------------------------------------------
    # 2. Rename premium_token_purchases.tokens_granted -> credit_micros_granted.
    #    Skipped when the old column is gone or the new one already exists.
    # ------------------------------------------------------------------
    if _column_exists(
        conn, "premium_token_purchases", "tokens_granted"
    ) and not _column_exists(conn, "premium_token_purchases", "credit_micros_granted"):
        op.alter_column(
            "premium_token_purchases",
            "tokens_granted",
            new_column_name="credit_micros_granted",
        )

    # ------------------------------------------------------------------
    # 3. Rename user.premium_tokens_* -> premium_credit_micros_*.
    #
    # We must drop the publication first (it references the old column
    # names) and re-acquire the lock for DDL. asyncpg requires LOCK TABLE
    # in a transaction block; alembic's outer transaction already holds
    # one, but a SAVEPOINT keeps the LOCK + DDL atomic.
    # ------------------------------------------------------------------
    # Use a SAVEPOINT when a transaction is already open, otherwise start one.
    tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
    with tx:
        # Fail after 10 seconds of waiting for a lock instead of blocking
        # indefinitely.
        # NOTE(review): this is a plain SET, not SET LOCAL, so the timeout
        # presumably stays in effect on this session after the block ends —
        # confirm that is intended.
        conn.execute(sa.text("SET lock_timeout = '10s'"))

        # Clear out other sessions holding locks on "user", then take the
        # exclusive lock ourselves.
        _terminate_blocked_pids(conn, "user")
        conn.execute(sa.text('LOCK TABLE "user" IN ACCESS EXCLUSIVE MODE'))

        # Re-assert REPLICA IDENTITY DEFAULT for safety; column-list
        # publications require at least the PK to be in the column list,
        # which is true for both the old and new shape.
        conn.execute(sa.text('ALTER TABLE "user" REPLICA IDENTITY DEFAULT'))

        # Drop the publication BEFORE renaming columns, otherwise Postgres
        # rejects the rename: "cannot drop column ... referenced by
        # publication".
        conn.execute(sa.text(f"DROP PUBLICATION IF EXISTS {PUBLICATION_NAME}"))

        # Each rename is guarded so a partially-applied migration can be
        # re-run without failing on an already-renamed column.
        for old, new in (
            ("premium_tokens_limit", "premium_credit_micros_limit"),
            ("premium_tokens_used", "premium_credit_micros_used"),
            ("premium_tokens_reserved", "premium_credit_micros_reserved"),
        ):
            if _column_exists(conn, "user", old) and not _column_exists(
                conn, "user", new
            ):
                op.alter_column("user", old, new_column_name=new)

        # Update the server_default on the renamed limit column so newly
        # inserted users get $5 of credit (== 5_000_000 micros) by
        # default. Existing rows are unaffected.
        # This call is unconditional: it relies on the column existing under
        # its new name by this point.
        op.alter_column(
            "user",
            "premium_credit_micros_limit",
            server_default="5000000",
        )

        # Recreate the publication with the new column names.
        # "_0_version" is only added to a column list when the table has it.
        documents_has_zero_ver = _has_zero_version(conn, "documents")
        user_has_zero_ver = _has_zero_version(conn, "user")
        conn.execute(
            sa.text(
                _build_publication_ddl(
                    USER_COLS,
                    documents_has_zero_ver=documents_has_zero_ver,
                    user_has_zero_ver=user_has_zero_ver,
                )
            )
        )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Revert the rename and drop ``cost_micros``.

    Mirrors ``upgrade`` in reverse. Inside a nested/new transaction it
    terminates other lock holders on ``"user"``, locks the table, drops the
    publication, renames the three columns back, sets the server default on
    ``premium_tokens_limit``, and recreates the publication with the legacy
    column list. It then renames
    ``premium_token_purchases.credit_micros_granted`` back to
    ``tokens_granted`` and finally drops ``token_usage.cost_micros``.

    Same zero-cache stop/reset runbook applies in reverse.
    """
    conn = op.get_bind()

    # Use a SAVEPOINT when a transaction is already open, otherwise start one.
    tx = conn.begin_nested() if conn.in_transaction() else conn.begin()
    with tx:
        # Fail after 10 seconds of waiting for a lock instead of blocking
        # indefinitely.
        conn.execute(sa.text("SET lock_timeout = '10s'"))

        # Clear out other sessions holding locks on "user", then take the
        # exclusive lock ourselves.
        _terminate_blocked_pids(conn, "user")
        conn.execute(sa.text('LOCK TABLE "user" IN ACCESS EXCLUSIVE MODE'))

        # The publication lists the new column names, so drop it before
        # renaming them back.
        conn.execute(sa.text(f"DROP PUBLICATION IF EXISTS {PUBLICATION_NAME}"))

        # Guarded renames: only act when the new name exists and the old
        # name does not, so a partially-reverted schema can be re-run.
        for new, old in (
            ("premium_credit_micros_limit", "premium_tokens_limit"),
            ("premium_credit_micros_used", "premium_tokens_used"),
            ("premium_credit_micros_reserved", "premium_tokens_reserved"),
        ):
            if _column_exists(conn, "user", new) and not _column_exists(
                conn, "user", old
            ):
                op.alter_column("user", new, new_column_name=old)

        # Set the server default on the restored limit column.
        # NOTE(review): this writes 5000000, the same value upgrade() sets —
        # confirm it matches the default that existed before revision 140.
        op.alter_column(
            "user",
            "premium_tokens_limit",
            server_default="5000000",
        )

        # Column list for "user" as it was published before the rename.
        legacy_user_cols = [
            "id",
            "pages_limit",
            "pages_used",
            "premium_tokens_limit",
            "premium_tokens_used",
        ]
        # "_0_version" is only added to a column list when the table has it.
        documents_has_zero_ver = _has_zero_version(conn, "documents")
        user_has_zero_ver = _has_zero_version(conn, "user")
        conn.execute(
            sa.text(
                _build_publication_ddl(
                    legacy_user_cols,
                    documents_has_zero_ver=documents_has_zero_ver,
                    user_has_zero_ver=user_has_zero_ver,
                )
            )
        )

    # Rename the purchases column back, guarded the same way as in upgrade().
    if _column_exists(
        conn, "premium_token_purchases", "credit_micros_granted"
    ) and not _column_exists(conn, "premium_token_purchases", "tokens_granted"):
        op.alter_column(
            "premium_token_purchases",
            "credit_micros_granted",
            new_column_name="tokens_granted",
        )

    # Drop the audit column added by upgrade(), if it is still there.
    if _column_exists(conn, "token_usage", "cost_micros"):
        op.drop_column("token_usage", "cost_micros")
|
||||||
357
surfsense_backend/app/agents/new_chat/agent_cache.py
Normal file
357
surfsense_backend/app/agents/new_chat/agent_cache.py
Normal file
|
|
@ -0,0 +1,357 @@
|
||||||
|
"""TTL-LRU cache for compiled SurfSense deep agents.
|
||||||
|
|
||||||
|
Why this exists
|
||||||
|
---------------
|
||||||
|
|
||||||
|
``create_surfsense_deep_agent`` runs a 4-5 second pipeline on EVERY chat
|
||||||
|
turn:
|
||||||
|
|
||||||
|
1. Discover connectors & document types from Postgres (~50-200ms)
|
||||||
|
2. Build the tool list (built-in + MCP) (~200ms-1.7s)
|
||||||
|
3. Compose the system prompt
|
||||||
|
4. Construct ~15 middleware instances (CPU)
|
||||||
|
5. Eagerly compile the general-purpose subagent
|
||||||
|
(``SubAgentMiddleware.__init__`` calls ``create_agent`` synchronously,
|
||||||
|
which builds a second LangGraph + Pydantic schemas — ~1.5-2s of pure
|
||||||
|
CPU work)
|
||||||
|
6. Compile the outer LangGraph
|
||||||
|
|
||||||
|
For a single thread, all six steps produce the SAME object on every turn
|
||||||
|
unless the user has changed their LLM config, toggled a feature flag,
|
||||||
|
added a connector, etc. The right answer is to compile ONCE per
|
||||||
|
"agent shape" and reuse the resulting :class:`CompiledStateGraph` for
|
||||||
|
every subsequent turn on the same thread.
|
||||||
|
|
||||||
|
Why a per-thread key (not a global pool)
|
||||||
|
----------------------------------------
|
||||||
|
|
||||||
|
Most middleware in the SurfSense stack captures per-thread state in
|
||||||
|
``__init__`` closures (``thread_id``, ``user_id``, ``search_space_id``,
|
||||||
|
``filesystem_mode``, ``mentioned_document_ids``). Cross-thread reuse
|
||||||
|
would silently leak state across users and threads. Keying the cache on
|
||||||
|
``(llm_config_id, thread_id, ...)`` gives us safe reuse for repeated
|
||||||
|
turns on the same thread without changing any middleware's behavior.
|
||||||
|
|
||||||
|
Phase 2 will move those captured fields onto :class:`SurfSenseContextSchema`
|
||||||
|
(read via ``runtime.context``) so the cache can collapse to a single
|
||||||
|
``(llm_config_id, search_space_id, ...)`` key shared across threads. Until
|
||||||
|
then, per-thread keying is the only safe option.
|
||||||
|
|
||||||
|
Cache shape
|
||||||
|
-----------
|
||||||
|
|
||||||
|
* TTL-LRU: entries auto-expire after ``ttl_seconds`` (default 1800s, 30
|
||||||
|
minutes — matches a typical chat session). ``maxsize`` (default 256)
|
||||||
|
caps memory; LRU evicts least-recently-used on overflow.
|
||||||
|
* In-flight de-duplication: per-key :class:`asyncio.Lock` so concurrent
|
||||||
|
cold misses on the same key wait for the first build instead of
|
||||||
|
building N times.
|
||||||
|
* Process-local: this is an in-memory cache. Multi-replica deployments
|
||||||
|
pay the build cost once per replica per key. That's fine; the working
|
||||||
|
set per replica is small (one entry per active thread on that replica).
|
||||||
|
|
||||||
|
Telemetry
|
||||||
|
---------
|
||||||
|
|
||||||
|
Every lookup logs ``[agent_cache]`` lines through ``surfsense.perf``:
|
||||||
|
|
||||||
|
* ``hit`` — cache hit, microseconds-fast
|
||||||
|
* ``miss`` — first build for this key, includes build duration
|
||||||
|
* ``stale`` — entry was found but expired; rebuilt
|
||||||
|
* ``evict`` — LRU eviction (size-limited)
|
||||||
|
* ``size`` — current cache occupancy at lookup time
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from collections import OrderedDict
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.utils.perf import get_perf_logger
|
||||||
|
|
||||||
|
# Standard module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Perf logger obtained from ``app.utils.perf``; the module docstring says the
# ``[agent_cache]`` telemetry lines are emitted through it.
_perf_log = get_perf_logger()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public API: signature helpers (cache key components)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def stable_hash(*parts: Any) -> str:
    """Return a hex SHA1 fingerprint of the ``repr`` of every item in ``parts``.

    Each part is rendered with ``repr``, encoded as UTF-8 (unencodable
    characters are replaced) and followed by an ASCII unit separator
    (``0x1f``) so adjacent parts cannot run together. The digest is a
    content fingerprint for cache keys, not a security primitive, hence
    ``usedforsecurity=False``.

    Args:
        *parts: Arbitrary values to fingerprint, in order.

    Returns:
        The 40-character hexadecimal SHA1 digest.
    """
    unit_separator = b"\x1f"
    payload = b"".join(
        repr(part).encode("utf-8", errors="replace") + unit_separator
        for part in parts
    )
    return hashlib.sha1(payload, usedforsecurity=False).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _tool_descriptor(tool: Any) -> tuple[str, str]:
    """Return a sortable, address-free ``(name, description)`` pair for *tool*."""
    name = getattr(tool, "name", None)
    if name is None:
        # Plain callables have no ``name`` attribute and their default repr
        # embeds a memory address, which would change the signature every
        # time the tool object is rebuilt. Prefer the qualified name.
        qualname = getattr(tool, "__qualname__", None)
        if qualname is not None:
            name = f"{getattr(tool, '__module__', '')}.{qualname}"
        else:
            name = repr(tool)
    description = getattr(tool, "description", None)
    # Normalise to ``str`` so ``sorted`` never has to compare ``None`` with
    # a string when two tools share a name.
    return str(name), "" if description is None else str(description)


def tools_signature(
    tools: list[Any] | tuple[Any, ...],
    *,
    available_connectors: list[str] | None,
    available_document_types: list[str] | None,
) -> str:
    """Hash the bound-tool surface for cache-key purposes.

    The signature changes whenever:

    * A tool is added or removed from the bound list (built-in toggles,
      MCP tools loaded for the user changes, gating rules flip, etc.).
    * The available connectors / document types for the search space
      change (new connector added, last connector removed, new document
      type indexed). Because :func:`get_connector_gated_tools` derives
      ``modified_disabled_tools`` from ``available_connectors``, the
      tool surface is technically already covered — but we hash the
      connector list separately so an empty-list "no tools changed"
      situation still rotates the key when, say, the user re-adds a
      connector that gates a tool we were already not exposing.

    Stays stable across:

    * Process restarts (tool names + descriptions are static).
    * Different replicas (everyone gets the same hash for the same
      inputs).

    Tools that expose string ``name`` / ``description`` attributes hash
    exactly as before. Tools without a ``name`` fall back to their
    module-qualified name, and only to ``repr`` when that is unavailable.
    A ``None`` description is treated as an empty string.

    Args:
        tools: The tools bound to the agent; order does not matter.
        available_connectors: Connector names for the search space, or None.
        available_document_types: Document type names, or None.

    Returns:
        The hex digest produced by :func:`stable_hash`.
    """
    tool_descriptors = sorted(_tool_descriptor(t) for t in tools)
    connectors = sorted(available_connectors or [])
    doc_types = sorted(available_document_types or [])
    return stable_hash(tool_descriptors, connectors, doc_types)
|
||||||
|
|
||||||
|
|
||||||
|
def flags_signature(flags: Any) -> str:
    """Fingerprint the resolved :class:`AgentFeatureFlags` object.

    The flags object is rendered with ``repr`` and that string is passed
    through :func:`stable_hash`, so two flag sets with equal ``repr``
    output share a signature.

    Args:
        flags: The feature-flag object to fingerprint.

    Returns:
        The :func:`stable_hash` digest of ``repr(flags)``.
    """
    rendered_flags = repr(flags)
    return stable_hash(rendered_flags)
|
||||||
|
|
||||||
|
|
||||||
|
def system_prompt_hash(system_prompt: str) -> str:
    """Return the SHA1 hex digest of ``system_prompt`` encoded as UTF-8.

    Characters that cannot be encoded are replaced instead of raising.
    SHA1 is used as a content fingerprint only (``usedforsecurity=False``).
    """
    hasher = hashlib.sha1(usedforsecurity=False)
    hasher.update(system_prompt.encode("utf-8", errors="replace"))
    return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Cache implementation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class _Entry:
|
||||||
|
value: Any
|
||||||
|
created_at: float
|
||||||
|
last_used_at: float
|
||||||
|
|
||||||
|
|
||||||
|
class _AgentCache:
    """In-process TTL + LRU cache with per-key in-flight de-duplication.

    NOT THREAD-SAFE in the multithreading sense — designed for a single
    asyncio event loop. Uvicorn runs one event loop per worker process,
    so this is fine; multi-worker deployments simply each maintain their
    own cache.

    A ``maxsize`` of zero or less disables storage altogether: every
    ``get_or_build`` call then runs the builder and nothing is retained.

    NOTE(review): a lock created for a key whose build fails stays in
    ``_locks`` until that key is invalidated or the cache is cleared;
    nothing in this class removes it earlier.
    """

    def __init__(self, *, maxsize: int, ttl_seconds: float) -> None:
        """Create an empty cache.

        Args:
            maxsize: Maximum number of entries kept; ``<= 0`` disables
                caching.
            ttl_seconds: Age (measured from insertion) after which an
                entry is considered stale and rebuilt on next access.
        """
        self._maxsize = maxsize
        self._ttl = ttl_seconds
        # Ordered oldest-used first so ``popitem(last=False)`` is the LRU victim.
        self._entries: OrderedDict[str, _Entry] = OrderedDict()
        # One lock per key — guards "build" so concurrent cold misses on
        # the same key wait for the first build instead of all racing.
        self._locks: dict[str, asyncio.Lock] = {}

    def _now(self) -> float:
        """Return the monotonic clock reading used for all age arithmetic."""
        return time.monotonic()

    def _is_fresh(self, entry: _Entry) -> bool:
        """Return True while ``entry`` is younger than the configured TTL."""
        return (self._now() - entry.created_at) < self._ttl

    def _evict_if_full(self) -> None:
        """Drop least-recently-used entries until one more entry fits.

        The emptiness guard matters: without it a ``maxsize`` of zero or
        less would call ``popitem`` on an empty mapping and raise
        ``KeyError``.
        """
        while self._entries and len(self._entries) >= self._maxsize:
            evicted_key, _ = self._entries.popitem(last=False)
            self._locks.pop(evicted_key, None)
            _perf_log.info(
                "[agent_cache] evict key=%s reason=lru size=%d",
                _short(evicted_key),
                len(self._entries),
            )

    def _touch(self, key: str, entry: _Entry) -> None:
        """Record a hit: refresh ``last_used_at`` and mark ``key`` most recent."""
        entry.last_used_at = self._now()
        self._entries.move_to_end(key, last=True)

    async def _timed_build(
        self,
        key: str,
        builder: Callable[[], Awaitable[Any]],
    ) -> tuple[Any, float]:
        """Await ``builder()`` and return ``(value, elapsed_seconds)``.

        Any exception (including cancellation) is logged and re-raised so
        a failed build is never cached and the next caller retries.
        """
        started = time.perf_counter()
        try:
            value = await builder()
        except BaseException:
            # Don't cache failed builds; let the next caller retry.
            _perf_log.warning(
                "[agent_cache] build_failed key=%s elapsed=%.3fs",
                _short(key),
                time.perf_counter() - started,
            )
            raise
        return value, time.perf_counter() - started

    async def get_or_build(
        self,
        key: str,
        *,
        builder: Callable[[], Awaitable[Any]],
    ) -> Any:
        """Return the cached value for ``key`` or call ``builder()`` to make it.

        ``builder`` MUST be idempotent — concurrent cold misses on the
        same key collapse to a single ``builder()`` call (the others
        wait on the in-flight lock and observe the populated entry on
        wake).

        Args:
            key: Cache key.
            builder: Zero-argument coroutine function producing the value.

        Returns:
            The cached or freshly built value.

        Raises:
            BaseException: Whatever ``builder()`` raises; nothing is
                cached in that case.
        """
        # Fast path: hot hit.
        entry = self._entries.get(key)
        if entry is not None:
            if self._is_fresh(entry):
                self._touch(key, entry)
                _perf_log.info(
                    "[agent_cache] hit key=%s age=%.1fs size=%d",
                    _short(key),
                    self._now() - entry.created_at,
                    len(self._entries),
                )
                return entry.value

            # Stale entry — drop it; rebuild below.
            _perf_log.info(
                "[agent_cache] stale key=%s age=%.1fs ttl=%.0fs",
                _short(key),
                self._now() - entry.created_at,
                self._ttl,
            )
            self._entries.pop(key, None)

        # Caching disabled: build without storing, and without leaving a
        # per-key lock behind for a key that will never be evicted.
        if self._maxsize <= 0:
            value, elapsed = await self._timed_build(key, builder)
            _perf_log.info(
                "[agent_cache] miss key=%s build=%.3fs size=%d",
                _short(key),
                elapsed,
                len(self._entries),
            )
            return value

        # Slow path: serialize concurrent misses for the same key. The lock
        # is only allocated when the key does not have one yet.
        lock = self._locks.get(key)
        if lock is None:
            lock = self._locks[key] = asyncio.Lock()

        async with lock:
            # Double-check after acquiring the lock — another waiter may
            # have populated the entry while we slept.
            entry = self._entries.get(key)
            if entry is not None and self._is_fresh(entry):
                self._touch(key, entry)
                _perf_log.info(
                    "[agent_cache] hit key=%s age=%.1fs size=%d coalesced=true",
                    _short(key),
                    self._now() - entry.created_at,
                    len(self._entries),
                )
                return entry.value

            value, elapsed = await self._timed_build(key, builder)

            # Insert + evict. Overwriting an existing (stale) entry does not
            # grow the cache, so only make room for genuinely new keys.
            if key not in self._entries:
                self._evict_if_full()
            now = self._now()
            self._entries[key] = _Entry(value=value, created_at=now, last_used_at=now)
            self._entries.move_to_end(key, last=True)
            _perf_log.info(
                "[agent_cache] miss key=%s build=%.3fs size=%d",
                _short(key),
                elapsed,
                len(self._entries),
            )
            return value

    def invalidate(self, key: str) -> bool:
        """Drop a single entry; return True if anything was removed."""
        removed = self._entries.pop(key, None) is not None
        self._locks.pop(key, None)
        if removed:
            _perf_log.info(
                "[agent_cache] invalidate key=%s size=%d",
                _short(key),
                len(self._entries),
            )
        return removed

    def invalidate_prefix(self, prefix: str) -> int:
        """Drop every entry whose key starts with ``prefix``. Returns count."""
        # Snapshot the matches first so the mapping is not mutated mid-iteration.
        keys = [k for k in self._entries if k.startswith(prefix)]
        for k in keys:
            self._entries.pop(k, None)
            self._locks.pop(k, None)
        if keys:
            _perf_log.info(
                "[agent_cache] invalidate_prefix prefix=%s removed=%d size=%d",
                _short(prefix),
                len(keys),
                len(self._entries),
            )
        return len(keys)

    def clear(self) -> None:
        """Remove every entry and every per-key lock."""
        n = len(self._entries)
        self._entries.clear()
        self._locks.clear()
        if n:
            _perf_log.info("[agent_cache] clear removed=%d", n)

    def stats(self) -> dict[str, Any]:
        """Return the current size and the configured limits."""
        return {
            "size": len(self._entries),
            "maxsize": self._maxsize,
            "ttl_seconds": self._ttl,
        }
|
||||||
|
|
||||||
|
|
||||||
|
def _short(key: str, n: int = 16) -> str:
|
||||||
|
"""Truncate keys for log lines so they don't blow up log volume."""
|
||||||
|
return key if len(key) <= n else f"{key[:n]}..."
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Module-level singleton
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Sizing knobs are read from the environment once, when the module is imported.
_DEFAULT_MAXSIZE = int(os.environ.get("SURFSENSE_AGENT_CACHE_MAXSIZE", "256"))
_DEFAULT_TTL = float(os.environ.get("SURFSENSE_AGENT_CACHE_TTL_SECONDS", "1800"))

# The single cache instance shared by everything in this process.
_cache: _AgentCache = _AgentCache(
    maxsize=_DEFAULT_MAXSIZE,
    ttl_seconds=_DEFAULT_TTL,
)
|
||||||
|
|
||||||
|
|
||||||
|
def get_cache() -> _AgentCache:
    """Return the module-level compiled-agent cache shared by this process.

    The lookup happens on every call, so a cache swapped in by
    ``reload_for_tests`` is picked up immediately.
    """
    return _cache
|
||||||
|
|
||||||
|
|
||||||
|
def reload_for_tests(
    *,
    maxsize: int = 256,
    ttl_seconds: float = 1800.0,
) -> _AgentCache:
    """Swap the module-level cache for a new, empty one and return it.

    Intended for tests only: whatever the previous cache held is no longer
    reachable through ``get_cache`` afterwards.

    Args:
        maxsize: Entry limit for the replacement cache.
        ttl_seconds: Time-to-live for entries in the replacement cache.

    Returns:
        The newly installed cache.
    """
    global _cache
    fresh_cache = _AgentCache(maxsize=maxsize, ttl_seconds=ttl_seconds)
    _cache = fresh_cache
    return fresh_cache
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"flags_signature",
|
||||||
|
"get_cache",
|
||||||
|
"reload_for_tests",
|
||||||
|
"stable_hash",
|
||||||
|
"system_prompt_hash",
|
||||||
|
"tools_signature",
|
||||||
|
]
|
||||||
|
|
@ -40,6 +40,13 @@ from langchain_core.tools import BaseTool
|
||||||
from langgraph.types import Checkpointer
|
from langgraph.types import Checkpointer
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.agents.new_chat.agent_cache import (
|
||||||
|
flags_signature,
|
||||||
|
get_cache,
|
||||||
|
stable_hash,
|
||||||
|
system_prompt_hash,
|
||||||
|
tools_signature,
|
||||||
|
)
|
||||||
from app.agents.new_chat.context import SurfSenseContextSchema
|
from app.agents.new_chat.context import SurfSenseContextSchema
|
||||||
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
|
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
|
||||||
from app.agents.new_chat.filesystem_backends import build_backend_resolver
|
from app.agents.new_chat.filesystem_backends import build_backend_resolver
|
||||||
|
|
@ -53,6 +60,7 @@ from app.agents.new_chat.middleware import (
|
||||||
DedupHITLToolCallsMiddleware,
|
DedupHITLToolCallsMiddleware,
|
||||||
DoomLoopMiddleware,
|
DoomLoopMiddleware,
|
||||||
FileIntentMiddleware,
|
FileIntentMiddleware,
|
||||||
|
FlattenSystemMessageMiddleware,
|
||||||
KnowledgeBasePersistenceMiddleware,
|
KnowledgeBasePersistenceMiddleware,
|
||||||
KnowledgePriorityMiddleware,
|
KnowledgePriorityMiddleware,
|
||||||
KnowledgeTreeMiddleware,
|
KnowledgeTreeMiddleware,
|
||||||
|
|
@ -330,23 +338,39 @@ async def create_surfsense_deep_agent(
|
||||||
else None,
|
else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Discover available connectors and document types for this search space
|
# Discover available connectors and document types for this search space.
|
||||||
|
#
|
||||||
|
# NOTE: These two calls cannot be parallelized via ``asyncio.gather``.
|
||||||
|
# ``ConnectorService`` shares a single ``AsyncSession`` (``self.session``);
|
||||||
|
# SQLAlchemy explicitly forbids concurrent operations on the same session
|
||||||
|
# ("This session is provisioning a new connection; concurrent operations
|
||||||
|
# are not permitted on the same session"). The Phase 1.4 in-process TTL
|
||||||
|
# cache in ``connector_service`` already collapses the warm path to a
|
||||||
|
# near-zero pair of dict lookups, so sequential awaits cost nothing in
|
||||||
|
# the common case while remaining correct on cold cache misses.
|
||||||
available_connectors: list[str] | None = None
|
available_connectors: list[str] | None = None
|
||||||
available_document_types: list[str] | None = None
|
available_document_types: list[str] | None = None
|
||||||
|
|
||||||
_t0 = time.perf_counter()
|
_t0 = time.perf_counter()
|
||||||
try:
|
try:
|
||||||
connector_types = await connector_service.get_available_connectors(
|
try:
|
||||||
|
connector_types_result = await connector_service.get_available_connectors(
|
||||||
search_space_id
|
search_space_id
|
||||||
)
|
)
|
||||||
if connector_types:
|
if connector_types_result:
|
||||||
available_connectors = _map_connectors_to_searchable_types(connector_types)
|
available_connectors = _map_connectors_to_searchable_types(
|
||||||
|
connector_types_result
|
||||||
available_document_types = await connector_service.get_available_document_types(
|
|
||||||
search_space_id
|
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logging.warning("Failed to discover available connectors: %s", e)
|
||||||
|
|
||||||
|
try:
|
||||||
|
available_document_types = (
|
||||||
|
await connector_service.get_available_document_types(search_space_id)
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logging.warning("Failed to discover available document types: %s", e)
|
||||||
|
except Exception as e: # pragma: no cover - defensive outer guard
|
||||||
logging.warning(f"Failed to discover available connectors/document types: {e}")
|
logging.warning(f"Failed to discover available connectors/document types: {e}")
|
||||||
_perf_log.info(
|
_perf_log.info(
|
||||||
"[create_agent] Connector/doc-type discovery in %.3fs",
|
"[create_agent] Connector/doc-type discovery in %.3fs",
|
||||||
|
|
@ -469,8 +493,16 @@ async def create_surfsense_deep_agent(
|
||||||
# entire middleware build + main-graph compile into a single
|
# entire middleware build + main-graph compile into a single
|
||||||
# ``asyncio.to_thread`` so the heavy CPU work runs off-loop and the
|
# ``asyncio.to_thread`` so the heavy CPU work runs off-loop and the
|
||||||
# event loop stays responsive.
|
# event loop stays responsive.
|
||||||
_t0 = time.perf_counter()
|
#
|
||||||
agent = await asyncio.to_thread(
|
# PHASE 1: cache the resulting compiled graph. ``agent_cache`` is keyed
|
||||||
|
# on every per-request value that any middleware in the stack closes
|
||||||
|
# over in ``__init__`` — drop one and you risk leaking state across
|
||||||
|
# threads. Hits collapse this whole block to a microsecond lookup;
|
||||||
|
# misses pay the original CPU cost AND populate the cache.
|
||||||
|
config_id = agent_config.config_id if agent_config is not None else None
|
||||||
|
|
||||||
|
async def _build_agent() -> Any:
|
||||||
|
return await asyncio.to_thread(
|
||||||
_build_compiled_agent_blocking,
|
_build_compiled_agent_blocking,
|
||||||
llm=llm,
|
llm=llm,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
|
|
@ -484,14 +516,54 @@ async def create_surfsense_deep_agent(
|
||||||
anon_session_id=anon_session_id,
|
anon_session_id=anon_session_id,
|
||||||
available_connectors=available_connectors,
|
available_connectors=available_connectors,
|
||||||
available_document_types=available_document_types,
|
available_document_types=available_document_types,
|
||||||
|
# ``mentioned_document_ids`` is consumed by
|
||||||
|
# ``KnowledgePriorityMiddleware`` per turn via
|
||||||
|
# ``runtime.context`` (Phase 1.5). We still pass the
|
||||||
|
# caller-provided list here for the legacy fallback path
|
||||||
|
# (cache disabled / context not propagated) — the middleware
|
||||||
|
# drains its own copy after the first read so a cached graph
|
||||||
|
# never replays stale mentions.
|
||||||
mentioned_document_ids=mentioned_document_ids,
|
mentioned_document_ids=mentioned_document_ids,
|
||||||
max_input_tokens=_max_input_tokens,
|
max_input_tokens=_max_input_tokens,
|
||||||
flags=_flags,
|
flags=_flags,
|
||||||
checkpointer=checkpointer,
|
checkpointer=checkpointer,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_t0 = time.perf_counter()
|
||||||
|
if _flags.enable_agent_cache and not _flags.disable_new_agent_stack:
|
||||||
|
# Cache key components — order matters only for human readability;
|
||||||
|
# the resulting hash is what's stored. Every component must
|
||||||
|
# rotate on a real shape change AND stay stable across identical
|
||||||
|
# invocations.
|
||||||
|
cache_key = stable_hash(
|
||||||
|
"v1", # schema version of the key — bump if components change
|
||||||
|
config_id,
|
||||||
|
thread_id,
|
||||||
|
user_id,
|
||||||
|
search_space_id,
|
||||||
|
visibility,
|
||||||
|
filesystem_selection.mode,
|
||||||
|
anon_session_id,
|
||||||
|
tools_signature(
|
||||||
|
tools,
|
||||||
|
available_connectors=available_connectors,
|
||||||
|
available_document_types=available_document_types,
|
||||||
|
),
|
||||||
|
flags_signature(_flags),
|
||||||
|
system_prompt_hash(final_system_prompt),
|
||||||
|
_max_input_tokens,
|
||||||
|
# ``mentioned_document_ids`` deliberately omitted — middleware
|
||||||
|
# reads it from ``runtime.context`` (Phase 1.5).
|
||||||
|
)
|
||||||
|
agent = await get_cache().get_or_build(cache_key, builder=_build_agent)
|
||||||
|
else:
|
||||||
|
agent = await _build_agent()
|
||||||
_perf_log.info(
|
_perf_log.info(
|
||||||
"[create_agent] Middleware stack + graph compiled in %.3fs",
|
"[create_agent] Middleware stack + graph compiled in %.3fs (cache=%s)",
|
||||||
time.perf_counter() - _t0,
|
time.perf_counter() - _t0,
|
||||||
|
"on"
|
||||||
|
if _flags.enable_agent_cache and not _flags.disable_new_agent_stack
|
||||||
|
else "off",
|
||||||
)
|
)
|
||||||
|
|
||||||
_perf_log.info(
|
_perf_log.info(
|
||||||
|
|
@ -1038,6 +1110,14 @@ def _build_compiled_agent_blocking(
|
||||||
noop_mw,
|
noop_mw,
|
||||||
retry_mw,
|
retry_mw,
|
||||||
fallback_mw,
|
fallback_mw,
|
||||||
|
# Coalesce a multi-text-block system message into one block
|
||||||
|
# immediately before the model call. Sits innermost on the
|
||||||
|
# system-message-mutation chain so it observes every appender
|
||||||
|
# (todo / filesystem / skills / subagents …) and prevents
|
||||||
|
# OpenRouter→Anthropic from redistributing ``cache_control``
|
||||||
|
# across N blocks and tripping Anthropic's 4-breakpoint cap.
|
||||||
|
# See ``middleware/flatten_system.py`` for full rationale.
|
||||||
|
FlattenSystemMessageMiddleware(),
|
||||||
# Tool-call repair must run after model emits but before
|
# Tool-call repair must run after model emits but before
|
||||||
# permission / dedup / doom-loop interpret the calls.
|
# permission / dedup / doom-loop interpret the calls.
|
||||||
repair_mw,
|
repair_mw,
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,25 @@
|
||||||
"""
|
"""
|
||||||
Context schema definitions for SurfSense agents.
|
Context schema definitions for SurfSense agents.
|
||||||
|
|
||||||
This module defines the custom state schema used by the SurfSense deep agent.
|
This module defines the per-invocation context object passed to the SurfSense
|
||||||
|
deep agent via ``agent.astream_events(..., context=ctx)`` (LangGraph >= 0.6).
|
||||||
|
|
||||||
|
The agent's compiled graph is the same across invocations (and cached by
|
||||||
|
``agent_cache``), so anything that varies per turn — the user mentions a
|
||||||
|
specific document, the front-end issues a unique ``request_id``, etc. —
|
||||||
|
MUST live on this context object instead of being captured into a
|
||||||
|
middleware ``__init__`` closure. Middlewares read fields back via
|
||||||
|
``runtime.context.<field>``; tools read them via ``runtime.context``.
|
||||||
|
|
||||||
|
This object is read inside both ``KnowledgePriorityMiddleware`` (for
|
||||||
|
``mentioned_document_ids``) and any future middleware that needs
|
||||||
|
per-request state without invalidating the compiled-agent cache.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import NotRequired, TypedDict
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import TypedDict
|
||||||
|
|
||||||
|
|
||||||
class FileOperationContractState(TypedDict):
|
class FileOperationContractState(TypedDict):
|
||||||
|
|
@ -15,25 +30,35 @@ class FileOperationContractState(TypedDict):
|
||||||
turn_id: str
|
turn_id: str
|
||||||
|
|
||||||
|
|
||||||
class SurfSenseContextSchema(TypedDict):
|
@dataclass
|
||||||
|
class SurfSenseContextSchema:
|
||||||
"""
|
"""
|
||||||
Custom state schema for the SurfSense deep agent.
|
Per-invocation context for the SurfSense deep agent.
|
||||||
|
|
||||||
This extends the default agent state with custom fields.
|
Defaults are chosen so the dataclass can be safely default-constructed
|
||||||
The default state already includes:
|
(LangGraph's ``Runtime.context`` itself defaults to ``None`` if no
|
||||||
- messages: Conversation history
|
context is supplied — see ``langgraph.runtime.Runtime``). All fields
|
||||||
- todos: Task list from TodoListMiddleware
|
are optional; consumers must None-check before reading.
|
||||||
- files: Virtual filesystem from FilesystemMiddleware
|
|
||||||
|
|
||||||
We're adding fields needed for knowledge base search:
|
Phase 1.5 fields:
|
||||||
- search_space_id: The user's search space ID
|
search_space_id: Search space the request is scoped to.
|
||||||
- db_session: Database session (injected at runtime)
|
mentioned_document_ids: KB documents the user @-mentioned this turn.
|
||||||
- connector_service: Connector service instance (injected at runtime)
|
Read by ``KnowledgePriorityMiddleware`` to seed its priority
|
||||||
|
list. Stays out of the compiled-agent cache key — that's the
|
||||||
|
whole point of putting it here.
|
||||||
|
file_operation_contract: One-shot file operation contract emitted
|
||||||
|
by ``FileIntentMiddleware`` for the upcoming turn.
|
||||||
|
turn_id / request_id: Correlation IDs surfaced by the streaming
|
||||||
|
task; populated for telemetry.
|
||||||
|
|
||||||
|
Phase 2 will extend with: thread_id, user_id, visibility,
|
||||||
|
filesystem_mode, anon_session_id, available_connectors,
|
||||||
|
available_document_types, created_by_id (everything currently captured
|
||||||
|
by middleware ``__init__`` closures).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
search_space_id: int
|
search_space_id: int | None = None
|
||||||
file_operation_contract: NotRequired[FileOperationContractState]
|
mentioned_document_ids: list[int] = field(default_factory=list)
|
||||||
turn_id: NotRequired[str]
|
file_operation_contract: FileOperationContractState | None = None
|
||||||
request_id: NotRequired[str]
|
turn_id: str | None = None
|
||||||
# These are runtime-injected and won't be serialized
|
request_id: str | None = None
|
||||||
# db_session and connector_service are passed when invoking the agent
|
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,10 @@ Feature flags for the SurfSense new_chat agent stack.
|
||||||
|
|
||||||
These flags gate the newer agent middleware (some ported from OpenCode,
|
These flags gate the newer agent middleware (some ported from OpenCode,
|
||||||
some sourced from ``langchain.agents.middleware`` / ``deepagents``, some
|
some sourced from ``langchain.agents.middleware`` / ``deepagents``, some
|
||||||
SurfSense-native). They follow a "default-OFF for risky things,
|
SurfSense-native). Most shipped agent-stack upgrades default ON so Docker
|
||||||
default-ON for safe upgrades, master kill-switch for everything new" model.
|
image updates work even when older installs do not have newly introduced
|
||||||
|
environment variables. Risky/experimental integrations stay default OFF,
|
||||||
|
and the master kill-switch can still disable everything new.
|
||||||
|
|
||||||
All new middleware checks its flag at agent build time. If the master
|
All new middleware checks its flag at agent build time. If the master
|
||||||
kill-switch ``SURFSENSE_DISABLE_NEW_AGENT_STACK`` is set, every new
|
kill-switch ``SURFSENSE_DISABLE_NEW_AGENT_STACK`` is set, every new
|
||||||
|
|
@ -14,16 +16,19 @@ operators a single switch to revert to pre-port behavior.
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
|
|
||||||
Local development (recommended for trying everything except doom-loop / selector):
|
Defaults:
|
||||||
|
|
||||||
SURFSENSE_ENABLE_CONTEXT_EDITING=true
|
SURFSENSE_ENABLE_CONTEXT_EDITING=true
|
||||||
SURFSENSE_ENABLE_COMPACTION_V2=true
|
SURFSENSE_ENABLE_COMPACTION_V2=true
|
||||||
SURFSENSE_ENABLE_RETRY_AFTER=true
|
SURFSENSE_ENABLE_RETRY_AFTER=true
|
||||||
|
SURFSENSE_ENABLE_MODEL_FALLBACK=false
|
||||||
|
SURFSENSE_ENABLE_MODEL_CALL_LIMIT=true
|
||||||
|
SURFSENSE_ENABLE_TOOL_CALL_LIMIT=true
|
||||||
SURFSENSE_ENABLE_TOOL_CALL_REPAIR=true
|
SURFSENSE_ENABLE_TOOL_CALL_REPAIR=true
|
||||||
SURFSENSE_ENABLE_PERMISSION=false # default off, opt-in per deploy
|
SURFSENSE_ENABLE_PERMISSION=true
|
||||||
SURFSENSE_ENABLE_DOOM_LOOP=false # default off until UI ships
|
SURFSENSE_ENABLE_DOOM_LOOP=true
|
||||||
SURFSENSE_ENABLE_LLM_TOOL_SELECTOR=false
|
SURFSENSE_ENABLE_LLM_TOOL_SELECTOR=false # adds a per-turn LLM call
|
||||||
SURFSENSE_ENABLE_STREAM_PARITY_V2=false # structured streaming events
|
SURFSENSE_ENABLE_STREAM_PARITY_V2=true
|
||||||
|
|
||||||
Master kill-switch (overrides everything else):
|
Master kill-switch (overrides everything else):
|
||||||
|
|
||||||
|
|
@ -60,32 +65,28 @@ class AgentFeatureFlags:
|
||||||
disable_new_agent_stack: bool = False
|
disable_new_agent_stack: bool = False
|
||||||
|
|
||||||
# Agent quality — context budget, retry/limits, name-repair, doom-loop
|
# Agent quality — context budget, retry/limits, name-repair, doom-loop
|
||||||
enable_context_editing: bool = False
|
enable_context_editing: bool = True
|
||||||
enable_compaction_v2: bool = False
|
enable_compaction_v2: bool = True
|
||||||
enable_retry_after: bool = False
|
enable_retry_after: bool = True
|
||||||
enable_model_fallback: bool = False
|
enable_model_fallback: bool = False
|
||||||
enable_model_call_limit: bool = False
|
enable_model_call_limit: bool = True
|
||||||
enable_tool_call_limit: bool = False
|
enable_tool_call_limit: bool = True
|
||||||
enable_tool_call_repair: bool = False
|
enable_tool_call_repair: bool = True
|
||||||
enable_doom_loop: bool = (
|
enable_doom_loop: bool = True
|
||||||
False # Default OFF until UI handles permission='doom_loop'
|
|
||||||
)
|
|
||||||
|
|
||||||
# Safety — permissions, concurrency, tool-set narrowing
|
# Safety — permissions, concurrency, tool-set narrowing
|
||||||
enable_permission: bool = False # Default OFF for first deploy
|
enable_permission: bool = True
|
||||||
enable_busy_mutex: bool = False
|
enable_busy_mutex: bool = True
|
||||||
enable_llm_tool_selector: bool = False # Default OFF — adds per-turn LLM cost
|
enable_llm_tool_selector: bool = False # Default OFF — adds per-turn LLM cost
|
||||||
|
|
||||||
# Skills + subagents
|
# Skills + subagents
|
||||||
enable_skills: bool = False
|
enable_skills: bool = True
|
||||||
enable_specialized_subagents: bool = False
|
enable_specialized_subagents: bool = True
|
||||||
enable_kb_planner_runnable: bool = False
|
enable_kb_planner_runnable: bool = True
|
||||||
|
|
||||||
# Snapshot / revert
|
# Snapshot / revert
|
||||||
enable_action_log: bool = False
|
enable_action_log: bool = True
|
||||||
enable_revert_route: bool = (
|
enable_revert_route: bool = True
|
||||||
False # Backend ships before UI; route returns 503 until this flips
|
|
||||||
)
|
|
||||||
|
|
||||||
# Streaming parity v2 — opt in to LangChain's structured
|
# Streaming parity v2 — opt in to LangChain's structured
|
||||||
# ``AIMessageChunk`` content (typed reasoning blocks, tool-input
|
# ``AIMessageChunk`` content (typed reasoning blocks, tool-input
|
||||||
|
|
@ -94,7 +95,7 @@ class AgentFeatureFlags:
|
||||||
# text path and the synthetic ``call_<run_id>`` tool-call id (no
|
# text path and the synthetic ``call_<run_id>`` tool-call id (no
|
||||||
# ``langchainToolCallId`` propagation). Schema migrations 135/136
|
# ``langchainToolCallId`` propagation). Schema migrations 135/136
|
||||||
# ship unconditionally because they're forward-compatible.
|
# ship unconditionally because they're forward-compatible.
|
||||||
enable_stream_parity_v2: bool = False
|
enable_stream_parity_v2: bool = True
|
||||||
|
|
||||||
# Plugins
|
# Plugins
|
||||||
enable_plugin_loader: bool = False
|
enable_plugin_loader: bool = False
|
||||||
|
|
@ -102,6 +103,41 @@ class AgentFeatureFlags:
|
||||||
# Observability — OTel (orthogonal; also requires OTEL_EXPORTER_OTLP_ENDPOINT)
|
# Observability — OTel (orthogonal; also requires OTEL_EXPORTER_OTLP_ENDPOINT)
|
||||||
enable_otel: bool = False
|
enable_otel: bool = False
|
||||||
|
|
||||||
|
# Performance — compiled-agent cache (Phase 1 + Phase 2).
|
||||||
|
# When ON, ``create_surfsense_deep_agent`` reuses a previously-compiled
|
||||||
|
# graph if the cache key matches (LLM config + thread + tool surface +
|
||||||
|
# flags + system prompt + filesystem mode). Cuts per-turn agent-build
|
||||||
|
# wall clock from ~4-5s to <50µs on cache hits.
|
||||||
|
#
|
||||||
|
# SAFETY (Phase 2 unblocked this default-on):
|
||||||
|
# All connector mutation tools (``tools/notion``, ``tools/gmail``,
|
||||||
|
# ``tools/google_drive``, ``tools/dropbox``, ``tools/onedrive``,
|
||||||
|
# ``tools/google_calendar``, ``tools/confluence``, ``tools/discord``,
|
||||||
|
# ``tools/teams``, ``tools/luma``, ``connected_accounts``,
|
||||||
|
# ``update_memory``, ``search_surfsense_docs``) now acquire fresh
|
||||||
|
# short-lived ``AsyncSession`` instances per call via
|
||||||
|
# :data:`async_session_maker`. The factory still accepts ``db_session``
|
||||||
|
# for registry compatibility but ``del``'s it immediately — see any
|
||||||
|
# of those files' factory docstrings for the rationale. The ``llm``
|
||||||
|
# closure is per-(provider, model, config_id) which is already in
|
||||||
|
# the cache key, so the LLM is safe to share across cached hits of
|
||||||
|
# the same key. The KB priority middleware reads
|
||||||
|
# ``mentioned_document_ids`` from ``runtime.context`` (Phase 1.5),
|
||||||
|
# not its constructor closure, so the same compiled agent serves
|
||||||
|
# turns with different mention lists correctly.
|
||||||
|
#
|
||||||
|
# Rollback: set ``SURFSENSE_ENABLE_AGENT_CACHE=false`` in the
|
||||||
|
# environment if a regression surfaces. The path is exercised by
|
||||||
|
# the ``tests/unit/agents/new_chat/test_agent_cache_*`` suite.
|
||||||
|
enable_agent_cache: bool = True
|
||||||
|
# Phase 1 (deferred — measure first): pre-build & share the
|
||||||
|
# general-purpose subagent ``CompiledSubAgent`` across cold-cache
|
||||||
|
# misses. Only helps when the outer cache MISSES (cache hits already
|
||||||
|
# reuse the entire SubAgentMiddleware-compiled graph). Off by default
|
||||||
|
# until we have data showing cold misses are frequent enough to
|
||||||
|
# justify the extra global state.
|
||||||
|
enable_agent_cache_share_gp_subagent: bool = False
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_env(cls) -> AgentFeatureFlags:
|
def from_env(cls) -> AgentFeatureFlags:
|
||||||
"""Read flags from environment.
|
"""Read flags from environment.
|
||||||
|
|
@ -115,48 +151,76 @@ class AgentFeatureFlags:
|
||||||
"SURFSENSE_DISABLE_NEW_AGENT_STACK is set: every new agent "
|
"SURFSENSE_DISABLE_NEW_AGENT_STACK is set: every new agent "
|
||||||
"middleware is forced OFF for this build."
|
"middleware is forced OFF for this build."
|
||||||
)
|
)
|
||||||
return cls(disable_new_agent_stack=True)
|
return cls(
|
||||||
|
disable_new_agent_stack=True,
|
||||||
|
enable_context_editing=False,
|
||||||
|
enable_compaction_v2=False,
|
||||||
|
enable_retry_after=False,
|
||||||
|
enable_model_fallback=False,
|
||||||
|
enable_model_call_limit=False,
|
||||||
|
enable_tool_call_limit=False,
|
||||||
|
enable_tool_call_repair=False,
|
||||||
|
enable_doom_loop=False,
|
||||||
|
enable_permission=False,
|
||||||
|
enable_busy_mutex=False,
|
||||||
|
enable_llm_tool_selector=False,
|
||||||
|
enable_skills=False,
|
||||||
|
enable_specialized_subagents=False,
|
||||||
|
enable_kb_planner_runnable=False,
|
||||||
|
enable_action_log=False,
|
||||||
|
enable_revert_route=False,
|
||||||
|
enable_stream_parity_v2=False,
|
||||||
|
enable_plugin_loader=False,
|
||||||
|
enable_otel=False,
|
||||||
|
enable_agent_cache=False,
|
||||||
|
enable_agent_cache_share_gp_subagent=False,
|
||||||
|
)
|
||||||
|
|
||||||
return cls(
|
return cls(
|
||||||
disable_new_agent_stack=False,
|
disable_new_agent_stack=False,
|
||||||
# Agent quality
|
# Agent quality
|
||||||
enable_context_editing=_env_bool("SURFSENSE_ENABLE_CONTEXT_EDITING", False),
|
enable_context_editing=_env_bool("SURFSENSE_ENABLE_CONTEXT_EDITING", True),
|
||||||
enable_compaction_v2=_env_bool("SURFSENSE_ENABLE_COMPACTION_V2", False),
|
enable_compaction_v2=_env_bool("SURFSENSE_ENABLE_COMPACTION_V2", True),
|
||||||
enable_retry_after=_env_bool("SURFSENSE_ENABLE_RETRY_AFTER", False),
|
enable_retry_after=_env_bool("SURFSENSE_ENABLE_RETRY_AFTER", True),
|
||||||
enable_model_fallback=_env_bool("SURFSENSE_ENABLE_MODEL_FALLBACK", False),
|
enable_model_fallback=_env_bool("SURFSENSE_ENABLE_MODEL_FALLBACK", False),
|
||||||
enable_model_call_limit=_env_bool(
|
enable_model_call_limit=_env_bool(
|
||||||
"SURFSENSE_ENABLE_MODEL_CALL_LIMIT", False
|
"SURFSENSE_ENABLE_MODEL_CALL_LIMIT", True
|
||||||
),
|
),
|
||||||
enable_tool_call_limit=_env_bool("SURFSENSE_ENABLE_TOOL_CALL_LIMIT", False),
|
enable_tool_call_limit=_env_bool("SURFSENSE_ENABLE_TOOL_CALL_LIMIT", True),
|
||||||
enable_tool_call_repair=_env_bool(
|
enable_tool_call_repair=_env_bool(
|
||||||
"SURFSENSE_ENABLE_TOOL_CALL_REPAIR", False
|
"SURFSENSE_ENABLE_TOOL_CALL_REPAIR", True
|
||||||
),
|
),
|
||||||
enable_doom_loop=_env_bool("SURFSENSE_ENABLE_DOOM_LOOP", False),
|
enable_doom_loop=_env_bool("SURFSENSE_ENABLE_DOOM_LOOP", True),
|
||||||
# Safety
|
# Safety
|
||||||
enable_permission=_env_bool("SURFSENSE_ENABLE_PERMISSION", False),
|
enable_permission=_env_bool("SURFSENSE_ENABLE_PERMISSION", True),
|
||||||
enable_busy_mutex=_env_bool("SURFSENSE_ENABLE_BUSY_MUTEX", False),
|
enable_busy_mutex=_env_bool("SURFSENSE_ENABLE_BUSY_MUTEX", True),
|
||||||
enable_llm_tool_selector=_env_bool(
|
enable_llm_tool_selector=_env_bool(
|
||||||
"SURFSENSE_ENABLE_LLM_TOOL_SELECTOR", False
|
"SURFSENSE_ENABLE_LLM_TOOL_SELECTOR", False
|
||||||
),
|
),
|
||||||
# Skills + subagents
|
# Skills + subagents
|
||||||
enable_skills=_env_bool("SURFSENSE_ENABLE_SKILLS", False),
|
enable_skills=_env_bool("SURFSENSE_ENABLE_SKILLS", True),
|
||||||
enable_specialized_subagents=_env_bool(
|
enable_specialized_subagents=_env_bool(
|
||||||
"SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS", False
|
"SURFSENSE_ENABLE_SPECIALIZED_SUBAGENTS", True
|
||||||
),
|
),
|
||||||
enable_kb_planner_runnable=_env_bool(
|
enable_kb_planner_runnable=_env_bool(
|
||||||
"SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", False
|
"SURFSENSE_ENABLE_KB_PLANNER_RUNNABLE", True
|
||||||
),
|
),
|
||||||
# Snapshot / revert
|
# Snapshot / revert
|
||||||
enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", False),
|
enable_action_log=_env_bool("SURFSENSE_ENABLE_ACTION_LOG", True),
|
||||||
enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", False),
|
enable_revert_route=_env_bool("SURFSENSE_ENABLE_REVERT_ROUTE", True),
|
||||||
# Streaming parity v2
|
# Streaming parity v2
|
||||||
enable_stream_parity_v2=_env_bool(
|
enable_stream_parity_v2=_env_bool(
|
||||||
"SURFSENSE_ENABLE_STREAM_PARITY_V2", False
|
"SURFSENSE_ENABLE_STREAM_PARITY_V2", True
|
||||||
),
|
),
|
||||||
# Plugins
|
# Plugins
|
||||||
enable_plugin_loader=_env_bool("SURFSENSE_ENABLE_PLUGIN_LOADER", False),
|
enable_plugin_loader=_env_bool("SURFSENSE_ENABLE_PLUGIN_LOADER", False),
|
||||||
# Observability
|
# Observability
|
||||||
enable_otel=_env_bool("SURFSENSE_ENABLE_OTEL", False),
|
enable_otel=_env_bool("SURFSENSE_ENABLE_OTEL", False),
|
||||||
|
# Performance
|
||||||
|
enable_agent_cache=_env_bool("SURFSENSE_ENABLE_AGENT_CACHE", True),
|
||||||
|
enable_agent_cache_share_gp_subagent=_env_bool(
|
||||||
|
"SURFSENSE_ENABLE_AGENT_CACHE_SHARE_GP_SUBAGENT", False
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
def any_new_middleware_enabled(self) -> bool:
|
def any_new_middleware_enabled(self) -> bool:
|
||||||
|
|
|
||||||
|
|
@ -90,41 +90,18 @@ class SanitizedChatLiteLLM(ChatLiteLLM):
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
|
|
||||||
# Provider mapping for LiteLLM model string construction
|
# Provider mapping for LiteLLM model string construction.
|
||||||
PROVIDER_MAP = {
|
#
|
||||||
"OPENAI": "openai",
|
# Single source of truth lives in
|
||||||
"ANTHROPIC": "anthropic",
|
# :mod:`app.services.provider_capabilities` so the YAML loader (which
|
||||||
"GROQ": "groq",
|
# runs during ``app.config`` class-body init) can resolve provider
|
||||||
"COHERE": "cohere",
|
# prefixes without dragging the agent / tools tree into module load
|
||||||
"GOOGLE": "gemini",
|
# order. Re-exported here under the historical ``PROVIDER_MAP`` name
|
||||||
"OLLAMA": "ollama_chat",
|
# so existing callers (``llm_router_service``, ``image_gen_router_service``,
|
||||||
"MISTRAL": "mistral",
|
# tests) keep working unchanged.
|
||||||
"AZURE_OPENAI": "azure",
|
from app.services.provider_capabilities import ( # noqa: E402
|
||||||
"OPENROUTER": "openrouter",
|
_PROVIDER_PREFIX_MAP as PROVIDER_MAP,
|
||||||
"XAI": "xai",
|
)
|
||||||
"BEDROCK": "bedrock",
|
|
||||||
"VERTEX_AI": "vertex_ai",
|
|
||||||
"TOGETHER_AI": "together_ai",
|
|
||||||
"FIREWORKS_AI": "fireworks_ai",
|
|
||||||
"DEEPSEEK": "openai",
|
|
||||||
"ALIBABA_QWEN": "openai",
|
|
||||||
"MOONSHOT": "openai",
|
|
||||||
"ZHIPU": "openai",
|
|
||||||
"GITHUB_MODELS": "github",
|
|
||||||
"REPLICATE": "replicate",
|
|
||||||
"PERPLEXITY": "perplexity",
|
|
||||||
"ANYSCALE": "anyscale",
|
|
||||||
"DEEPINFRA": "deepinfra",
|
|
||||||
"CEREBRAS": "cerebras",
|
|
||||||
"SAMBANOVA": "sambanova",
|
|
||||||
"AI21": "ai21",
|
|
||||||
"CLOUDFLARE": "cloudflare",
|
|
||||||
"DATABRICKS": "databricks",
|
|
||||||
"COMETAPI": "cometapi",
|
|
||||||
"HUGGINGFACE": "huggingface",
|
|
||||||
"MINIMAX": "openai",
|
|
||||||
"CUSTOM": "custom",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _attach_model_profile(llm: ChatLiteLLM, model_string: str) -> None:
|
def _attach_model_profile(llm: ChatLiteLLM, model_string: str) -> None:
|
||||||
|
|
@ -178,6 +155,17 @@ class AgentConfig:
|
||||||
anonymous_enabled: bool = False
|
anonymous_enabled: bool = False
|
||||||
quota_reserve_tokens: int | None = None
|
quota_reserve_tokens: int | None = None
|
||||||
|
|
||||||
|
# Capability flag: best-effort True for the chat selector / catalog.
|
||||||
|
# Resolved via :func:`provider_capabilities.derive_supports_image_input`
|
||||||
|
# which prefers OpenRouter's ``architecture.input_modalities`` and
|
||||||
|
# otherwise consults LiteLLM's authoritative model map. Default True
|
||||||
|
# is the conservative-allow stance — the streaming-task safety net
|
||||||
|
# (``is_known_text_only_chat_model``) is the *only* place a False
|
||||||
|
# actually blocks a request. Setting this to False here without an
|
||||||
|
# authoritative source would silently hide vision-capable models
|
||||||
|
# (the regression we're fixing).
|
||||||
|
supports_image_input: bool = True
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_auto_mode(cls) -> "AgentConfig":
|
def from_auto_mode(cls) -> "AgentConfig":
|
||||||
"""
|
"""
|
||||||
|
|
@ -203,6 +191,12 @@ class AgentConfig:
|
||||||
is_premium=False,
|
is_premium=False,
|
||||||
anonymous_enabled=False,
|
anonymous_enabled=False,
|
||||||
quota_reserve_tokens=None,
|
quota_reserve_tokens=None,
|
||||||
|
# Auto routes across the configured pool, which usually
|
||||||
|
# contains at least one vision-capable deployment; the router
|
||||||
|
# will surface a 404 from a non-vision deployment as a normal
|
||||||
|
# ``allowed_fails`` event and fail over rather than blocking
|
||||||
|
# the request outright.
|
||||||
|
supports_image_input=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
@ -216,10 +210,24 @@ class AgentConfig:
|
||||||
Returns:
|
Returns:
|
||||||
AgentConfig instance
|
AgentConfig instance
|
||||||
"""
|
"""
|
||||||
return cls(
|
# Lazy import to avoid pulling provider_capabilities (and its
|
||||||
provider=config.provider.value
|
# transitive litellm import) into module-init order.
|
||||||
|
from app.services.provider_capabilities import derive_supports_image_input
|
||||||
|
|
||||||
|
provider_value = (
|
||||||
|
config.provider.value
|
||||||
if hasattr(config.provider, "value")
|
if hasattr(config.provider, "value")
|
||||||
else str(config.provider),
|
else str(config.provider)
|
||||||
|
)
|
||||||
|
litellm_params = config.litellm_params or {}
|
||||||
|
base_model = (
|
||||||
|
litellm_params.get("base_model")
|
||||||
|
if isinstance(litellm_params, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
|
return cls(
|
||||||
|
provider=provider_value,
|
||||||
model_name=config.model_name,
|
model_name=config.model_name,
|
||||||
api_key=config.api_key,
|
api_key=config.api_key,
|
||||||
api_base=config.api_base,
|
api_base=config.api_base,
|
||||||
|
|
@ -235,6 +243,16 @@ class AgentConfig:
|
||||||
is_premium=False,
|
is_premium=False,
|
||||||
anonymous_enabled=False,
|
anonymous_enabled=False,
|
||||||
quota_reserve_tokens=None,
|
quota_reserve_tokens=None,
|
||||||
|
# BYOK rows have no operator-curated capability flag, so we
|
||||||
|
# ask LiteLLM (default-allow on unknown). The streaming
|
||||||
|
# safety net still blocks if the model is *explicitly*
|
||||||
|
# marked text-only.
|
||||||
|
supports_image_input=derive_supports_image_input(
|
||||||
|
provider=provider_value,
|
||||||
|
model_name=config.model_name,
|
||||||
|
base_model=base_model,
|
||||||
|
custom_provider=config.custom_provider,
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
@ -253,15 +271,46 @@ class AgentConfig:
|
||||||
Returns:
|
Returns:
|
||||||
AgentConfig instance
|
AgentConfig instance
|
||||||
"""
|
"""
|
||||||
|
# Lazy import to avoid pulling provider_capabilities (and its
|
||||||
|
# transitive litellm import) into module-init order.
|
||||||
|
from app.services.provider_capabilities import derive_supports_image_input
|
||||||
|
|
||||||
# Get system instructions from YAML, default to empty string
|
# Get system instructions from YAML, default to empty string
|
||||||
system_instructions = yaml_config.get("system_instructions", "")
|
system_instructions = yaml_config.get("system_instructions", "")
|
||||||
|
|
||||||
|
provider = yaml_config.get("provider", "").upper()
|
||||||
|
model_name = yaml_config.get("model_name", "")
|
||||||
|
custom_provider = yaml_config.get("custom_provider")
|
||||||
|
litellm_params = yaml_config.get("litellm_params") or {}
|
||||||
|
base_model = (
|
||||||
|
litellm_params.get("base_model")
|
||||||
|
if isinstance(litellm_params, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
|
# Explicit YAML override wins; otherwise derive from LiteLLM /
|
||||||
|
# OpenRouter modalities. The YAML loader already populates this
|
||||||
|
# field, but this method is also called from
|
||||||
|
# ``load_global_llm_config_by_id``'s file fallback (hot reload),
|
||||||
|
# so we re-derive here for safety. The bool() coercion preserves
|
||||||
|
# the loader's behaviour for explicit ``true`` / ``false``
|
||||||
|
# strings that PyYAML may surface.
|
||||||
|
if "supports_image_input" in yaml_config:
|
||||||
|
supports_image_input = bool(yaml_config.get("supports_image_input"))
|
||||||
|
else:
|
||||||
|
supports_image_input = derive_supports_image_input(
|
||||||
|
provider=provider,
|
||||||
|
model_name=model_name,
|
||||||
|
base_model=base_model,
|
||||||
|
custom_provider=custom_provider,
|
||||||
|
)
|
||||||
|
|
||||||
return cls(
|
return cls(
|
||||||
provider=yaml_config.get("provider", "").upper(),
|
provider=provider,
|
||||||
model_name=yaml_config.get("model_name", ""),
|
model_name=model_name,
|
||||||
api_key=yaml_config.get("api_key", ""),
|
api_key=yaml_config.get("api_key", ""),
|
||||||
api_base=yaml_config.get("api_base"),
|
api_base=yaml_config.get("api_base"),
|
||||||
custom_provider=yaml_config.get("custom_provider"),
|
custom_provider=custom_provider,
|
||||||
litellm_params=yaml_config.get("litellm_params"),
|
litellm_params=yaml_config.get("litellm_params"),
|
||||||
# Prompt configuration from YAML (with defaults for backwards compatibility)
|
# Prompt configuration from YAML (with defaults for backwards compatibility)
|
||||||
system_instructions=system_instructions if system_instructions else None,
|
system_instructions=system_instructions if system_instructions else None,
|
||||||
|
|
@ -276,6 +325,7 @@ class AgentConfig:
|
||||||
is_premium=yaml_config.get("billing_tier", "free") == "premium",
|
is_premium=yaml_config.get("billing_tier", "free") == "premium",
|
||||||
anonymous_enabled=yaml_config.get("anonymous_enabled", False),
|
anonymous_enabled=yaml_config.get("anonymous_enabled", False),
|
||||||
quota_reserve_tokens=yaml_config.get("quota_reserve_tokens"),
|
quota_reserve_tokens=yaml_config.get("quota_reserve_tokens"),
|
||||||
|
supports_image_input=supports_image_input,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,9 @@ from app.agents.new_chat.middleware.file_intent import (
|
||||||
from app.agents.new_chat.middleware.filesystem import (
|
from app.agents.new_chat.middleware.filesystem import (
|
||||||
SurfSenseFilesystemMiddleware,
|
SurfSenseFilesystemMiddleware,
|
||||||
)
|
)
|
||||||
|
from app.agents.new_chat.middleware.flatten_system import (
|
||||||
|
FlattenSystemMessageMiddleware,
|
||||||
|
)
|
||||||
from app.agents.new_chat.middleware.kb_persistence import (
|
from app.agents.new_chat.middleware.kb_persistence import (
|
||||||
KnowledgeBasePersistenceMiddleware,
|
KnowledgeBasePersistenceMiddleware,
|
||||||
commit_staged_filesystem_state,
|
commit_staged_filesystem_state,
|
||||||
|
|
@ -61,6 +64,7 @@ __all__ = [
|
||||||
"DedupHITLToolCallsMiddleware",
|
"DedupHITLToolCallsMiddleware",
|
||||||
"DoomLoopMiddleware",
|
"DoomLoopMiddleware",
|
||||||
"FileIntentMiddleware",
|
"FileIntentMiddleware",
|
||||||
|
"FlattenSystemMessageMiddleware",
|
||||||
"KnowledgeBasePersistenceMiddleware",
|
"KnowledgeBasePersistenceMiddleware",
|
||||||
"KnowledgeBaseSearchMiddleware",
|
"KnowledgeBaseSearchMiddleware",
|
||||||
"KnowledgePriorityMiddleware",
|
"KnowledgePriorityMiddleware",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,233 @@
|
||||||
|
r"""Coalesce multi-block system messages into a single text block.
|
||||||
|
|
||||||
|
Several middlewares in our deepagent stack each call
|
||||||
|
``append_to_system_message`` on the way down to the model
|
||||||
|
(``TodoListMiddleware``, ``SurfSenseFilesystemMiddleware``,
|
||||||
|
``SkillsMiddleware``, ``SubAgentMiddleware`` …). By the time the
|
||||||
|
request reaches the LLM, the system message has 5+ separate text blocks.
|
||||||
|
|
||||||
|
Anthropic enforces a hard cap of **4 ``cache_control`` blocks per
|
||||||
|
request**, and we configure 2 injection points
|
||||||
|
(``index: 0`` + ``index: -1``). With ``index: 0`` always targeting
|
||||||
|
the prepended ``request.system_message``, this middleware is the
|
||||||
|
defensive partner: it guarantees that "the system block" is *one*
|
||||||
|
content block, so LiteLLM's ``AnthropicCacheControlHook`` and any
|
||||||
|
OpenRouter→Anthropic transformer can never multiply our budget into
|
||||||
|
several breakpoints by spreading ``cache_control`` across multiple
|
||||||
|
text blocks of a multi-block system content.
|
||||||
|
|
||||||
|
Without flattening we used to see::
|
||||||
|
|
||||||
|
OpenrouterException - {"error":{"message":"Provider returned error",
|
||||||
|
"code":400,"metadata":{"raw":"...A maximum of 4 blocks with
|
||||||
|
cache_control may be provided. Found 5."}}}
|
||||||
|
|
||||||
|
(Same error class documented in
|
||||||
|
https://github.com/BerriAI/litellm/issues/15696 and
|
||||||
|
https://github.com/BerriAI/litellm/issues/20485 — the litellm-side fix
|
||||||
|
in PR #15395 covers the litellm transformer but does not protect us
|
||||||
|
when the OpenRouter SaaS itself does the redistribution.)
|
||||||
|
|
||||||
|
A separate fix in :mod:`app.agents.new_chat.prompt_caching` (switching
|
||||||
|
the first injection point from ``role: system`` to ``index: 0``)
|
||||||
|
neutralises the *primary* cause of the same 400 — multiple
|
||||||
|
``SystemMessage``\ s injected by ``before_agent`` middlewares
|
||||||
|
(priority/tree/memory/file-intent/anonymous-doc) accumulating across
|
||||||
|
turns, each tagged with ``cache_control`` by the ``role: system``
|
||||||
|
matcher. This middleware remains useful as defence-in-depth against
|
||||||
|
the multi-block redistribution path.
|
||||||
|
|
||||||
|
Placement: innermost on the system-message-mutation chain, after every
|
||||||
|
appender (``todo``/``filesystem``/``skills``/``subagents``) and after
|
||||||
|
summarization, but before ``noop``/``retry``/``fallback`` so each retry
|
||||||
|
attempt sees a flattened payload. See ``chat_deepagent.py``.
|
||||||
|
|
||||||
|
Idempotent: a string-content system message is left untouched. A list
|
||||||
|
that contains anything other than plain text blocks (e.g. an image) is
|
||||||
|
also left untouched — those are rare on system messages and we'd lose
|
||||||
|
the non-text payload by joining.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from langchain.agents.middleware.types import (
|
||||||
|
AgentMiddleware,
|
||||||
|
AgentState,
|
||||||
|
ContextT,
|
||||||
|
ModelRequest,
|
||||||
|
ModelResponse,
|
||||||
|
ResponseT,
|
||||||
|
)
|
||||||
|
from langchain_core.messages import SystemMessage
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _flatten_text_blocks(content: list[Any]) -> str | None:
|
||||||
|
"""Return joined text if every block is a plain ``{"type": "text"}``.
|
||||||
|
|
||||||
|
Returns ``None`` when the list contains anything that isn't a text
|
||||||
|
block we can safely concatenate (image, audio, file, non-standard
|
||||||
|
blocks, dicts with extra non-cache_control fields). The caller
|
||||||
|
leaves the original content untouched in that case rather than
|
||||||
|
silently dropping payload.
|
||||||
|
|
||||||
|
``cache_control`` on individual blocks is intentionally discarded —
|
||||||
|
the whole point of flattening is to let LiteLLM's
|
||||||
|
``cache_control_injection_points`` re-place a single breakpoint on
|
||||||
|
the resulting one-block system content.
|
||||||
|
"""
|
||||||
|
chunks: list[str] = []
|
||||||
|
for block in content:
|
||||||
|
if isinstance(block, str):
|
||||||
|
chunks.append(block)
|
||||||
|
continue
|
||||||
|
if not isinstance(block, dict):
|
||||||
|
return None
|
||||||
|
if block.get("type") != "text":
|
||||||
|
return None
|
||||||
|
text = block.get("text")
|
||||||
|
if not isinstance(text, str):
|
||||||
|
return None
|
||||||
|
chunks.append(text)
|
||||||
|
return "\n\n".join(chunks)
|
||||||
|
|
||||||
|
|
||||||
|
def _flattened_request(
|
||||||
|
request: ModelRequest[ContextT],
|
||||||
|
) -> ModelRequest[ContextT] | None:
|
||||||
|
"""Return a request with system_message flattened, or ``None`` for no-op."""
|
||||||
|
sys_msg = request.system_message
|
||||||
|
if sys_msg is None:
|
||||||
|
return None
|
||||||
|
content = sys_msg.content
|
||||||
|
if not isinstance(content, list) or len(content) <= 1:
|
||||||
|
return None
|
||||||
|
|
||||||
|
flattened = _flatten_text_blocks(content)
|
||||||
|
if flattened is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
new_sys = SystemMessage(
|
||||||
|
content=flattened,
|
||||||
|
additional_kwargs=dict(sys_msg.additional_kwargs),
|
||||||
|
response_metadata=dict(sys_msg.response_metadata),
|
||||||
|
)
|
||||||
|
if sys_msg.id is not None:
|
||||||
|
new_sys.id = sys_msg.id
|
||||||
|
return request.override(system_message=new_sys)
|
||||||
|
|
||||||
|
|
||||||
|
def _diagnostic_summary(request: ModelRequest[Any]) -> str:
|
||||||
|
"""One-line dump of cache_control-relevant request shape.
|
||||||
|
|
||||||
|
Temporary diagnostic to prove where the ``Found N`` cache_control
|
||||||
|
breakpoints are coming from when Anthropic 400s. Removed once the
|
||||||
|
root cause is confirmed and a fix is in place.
|
||||||
|
"""
|
||||||
|
sys_msg = request.system_message
|
||||||
|
if sys_msg is None:
|
||||||
|
sys_shape = "none"
|
||||||
|
elif isinstance(sys_msg.content, str):
|
||||||
|
sys_shape = f"str(len={len(sys_msg.content)})"
|
||||||
|
elif isinstance(sys_msg.content, list):
|
||||||
|
sys_shape = f"list(blocks={len(sys_msg.content)})"
|
||||||
|
else:
|
||||||
|
sys_shape = f"other({type(sys_msg.content).__name__})"
|
||||||
|
|
||||||
|
role_hist: list[str] = []
|
||||||
|
multi_block_msgs = 0
|
||||||
|
msgs_with_cc = 0
|
||||||
|
sys_msgs_in_history = 0
|
||||||
|
for m in request.messages:
|
||||||
|
mtype = getattr(m, "type", type(m).__name__)
|
||||||
|
role_hist.append(mtype)
|
||||||
|
if isinstance(m, SystemMessage):
|
||||||
|
sys_msgs_in_history += 1
|
||||||
|
c = getattr(m, "content", None)
|
||||||
|
if isinstance(c, list):
|
||||||
|
multi_block_msgs += 1
|
||||||
|
for blk in c:
|
||||||
|
if isinstance(blk, dict) and "cache_control" in blk:
|
||||||
|
msgs_with_cc += 1
|
||||||
|
break
|
||||||
|
if "cache_control" in getattr(m, "additional_kwargs", {}) or {}:
|
||||||
|
msgs_with_cc += 1
|
||||||
|
|
||||||
|
tools = request.tools or []
|
||||||
|
tools_with_cc = 0
|
||||||
|
for t in tools:
|
||||||
|
if isinstance(t, dict) and (
|
||||||
|
"cache_control" in t or "cache_control" in t.get("function", {})
|
||||||
|
):
|
||||||
|
tools_with_cc += 1
|
||||||
|
|
||||||
|
return (
|
||||||
|
f"sys={sys_shape} msgs={len(request.messages)} "
|
||||||
|
f"sys_msgs_in_history={sys_msgs_in_history} "
|
||||||
|
f"multi_block_msgs={multi_block_msgs} pre_existing_msg_cc={msgs_with_cc} "
|
||||||
|
f"tools={len(tools)} pre_existing_tool_cc={tools_with_cc} "
|
||||||
|
f"roles={role_hist[-8:]}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FlattenSystemMessageMiddleware(
    AgentMiddleware[AgentState[ResponseT], ContextT, ResponseT]
):
    """Collapse a multi-text-block system message into one string.

    Both the sync and async hooks do the same thing: when
    ``_flattened_request`` returns a rewritten request, that request is
    passed to ``handler``; otherwise the original request is forwarded
    unchanged. The text of every block is preserved and joined with a
    blank line. The middleware contributes no tools.
    """

    def __init__(self) -> None:
        super().__init__()
        # This middleware registers no tools of its own.
        self.tools = []

    def wrap_model_call(  # type: ignore[override]
        self,
        request: ModelRequest[ContextT],
        handler: Callable[[ModelRequest[ContextT]], ModelResponse[ResponseT]],
    ) -> Any:
        """Synchronous hook: forward a flattened request when one applies."""
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("[flatten_system_diag] %s", _diagnostic_summary(request))

        collapsed = _flattened_request(request)
        if collapsed is None:
            # Nothing to flatten: pass the request through untouched.
            return handler(request)

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "[flatten_system] collapsed %d system blocks to one",
                len(request.system_message.content),  # type: ignore[arg-type, union-attr]
            )
        return handler(collapsed)

    async def awrap_model_call(  # type: ignore[override]
        self,
        request: ModelRequest[ContextT],
        handler: Callable[
            [ModelRequest[ContextT]], Awaitable[ModelResponse[ResponseT]]
        ],
    ) -> Any:
        """Asynchronous hook: same logic as ``wrap_model_call``, awaited."""
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("[flatten_system_diag] %s", _diagnostic_summary(request))

        collapsed = _flattened_request(request)
        if collapsed is None:
            # Nothing to flatten: pass the request through untouched.
            return await handler(request)

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                "[flatten_system] collapsed %d system blocks to one",
                len(request.system_message.content),  # type: ignore[arg-type, union-attr]
            )
        return await handler(collapsed)
|
||||||
|
|
||||||
|
|
||||||
|
# Names exported by this module: the middleware class plus the two
# flattening helpers.
__all__ = [
    "FlattenSystemMessageMiddleware",
    "_flatten_text_blocks",
    "_flattened_request",
]
|
||||||
|
|
@ -732,7 +732,6 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
||||||
state: AgentState,
|
state: AgentState,
|
||||||
runtime: Runtime[Any],
|
runtime: Runtime[Any],
|
||||||
) -> dict[str, Any] | None:
|
) -> dict[str, Any] | None:
|
||||||
del runtime
|
|
||||||
if self.filesystem_mode != FilesystemMode.CLOUD:
|
if self.filesystem_mode != FilesystemMode.CLOUD:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -755,7 +754,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
||||||
if anon_doc:
|
if anon_doc:
|
||||||
return self._anon_priority(state, anon_doc)
|
return self._anon_priority(state, anon_doc)
|
||||||
|
|
||||||
return await self._authenticated_priority(state, messages, user_text)
|
return await self._authenticated_priority(state, messages, user_text, runtime)
|
||||||
|
|
||||||
def _anon_priority(
|
def _anon_priority(
|
||||||
self,
|
self,
|
||||||
|
|
@ -787,6 +786,7 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
||||||
state: AgentState,
|
state: AgentState,
|
||||||
messages: Sequence[BaseMessage],
|
messages: Sequence[BaseMessage],
|
||||||
user_text: str,
|
user_text: str,
|
||||||
|
runtime: Runtime[Any] | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
t0 = asyncio.get_event_loop().time()
|
t0 = asyncio.get_event_loop().time()
|
||||||
(
|
(
|
||||||
|
|
@ -799,13 +799,45 @@ class KnowledgePriorityMiddleware(AgentMiddleware): # type: ignore[type-arg]
|
||||||
user_text=user_text,
|
user_text=user_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
mentioned_results: list[dict[str, Any]] = []
|
# Per-turn ``mentioned_document_ids`` flow:
|
||||||
|
# 1. Preferred path (Phase 1.5+): read from ``runtime.context`` — the
|
||||||
|
# streaming task supplies a fresh :class:`SurfSenseContextSchema`
|
||||||
|
# on every ``astream_events`` call, so this list is naturally
|
||||||
|
# scoped to the current turn. Allows cross-turn graph reuse via
|
||||||
|
# ``agent_cache``.
|
||||||
|
# 2. Legacy fallback (cache disabled / context not propagated): the
|
||||||
|
# constructor-injected ``self.mentioned_document_ids`` list. We
|
||||||
|
# drain it after the first read so a cached graph (no Phase 1.5
|
||||||
|
# wiring) doesn't keep replaying the same mentions on every
|
||||||
|
# turn.
|
||||||
|
#
|
||||||
|
# CRITICAL: distinguish "context absent" (legacy caller, no field at
|
||||||
|
# all) from "context provided but empty" (turn with no mentions).
|
||||||
|
# ``ctx_mentions`` is a ``list[int]``; an empty list is falsy in
|
||||||
|
# Python, so a naive ``if ctx_mentions:`` would fall through to the
|
||||||
|
# legacy closure on every no-mention follow-up turn — replaying the
|
||||||
|
# mentions baked in by turn 1's cache-miss build. Always drain the
|
||||||
|
# closure once the runtime path has fired so a cached middleware
|
||||||
|
# instance can never resurrect stale state.
|
||||||
|
mention_ids: list[int] = []
|
||||||
|
ctx = getattr(runtime, "context", None) if runtime is not None else None
|
||||||
|
ctx_mentions = getattr(ctx, "mentioned_document_ids", None) if ctx else None
|
||||||
|
if ctx_mentions is not None:
|
||||||
|
# Runtime path is authoritative — even an empty list means
|
||||||
|
# "this turn has no mentions", NOT "look at the closure".
|
||||||
|
mention_ids = list(ctx_mentions)
|
||||||
if self.mentioned_document_ids:
|
if self.mentioned_document_ids:
|
||||||
|
self.mentioned_document_ids = []
|
||||||
|
elif self.mentioned_document_ids:
|
||||||
|
mention_ids = list(self.mentioned_document_ids)
|
||||||
|
self.mentioned_document_ids = []
|
||||||
|
|
||||||
|
mentioned_results: list[dict[str, Any]] = []
|
||||||
|
if mention_ids:
|
||||||
mentioned_results = await fetch_mentioned_documents(
|
mentioned_results = await fetch_mentioned_documents(
|
||||||
document_ids=self.mentioned_document_ids,
|
document_ids=mention_ids,
|
||||||
search_space_id=self.search_space_id,
|
search_space_id=self.search_space_id,
|
||||||
)
|
)
|
||||||
self.mentioned_document_ids = []
|
|
||||||
|
|
||||||
if is_recency:
|
if is_recency:
|
||||||
doc_types = _resolve_search_types(
|
doc_types = _resolve_search_types(
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
"""LiteLLM-native prompt caching configuration for SurfSense agents.
|
r"""LiteLLM-native prompt caching configuration for SurfSense agents.
|
||||||
|
|
||||||
Replaces the legacy ``AnthropicPromptCachingMiddleware`` (which never
|
Replaces the legacy ``AnthropicPromptCachingMiddleware`` (which never
|
||||||
activated for our LiteLLM-based stack — its ``isinstance(model, ChatAnthropic)``
|
activated for our LiteLLM-based stack — its ``isinstance(model, ChatAnthropic)``
|
||||||
|
|
@ -17,8 +17,20 @@ Coverage:
|
||||||
|
|
||||||
We inject **two** breakpoints per request:
|
We inject **two** breakpoints per request:
|
||||||
|
|
||||||
- ``role: system`` — pins the SurfSense system prompt (provider variant,
|
- ``index: 0`` — pins the SurfSense system prompt at the head of the
|
||||||
citation rules, tool catalog, KB tree, skills metadata) into the cache.
|
request (provider variant, citation rules, tool catalog, KB tree,
|
||||||
|
skills metadata). The langchain agent factory always prepends
|
||||||
|
``request.system_message`` at index 0 (see ``factory.py``
|
||||||
|
``_execute_model_async``), so this targets exactly the main system
|
||||||
|
prompt regardless of how many other ``SystemMessage``\ s the
|
||||||
|
``before_agent`` injectors (priority, tree, memory, file-intent,
|
||||||
|
anonymous-doc) have inserted into ``state["messages"]``. Using
|
||||||
|
``role: system`` here would apply ``cache_control`` to **every**
|
||||||
|
system-role message and trip Anthropic's hard cap of 4 cache
|
||||||
|
breakpoints per request once the conversation accumulates enough
|
||||||
|
injected system messages — which surfaces as the upstream 400
|
||||||
|
``A maximum of 4 blocks with cache_control may be provided. Found N``
|
||||||
|
via OpenRouter→Anthropic.
|
||||||
- ``index: -1`` — pins the latest message so multi-turn savings compound:
|
- ``index: -1`` — pins the latest message so multi-turn savings compound:
|
||||||
Anthropic-family providers use longest-matching-prefix lookup, so turn
|
Anthropic-family providers use longest-matching-prefix lookup, so turn
|
||||||
N+1 still reads turn N's cache up to the shared prefix.
|
N+1 still reads turn N's cache up to the shared prefix.
|
||||||
|
|
@@ -51,11 +63,21 @@ if TYPE_CHECKING:
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# Two-breakpoint policy: system + latest message. See module docstring for
|
# Two-breakpoint policy: head-of-request + latest message. See module
|
||||||
# rationale. Anthropic limits requests to 4 ``cache_control`` blocks; we
|
# docstring for rationale. Anthropic caps requests at 4 ``cache_control``
|
||||||
# use 2 here, leaving headroom for Phase-2 tool caching.
|
# blocks; we use 2 here, leaving headroom for Phase-2 tool caching.
|
||||||
|
#
|
||||||
|
# IMPORTANT: ``index: 0`` (not ``role: system``). The deepagent stack's
|
||||||
|
# ``before_agent`` middlewares (priority, tree, memory, file-intent,
|
||||||
|
# anonymous-doc) insert ``SystemMessage`` instances into
|
||||||
|
# ``state["messages"]`` that accumulate across turns. With
|
||||||
|
# ``role: system`` the LiteLLM hook would tag *every* one of them with
|
||||||
|
# ``cache_control`` and overflow Anthropic's 4-block limit. ``index: 0``
|
||||||
|
# always targets the langchain-prepended ``request.system_message``
|
||||||
|
# (which our ``FlattenSystemMessageMiddleware`` reduces to a single text
|
||||||
|
# block), giving us exactly one stable cache breakpoint.
|
||||||
_DEFAULT_INJECTION_POINTS: tuple[dict[str, Any], ...] = (
|
_DEFAULT_INJECTION_POINTS: tuple[dict[str, Any], ...] = (
|
||||||
{"location": "message", "role": "system"},
|
{"location": "message", "index": 0},
|
||||||
{"location": "message", "index": -1},
|
{"location": "message", "index": -1},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -7,6 +7,7 @@ from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.confluence_history import ConfluenceHistoryConnector
|
from app.connectors.confluence_history import ConfluenceHistoryConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.confluence import ConfluenceToolMetadataService
|
from app.services.confluence import ConfluenceToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@@ -18,6 +19,23 @@ def create_create_confluence_page_tool(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
connector_id: int | None = None,
|
connector_id: int | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the create_confluence_page tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured create_confluence_page tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_confluence_page(
|
async def create_confluence_page(
|
||||||
title: str,
|
title: str,
|
||||||
|
|
@@ -42,13 +60,14 @@ def create_create_confluence_page_tool(
|
||||||
"""
|
"""
|
||||||
logger.info(f"create_confluence_page called: title='{title}'")
|
logger.info(f"create_confluence_page called: title='{title}'")
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Confluence tool not properly configured.",
|
"message": "Confluence tool not properly configured.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = ConfluenceToolMetadataService(db_session)
|
metadata_service = ConfluenceToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_creation_context(
|
context = await metadata_service.get_creation_context(
|
||||||
search_space_id, user_id
|
search_space_id, user_id
|
||||||
|
|
@@ -183,7 +202,9 @@ def create_create_confluence_page_tool(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
)
|
)
|
||||||
if kb_result["status"] == "success":
|
if kb_result["status"] == "success":
|
||||||
kb_message_suffix = " Your knowledge base has also been updated."
|
kb_message_suffix = (
|
||||||
|
" Your knowledge base has also been updated."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
kb_message_suffix = " This page will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This page will be added to your knowledge base in the next scheduled sync."
|
||||||
except Exception as kb_err:
|
except Exception as kb_err:
|
||||||
|
|
|
||||||
|
|
@@ -7,6 +7,7 @@ from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.confluence_history import ConfluenceHistoryConnector
|
from app.connectors.confluence_history import ConfluenceHistoryConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.confluence import ConfluenceToolMetadataService
|
from app.services.confluence import ConfluenceToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@@ -18,6 +19,23 @@ def create_delete_confluence_page_tool(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
connector_id: int | None = None,
|
connector_id: int | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the delete_confluence_page tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured delete_confluence_page tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def delete_confluence_page(
|
async def delete_confluence_page(
|
||||||
page_title_or_id: str,
|
page_title_or_id: str,
|
||||||
|
|
@@ -43,13 +61,14 @@ def create_delete_confluence_page_tool(
|
||||||
f"delete_confluence_page called: page_title_or_id='{page_title_or_id}'"
|
f"delete_confluence_page called: page_title_or_id='{page_title_or_id}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Confluence tool not properly configured.",
|
"message": "Confluence tool not properly configured.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = ConfluenceToolMetadataService(db_session)
|
metadata_service = ConfluenceToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_deletion_context(
|
context = await metadata_service.get_deletion_context(
|
||||||
search_space_id, user_id, page_title_or_id
|
search_space_id, user_id, page_title_or_id
|
||||||
|
|
@@ -95,7 +114,9 @@ def create_delete_confluence_page_tool(
|
||||||
final_connector_id = result.params.get(
|
final_connector_id = result.params.get(
|
||||||
"connector_id", connector_id_from_context
|
"connector_id", connector_id_from_context
|
||||||
)
|
)
|
||||||
final_delete_from_kb = result.params.get("delete_from_kb", delete_from_kb)
|
final_delete_from_kb = result.params.get(
|
||||||
|
"delete_from_kb", delete_from_kb
|
||||||
|
)
|
||||||
|
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
|
|
||||||
|
|
@@ -135,7 +156,10 @@ def create_delete_confluence_page_tool(
|
||||||
or "status code 403" in str(api_err).lower()
|
or "status code 403" in str(api_err).lower()
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
connector.config = {**connector.config, "auth_expired": True}
|
connector.config = {
|
||||||
|
**connector.config,
|
||||||
|
"auth_expired": True,
|
||||||
|
}
|
||||||
flag_modified(connector, "config")
|
flag_modified(connector, "config")
|
||||||
await db_session.commit()
|
await db_session.commit()
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@@ -7,6 +7,7 @@ from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.confluence_history import ConfluenceHistoryConnector
|
from app.connectors.confluence_history import ConfluenceHistoryConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.confluence import ConfluenceToolMetadataService
|
from app.services.confluence import ConfluenceToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@@ -18,6 +19,23 @@ def create_update_confluence_page_tool(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
connector_id: int | None = None,
|
connector_id: int | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the update_confluence_page tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured update_confluence_page tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def update_confluence_page(
|
async def update_confluence_page(
|
||||||
page_title_or_id: str,
|
page_title_or_id: str,
|
||||||
|
|
@@ -45,13 +63,14 @@ def create_update_confluence_page_tool(
|
||||||
f"update_confluence_page called: page_title_or_id='{page_title_or_id}'"
|
f"update_confluence_page called: page_title_or_id='{page_title_or_id}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Confluence tool not properly configured.",
|
"message": "Confluence tool not properly configured.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = ConfluenceToolMetadataService(db_session)
|
metadata_service = ConfluenceToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_update_context(
|
context = await metadata_service.get_update_context(
|
||||||
search_space_id, user_id, page_title_or_id
|
search_space_id, user_id, page_title_or_id
|
||||||
|
|
@@ -152,7 +171,10 @@ def create_update_confluence_page_tool(
|
||||||
or "status code 403" in str(api_err).lower()
|
or "status code 403" in str(api_err).lower()
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
connector.config = {**connector.config, "auth_expired": True}
|
connector.config = {
|
||||||
|
**connector.config,
|
||||||
|
"auth_expired": True,
|
||||||
|
}
|
||||||
flag_modified(connector, "config")
|
flag_modified(connector, "config")
|
||||||
await db_session.commit()
|
await db_session.commit()
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@@ -17,7 +17,7 @@ from pydantic import BaseModel, Field
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
|
|
||||||
from app.db import SearchSourceConnector, SearchSourceConnectorType
|
from app.db import SearchSourceConnector, SearchSourceConnectorType, async_session_maker
|
||||||
from app.services.mcp_oauth.registry import MCP_SERVICES
|
from app.services.mcp_oauth.registry import MCP_SERVICES
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@@ -53,6 +53,23 @@ def create_get_connected_accounts_tool(
|
||||||
search_space_id: int,
|
search_space_id: int,
|
||||||
user_id: str,
|
user_id: str,
|
||||||
) -> StructuredTool:
|
) -> StructuredTool:
|
||||||
|
"""Factory function to create the get_connected_accounts tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
search_space_id: Search space ID to scope account discovery to.
|
||||||
|
user_id: User ID to scope account discovery to.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured StructuredTool for connected-accounts discovery.
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
async def _run(service: str) -> list[dict[str, Any]]:
|
async def _run(service: str) -> list[dict[str, Any]]:
|
||||||
svc_cfg = MCP_SERVICES.get(service)
|
svc_cfg = MCP_SERVICES.get(service)
|
||||||
|
|
@@ -68,6 +85,7 @@ def create_get_connected_accounts_tool(
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return [{"error": f"Connector type '{svc_cfg.connector_type}' not found."}]
|
return [{"error": f"Connector type '{svc_cfg.connector_type}' not found."}]
|
||||||
|
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
select(SearchSourceConnector).filter(
|
select(SearchSourceConnector).filter(
|
||||||
SearchSourceConnector.search_space_id == search_space_id,
|
SearchSourceConnector.search_space_id == search_space_id,
|
||||||
|
|
|
||||||
|
|
@@ -5,6 +5,8 @@ import httpx
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.db import async_session_maker
|
||||||
|
|
||||||
from ._auth import DISCORD_API, get_bot_token, get_discord_connector, get_guild_id
|
from ._auth import DISCORD_API, get_bot_token, get_discord_connector, get_guild_id
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@@ -15,6 +17,23 @@ def create_list_discord_channels_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the list_discord_channels tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured list_discord_channels tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def list_discord_channels() -> dict[str, Any]:
|
async def list_discord_channels() -> dict[str, Any]:
|
||||||
"""List text channels in the connected Discord server.
|
"""List text channels in the connected Discord server.
|
||||||
|
|
@@ -22,13 +41,14 @@ def create_list_discord_channels_tool(
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary with status and a list of channels (id, name).
|
Dictionary with status and a list of channels (id, name).
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Discord tool not properly configured.",
|
"message": "Discord tool not properly configured.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
connector = await get_discord_connector(
|
connector = await get_discord_connector(
|
||||||
db_session, search_space_id, user_id
|
db_session, search_space_id, user_id
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@@ -5,6 +5,8 @@ import httpx
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.db import async_session_maker
|
||||||
|
|
||||||
from ._auth import DISCORD_API, get_bot_token, get_discord_connector
|
from ._auth import DISCORD_API, get_bot_token, get_discord_connector
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@@ -15,6 +17,23 @@ def create_read_discord_messages_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the read_discord_messages tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured read_discord_messages tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def read_discord_messages(
|
async def read_discord_messages(
|
||||||
channel_id: str,
|
channel_id: str,
|
||||||
|
|
@@ -30,7 +49,7 @@ def create_read_discord_messages_tool(
|
||||||
Dictionary with status and a list of messages including
|
Dictionary with status and a list of messages including
|
||||||
id, author, content, timestamp.
|
id, author, content, timestamp.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Discord tool not properly configured.",
|
"message": "Discord tool not properly configured.",
|
||||||
|
|
@@ -39,6 +58,7 @@ def create_read_discord_messages_tool(
|
||||||
limit = min(limit, 50)
|
limit = min(limit, 50)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
connector = await get_discord_connector(
|
connector = await get_discord_connector(
|
||||||
db_session, search_space_id, user_id
|
db_session, search_space_id, user_id
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@@ -6,6 +6,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
|
|
||||||
from ._auth import DISCORD_API, get_bot_token, get_discord_connector
|
from ._auth import DISCORD_API, get_bot_token, get_discord_connector
|
||||||
|
|
||||||
|
|
@@ -17,6 +18,23 @@ def create_send_discord_message_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the send_discord_message tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured send_discord_message tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def send_discord_message(
|
async def send_discord_message(
|
||||||
channel_id: str,
|
channel_id: str,
|
||||||
|
|
@@ -34,7 +52,7 @@ def create_send_discord_message_tool(
|
||||||
IMPORTANT:
|
IMPORTANT:
|
||||||
- If status is "rejected", the user explicitly declined. Do NOT retry.
|
- If status is "rejected", the user explicitly declined. Do NOT retry.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Discord tool not properly configured.",
|
"message": "Discord tool not properly configured.",
|
||||||
|
|
@@ -47,6 +65,7 @@ def create_send_discord_message_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
connector = await get_discord_connector(
|
connector = await get_discord_connector(
|
||||||
db_session, search_space_id, user_id
|
db_session, search_space_id, user_id
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@@ -10,7 +10,7 @@ from sqlalchemy.future import select
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.dropbox.client import DropboxClient
|
from app.connectors.dropbox.client import DropboxClient
|
||||||
from app.db import SearchSourceConnector, SearchSourceConnectorType
|
from app.db import SearchSourceConnector, SearchSourceConnectorType, async_session_maker
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@@ -59,6 +59,23 @@ def create_create_dropbox_file_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the create_dropbox_file tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured create_dropbox_file tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_dropbox_file(
|
async def create_dropbox_file(
|
||||||
name: str,
|
name: str,
|
||||||
|
|
@@ -82,13 +99,14 @@ def create_create_dropbox_file_tool(
|
||||||
f"create_dropbox_file called: name='{name}', file_type='{file_type}'"
|
f"create_dropbox_file called: name='{name}', file_type='{file_type}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Dropbox tool not properly configured.",
|
"message": "Dropbox tool not properly configured.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
select(SearchSourceConnector).filter(
|
select(SearchSourceConnector).filter(
|
||||||
SearchSourceConnector.search_space_id == search_space_id,
|
SearchSourceConnector.search_space_id == search_space_id,
|
||||||
|
|
@@ -149,7 +167,9 @@ def create_create_dropbox_file_tool(
|
||||||
]
|
]
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Error fetching folders for connector %s", cid, exc_info=True
|
"Error fetching folders for connector %s",
|
||||||
|
cid,
|
||||||
|
exc_info=True,
|
||||||
)
|
)
|
||||||
parent_folders[cid] = []
|
parent_folders[cid] = []
|
||||||
|
|
||||||
|
|
@@ -217,7 +237,9 @@ def create_create_dropbox_file_tool(
|
||||||
)
|
)
|
||||||
|
|
||||||
if final_file_type == "paper":
|
if final_file_type == "paper":
|
||||||
created = await client.create_paper_doc(file_path, final_content or "")
|
created = await client.create_paper_doc(
|
||||||
|
file_path, final_content or ""
|
||||||
|
)
|
||||||
file_id = created.get("file_id", "")
|
file_id = created.get("file_id", "")
|
||||||
web_url = created.get("url", "")
|
web_url = created.get("url", "")
|
||||||
else:
|
else:
|
||||||
|
|
@@ -246,7 +268,9 @@ def create_create_dropbox_file_tool(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
)
|
)
|
||||||
if kb_result["status"] == "success":
|
if kb_result["status"] == "success":
|
||||||
kb_message_suffix = " Your knowledge base has also been updated."
|
kb_message_suffix = (
|
||||||
|
" Your knowledge base has also been updated."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync."
|
||||||
except Exception as kb_err:
|
except Exception as kb_err:
|
||||||
|
|
|
||||||
|
|
@@ -13,6 +13,7 @@ from app.db import (
|
||||||
DocumentType,
|
DocumentType,
|
||||||
SearchSourceConnector,
|
SearchSourceConnector,
|
||||||
SearchSourceConnectorType,
|
SearchSourceConnectorType,
|
||||||
|
async_session_maker,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@@ -23,6 +24,23 @@ def create_delete_dropbox_file_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the delete_dropbox_file tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured delete_dropbox_file tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def delete_dropbox_file(
|
async def delete_dropbox_file(
|
||||||
file_name: str,
|
file_name: str,
|
||||||
|
|
@@ -55,13 +73,14 @@ def create_delete_dropbox_file_tool(
|
||||||
f"delete_dropbox_file called: file_name='{file_name}', delete_from_kb={delete_from_kb}"
|
f"delete_dropbox_file called: file_name='{file_name}', delete_from_kb={delete_from_kb}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Dropbox tool not properly configured.",
|
"message": "Dropbox tool not properly configured.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
doc_result = await db_session.execute(
|
doc_result = await db_session.execute(
|
||||||
select(Document)
|
select(Document)
|
||||||
.join(
|
.join(
|
||||||
|
|
@@ -193,14 +212,17 @@ def create_delete_dropbox_file_tool(
|
||||||
|
|
||||||
final_file_path = result.params.get("file_path", file_path)
|
final_file_path = result.params.get("file_path", file_path)
|
||||||
final_connector_id = result.params.get("connector_id", connector.id)
|
final_connector_id = result.params.get("connector_id", connector.id)
|
||||||
final_delete_from_kb = result.params.get("delete_from_kb", delete_from_kb)
|
final_delete_from_kb = result.params.get(
|
||||||
|
"delete_from_kb", delete_from_kb
|
||||||
|
)
|
||||||
|
|
||||||
if final_connector_id != connector.id:
|
if final_connector_id != connector.id:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
select(SearchSourceConnector).filter(
|
select(SearchSourceConnector).filter(
|
||||||
and_(
|
and_(
|
||||||
SearchSourceConnector.id == final_connector_id,
|
SearchSourceConnector.id == final_connector_id,
|
||||||
SearchSourceConnector.search_space_id == search_space_id,
|
SearchSourceConnector.search_space_id
|
||||||
|
== search_space_id,
|
||||||
SearchSourceConnector.user_id == user_id,
|
SearchSourceConnector.user_id == user_id,
|
||||||
SearchSourceConnector.connector_type
|
SearchSourceConnector.connector_type
|
||||||
== SearchSourceConnectorType.DROPBOX_CONNECTOR,
|
== SearchSourceConnectorType.DROPBOX_CONNECTOR,
|
||||||
|
|
@@ -221,7 +243,9 @@ def create_delete_dropbox_file_tool(
|
||||||
f"Deleting Dropbox file: path='{final_file_path}', connector={actual_connector_id}"
|
f"Deleting Dropbox file: path='{final_file_path}', connector={actual_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
client = DropboxClient(session=db_session, connector_id=actual_connector_id)
|
client = DropboxClient(
|
||||||
|
session=db_session, connector_id=actual_connector_id
|
||||||
|
)
|
||||||
await client.delete_file(final_file_path)
|
await client.delete_file(final_file_path)
|
||||||
|
|
||||||
logger.info(f"Dropbox file deleted: path={final_file_path}")
|
logger.info(f"Dropbox file deleted: path={final_file_path}")
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ from app.services.image_gen_router_service import (
|
||||||
ImageGenRouterService,
|
ImageGenRouterService,
|
||||||
is_image_gen_auto_mode,
|
is_image_gen_auto_mode,
|
||||||
)
|
)
|
||||||
|
from app.services.provider_api_base import resolve_api_base
|
||||||
from app.utils.signed_image_urls import generate_image_token
|
from app.utils.signed_image_urls import generate_image_token
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -49,12 +50,16 @@ _PROVIDER_MAP = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_provider_prefix(provider: str, custom_provider: str | None) -> str:
|
||||||
|
if custom_provider:
|
||||||
|
return custom_provider
|
||||||
|
return _PROVIDER_MAP.get(provider.upper(), provider.lower())
|
||||||
|
|
||||||
|
|
||||||
def _build_model_string(
    provider: str, model_name: str, custom_provider: str | None
) -> str:
    """Build the ``"<prefix>/<model_name>"`` string for a provider/model pair.

    The prefix is whatever ``_resolve_provider_prefix`` returns for
    ``provider`` and ``custom_provider``.
    """
    return f"{_resolve_provider_prefix(provider, custom_provider)}/{model_name}"
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -146,14 +151,18 @@ def create_generate_image_tool(
|
||||||
"error": f"Image generation config {config_id} not found"
|
"error": f"Image generation config {config_id} not found"
|
||||||
}
|
}
|
||||||
|
|
||||||
model_string = _build_model_string(
|
provider_prefix = _resolve_provider_prefix(
|
||||||
cfg.get("provider", ""),
|
cfg.get("provider", ""), cfg.get("custom_provider")
|
||||||
cfg["model_name"],
|
|
||||||
cfg.get("custom_provider"),
|
|
||||||
)
|
)
|
||||||
|
model_string = f"{provider_prefix}/{cfg['model_name']}"
|
||||||
gen_kwargs["api_key"] = cfg.get("api_key")
|
gen_kwargs["api_key"] = cfg.get("api_key")
|
||||||
if cfg.get("api_base"):
|
api_base = resolve_api_base(
|
||||||
gen_kwargs["api_base"] = cfg["api_base"]
|
provider=cfg.get("provider"),
|
||||||
|
provider_prefix=provider_prefix,
|
||||||
|
config_api_base=cfg.get("api_base"),
|
||||||
|
)
|
||||||
|
if api_base:
|
||||||
|
gen_kwargs["api_base"] = api_base
|
||||||
if cfg.get("api_version"):
|
if cfg.get("api_version"):
|
||||||
gen_kwargs["api_version"] = cfg["api_version"]
|
gen_kwargs["api_version"] = cfg["api_version"]
|
||||||
if cfg.get("litellm_params"):
|
if cfg.get("litellm_params"):
|
||||||
|
|
@ -175,14 +184,18 @@ def create_generate_image_tool(
|
||||||
"error": f"Image generation config {config_id} not found"
|
"error": f"Image generation config {config_id} not found"
|
||||||
}
|
}
|
||||||
|
|
||||||
model_string = _build_model_string(
|
provider_prefix = _resolve_provider_prefix(
|
||||||
db_cfg.provider.value,
|
db_cfg.provider.value, db_cfg.custom_provider
|
||||||
db_cfg.model_name,
|
|
||||||
db_cfg.custom_provider,
|
|
||||||
)
|
)
|
||||||
|
model_string = f"{provider_prefix}/{db_cfg.model_name}"
|
||||||
gen_kwargs["api_key"] = db_cfg.api_key
|
gen_kwargs["api_key"] = db_cfg.api_key
|
||||||
if db_cfg.api_base:
|
api_base = resolve_api_base(
|
||||||
gen_kwargs["api_base"] = db_cfg.api_base
|
provider=db_cfg.provider.value,
|
||||||
|
provider_prefix=provider_prefix,
|
||||||
|
config_api_base=db_cfg.api_base,
|
||||||
|
)
|
||||||
|
if api_base:
|
||||||
|
gen_kwargs["api_base"] = api_base
|
||||||
if db_cfg.api_version:
|
if db_cfg.api_version:
|
||||||
gen_kwargs["api_version"] = db_cfg.api_version
|
gen_kwargs["api_version"] = db_cfg.api_version
|
||||||
if db_cfg.litellm_params:
|
if db_cfg.litellm_params:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,41 @@
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from app.db import SearchSourceConnector
|
||||||
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
|
|
||||||
|
def split_recipients(value: str | None) -> list[str]:
|
||||||
|
if not value:
|
||||||
|
return []
|
||||||
|
return [recipient.strip() for recipient in value.split(",") if recipient.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
def unwrap_composio_data(data: Any) -> Any:
    """Peel the ``"data"`` and ``"response_data"`` wrappers off a result.

    Non-dict input is returned as-is. For a dict, the value under ``"data"``
    is taken when that key exists (otherwise the dict itself); if that value
    is again a dict, the value under ``"response_data"`` is taken when that
    key exists (otherwise that dict itself).
    """
    if not isinstance(data, dict):
        return data

    payload = data["data"] if "data" in data else data
    if not isinstance(payload, dict):
        return payload

    return payload["response_data"] if "response_data" in payload else payload
|
||||||
|
|
||||||
|
|
||||||
|
async def execute_composio_gmail_tool(
    connector: SearchSourceConnector,
    user_id: str,
    tool_name: str,
    params: dict[str, Any],
) -> tuple[Any, str | None]:
    """Run a Composio Gmail tool for the account linked to ``connector``.

    Args:
        connector: Connector whose ``config`` holds the
            ``composio_connected_account_id``.
        user_id: User identifier; passed to Composio as the entity
            ``surfsense_<user_id>``.
        tool_name: Name of the Composio tool to execute.
        params: Parameters forwarded to the tool unchanged.

    Returns:
        ``(data, None)`` on success, where ``data`` is the result's ``"data"``
        entry after ``unwrap_composio_data``; ``(None, message)`` on failure.
        The failure message is always a non-empty string.
    """
    # Tolerate a connector whose config is None: treat it like a missing ID.
    cca_id = (connector.config or {}).get("composio_connected_account_id")
    if not cca_id:
        return None, "Composio connected account ID not found for this Gmail connector."

    result = await ComposioService().execute_tool(
        connected_account_id=cca_id,
        tool_name=tool_name,
        params=params,
        entity_id=f"surfsense_{user_id}",
    )
    if not result.get("success"):
        # Callers branch on the truthiness of the error, so a failed call must
        # never return None/"" here (e.g. "error" present but empty).
        return None, result.get("error") or "Unknown Composio Gmail error"

    return unwrap_composio_data(result.get("data")), None
|
||||||
|
|
@ -9,6 +9,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.gmail import GmailToolMetadataService
|
from app.services.gmail import GmailToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -19,6 +20,23 @@ def create_create_gmail_draft_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the create_gmail_draft tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured create_gmail_draft tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_gmail_draft(
|
async def create_gmail_draft(
|
||||||
to: str,
|
to: str,
|
||||||
|
|
@ -57,20 +75,23 @@ def create_create_gmail_draft_tool(
|
||||||
"""
|
"""
|
||||||
logger.info(f"create_gmail_draft called: to='{to}', subject='{subject}'")
|
logger.info(f"create_gmail_draft called: to='{to}', subject='{subject}'")
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Gmail tool not properly configured. Please contact support.",
|
"message": "Gmail tool not properly configured. Please contact support.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = GmailToolMetadataService(db_session)
|
metadata_service = GmailToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_creation_context(
|
context = await metadata_service.get_creation_context(
|
||||||
search_space_id, user_id
|
search_space_id, user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
if "error" in context:
|
if "error" in context:
|
||||||
logger.error(f"Failed to fetch creation context: {context['error']}")
|
logger.error(
|
||||||
|
f"Failed to fetch creation context: {context['error']}"
|
||||||
|
)
|
||||||
return {"status": "error", "message": context["error"]}
|
return {"status": "error", "message": context["error"]}
|
||||||
|
|
||||||
accounts = context.get("accounts", [])
|
accounts = context.get("accounts", [])
|
||||||
|
|
@ -157,16 +178,13 @@ def create_create_gmail_draft_tool(
|
||||||
f"Creating Gmail draft: to='{final_to}', subject='{final_subject}', connector={actual_connector_id}"
|
f"Creating Gmail draft: to='{final_to}', subject='{final_subject}', connector={actual_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
is_composio_gmail = (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
||||||
):
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
if is_composio_gmail:
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
creds = build_composio_credentials(cca_id)
|
|
||||||
else:
|
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Composio connected account ID not found for this Gmail connector.",
|
"message": "Composio connected account ID not found for this Gmail connector.",
|
||||||
|
|
@ -186,13 +204,17 @@ def create_create_gmail_draft_tool(
|
||||||
config_data["token"]
|
config_data["token"]
|
||||||
)
|
)
|
||||||
if config_data.get("refresh_token"):
|
if config_data.get("refresh_token"):
|
||||||
config_data["refresh_token"] = token_encryption.decrypt_token(
|
config_data["refresh_token"] = (
|
||||||
|
token_encryption.decrypt_token(
|
||||||
config_data["refresh_token"]
|
config_data["refresh_token"]
|
||||||
)
|
)
|
||||||
|
)
|
||||||
if config_data.get("client_secret"):
|
if config_data.get("client_secret"):
|
||||||
config_data["client_secret"] = token_encryption.decrypt_token(
|
config_data["client_secret"] = (
|
||||||
|
token_encryption.decrypt_token(
|
||||||
config_data["client_secret"]
|
config_data["client_secret"]
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
exp = config_data.get("expiry", "")
|
exp = config_data.get("expiry", "")
|
||||||
if exp:
|
if exp:
|
||||||
|
|
@ -208,10 +230,6 @@ def create_create_gmail_draft_tool(
|
||||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
from googleapiclient.discovery import build
|
|
||||||
|
|
||||||
gmail_service = build("gmail", "v1", credentials=creds)
|
|
||||||
|
|
||||||
message = MIMEText(final_body)
|
message = MIMEText(final_body)
|
||||||
message["to"] = final_to
|
message["to"] = final_to
|
||||||
message["subject"] = final_subject
|
message["subject"] = final_subject
|
||||||
|
|
@ -222,6 +240,34 @@ def create_create_gmail_draft_tool(
|
||||||
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
|
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if is_composio_gmail:
|
||||||
|
from app.agents.new_chat.tools.gmail.composio_helpers import (
|
||||||
|
execute_composio_gmail_tool,
|
||||||
|
split_recipients,
|
||||||
|
)
|
||||||
|
|
||||||
|
created, error = await execute_composio_gmail_tool(
|
||||||
|
connector,
|
||||||
|
user_id,
|
||||||
|
"GMAIL_CREATE_EMAIL_DRAFT",
|
||||||
|
{
|
||||||
|
"user_id": "me",
|
||||||
|
"recipient_email": final_to,
|
||||||
|
"subject": final_subject,
|
||||||
|
"body": final_body,
|
||||||
|
"cc": split_recipients(final_cc),
|
||||||
|
"bcc": split_recipients(final_bcc),
|
||||||
|
"is_html": False,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if error:
|
||||||
|
raise RuntimeError(error)
|
||||||
|
if not isinstance(created, dict):
|
||||||
|
created = {}
|
||||||
|
else:
|
||||||
|
from googleapiclient.discovery import build
|
||||||
|
|
||||||
|
gmail_service = build("gmail", "v1", credentials=creds)
|
||||||
created = await asyncio.get_event_loop().run_in_executor(
|
created = await asyncio.get_event_loop().run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: (
|
lambda: (
|
||||||
|
|
@ -285,7 +331,9 @@ def create_create_gmail_draft_tool(
|
||||||
draft_id=created.get("id"),
|
draft_id=created.get("id"),
|
||||||
)
|
)
|
||||||
if kb_result["status"] == "success":
|
if kb_result["status"] == "success":
|
||||||
kb_message_suffix = " Your knowledge base has also been updated."
|
kb_message_suffix = (
|
||||||
|
" Your knowledge base has also been updated."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
kb_message_suffix = " This draft will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This draft will be added to your knowledge base in the next scheduled sync."
|
||||||
except Exception as kb_err:
|
except Exception as kb_err:
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
|
|
||||||
from app.db import SearchSourceConnector, SearchSourceConnectorType
|
from app.db import SearchSourceConnector, SearchSourceConnectorType, async_session_maker
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -20,6 +20,23 @@ def create_read_gmail_email_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the read_gmail_email tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured read_gmail_email tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def read_gmail_email(message_id: str) -> dict[str, Any]:
|
async def read_gmail_email(message_id: str) -> dict[str, Any]:
|
||||||
"""Read the full content of a specific Gmail email by its message ID.
|
"""Read the full content of a specific Gmail email by its message ID.
|
||||||
|
|
@ -32,10 +49,11 @@ def create_read_gmail_email_tool(
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary with status and the full email content formatted as markdown.
|
Dictionary with status and the full email content formatted as markdown.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Gmail tool not properly configured."}
|
return {"status": "error", "message": "Gmail tool not properly configured."}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
select(SearchSourceConnector).filter(
|
select(SearchSourceConnector).filter(
|
||||||
SearchSourceConnector.search_space_id == search_space_id,
|
SearchSourceConnector.search_space_id == search_space_id,
|
||||||
|
|
@ -50,7 +68,57 @@ def create_read_gmail_email_tool(
|
||||||
"message": "No Gmail connector found. Please connect Gmail in your workspace settings.",
|
"message": "No Gmail connector found. Please connect Gmail in your workspace settings.",
|
||||||
}
|
}
|
||||||
|
|
||||||
from app.agents.new_chat.tools.gmail.search_emails import _build_credentials
|
if (
|
||||||
|
connector.connector_type
|
||||||
|
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
||||||
|
):
|
||||||
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
|
if not cca_id:
|
||||||
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"message": "Composio connected account ID not found.",
|
||||||
|
}
|
||||||
|
|
||||||
|
from app.agents.new_chat.tools.gmail.search_emails import (
|
||||||
|
_format_gmail_summary,
|
||||||
|
)
|
||||||
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
|
service = ComposioService()
|
||||||
|
detail, error = await service.get_gmail_message_detail(
|
||||||
|
connected_account_id=cca_id,
|
||||||
|
entity_id=f"surfsense_{user_id}",
|
||||||
|
message_id=message_id,
|
||||||
|
)
|
||||||
|
if error:
|
||||||
|
return {"status": "error", "message": error}
|
||||||
|
if not detail:
|
||||||
|
return {
|
||||||
|
"status": "not_found",
|
||||||
|
"message": f"Email with ID '{message_id}' not found.",
|
||||||
|
}
|
||||||
|
|
||||||
|
summary = _format_gmail_summary(detail)
|
||||||
|
content = (
|
||||||
|
f"# {summary['subject']}\n\n"
|
||||||
|
f"**From:** {summary['from']}\n"
|
||||||
|
f"**To:** {summary['to']}\n"
|
||||||
|
f"**Date:** {summary['date']}\n\n"
|
||||||
|
f"## Message Content\n\n"
|
||||||
|
f"{detail.get('messageText') or detail.get('snippet') or ''}\n\n"
|
||||||
|
f"## Message Details\n\n"
|
||||||
|
f"- **Message ID:** {summary['message_id']}\n"
|
||||||
|
f"- **Thread ID:** {summary['thread_id']}\n"
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"status": "success",
|
||||||
|
"message_id": summary["message_id"] or message_id,
|
||||||
|
"content": content,
|
||||||
|
}
|
||||||
|
|
||||||
|
from app.agents.new_chat.tools.gmail.search_emails import (
|
||||||
|
_build_credentials,
|
||||||
|
)
|
||||||
|
|
||||||
creds = _build_credentials(connector)
|
creds = _build_credentials(connector)
|
||||||
|
|
||||||
|
|
@ -84,7 +152,11 @@ def create_read_gmail_email_tool(
|
||||||
|
|
||||||
content = gmail.format_message_to_markdown(detail)
|
content = gmail.format_message_to_markdown(detail)
|
||||||
|
|
||||||
return {"status": "success", "message_id": message_id, "content": content}
|
return {
|
||||||
|
"status": "success",
|
||||||
|
"message_id": message_id,
|
||||||
|
"content": content,
|
||||||
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
from langgraph.errors import GraphInterrupt
|
from langgraph.errors import GraphInterrupt
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
|
|
||||||
from app.db import SearchSourceConnector, SearchSourceConnectorType
|
from app.db import SearchSourceConnector, SearchSourceConnectorType, async_session_maker
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -39,12 +39,7 @@ def _build_credentials(connector: SearchSourceConnector):
|
||||||
from app.utils.google_credentials import COMPOSIO_GOOGLE_CONNECTOR_TYPES
|
from app.utils.google_credentials import COMPOSIO_GOOGLE_CONNECTOR_TYPES
|
||||||
|
|
||||||
if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES:
|
if connector.connector_type in COMPOSIO_GOOGLE_CONNECTOR_TYPES:
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
raise ValueError("Composio connectors must use Composio tool execution.")
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
|
||||||
if not cca_id:
|
|
||||||
raise ValueError("Composio connected account ID not found.")
|
|
||||||
return build_composio_credentials(cca_id)
|
|
||||||
|
|
||||||
from google.oauth2.credentials import Credentials
|
from google.oauth2.credentials import Credentials
|
||||||
|
|
||||||
|
|
@ -67,11 +62,85 @@ def _build_credentials(connector: SearchSourceConnector):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _gmail_headers(message: dict[str, Any]) -> dict[str, str]:
    """Collect a message's headers into a ``{lower-cased name: value}`` dict.

    Headers are read from ``message["payload"]["headers"]``. A missing or
    ``None`` payload/header list yields an empty dict, entries that are not
    dicts are skipped, and a missing or ``None`` name/value becomes ``""``.
    """
    payload = message.get("payload")
    # The keys may be present but None, so .get() defaults alone are not enough.
    raw_headers = payload.get("headers") if isinstance(payload, dict) else None
    return {
        (header.get("name") or "").lower(): header.get("value") or ""
        for header in raw_headers or []
        if isinstance(header, dict)
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _format_gmail_summary(message: dict[str, Any]) -> dict[str, Any]:
    """Build a flat summary dict from a Gmail message dict.

    Top-level fields (``subject``, ``sender``, ``to``, ``messageTimestamp``)
    win when truthy; otherwise the matching header from ``_gmail_headers`` is
    used, falling back to ``"No Subject"``, ``"Unknown"``, ``""`` and ``""``.

    Returns:
        Dict with the keys ``message_id``, ``thread_id``, ``subject``,
        ``from``, ``to``, ``date``, ``snippet`` and ``labels``. ``snippet`` is
        the message's ``snippet`` or the first 300 characters of
        ``messageText``; ``labels`` is always a list.
    """
    headers = _gmail_headers(message)
    # "messageText" may be present but None; slicing None would raise TypeError.
    snippet = message.get("snippet") or (message.get("messageText") or "")[:300]
    return {
        "message_id": message.get("id") or message.get("messageId"),
        "thread_id": message.get("threadId"),
        "subject": message.get("subject") or headers.get("subject", "No Subject"),
        "from": message.get("sender") or headers.get("from", "Unknown"),
        "to": message.get("to") or headers.get("to", ""),
        "date": message.get("messageTimestamp") or headers.get("date", ""),
        "snippet": snippet,
        # Normalise a present-but-None "labelIds" to an empty list.
        "labels": message.get("labelIds") or [],
    }
|
||||||
|
|
||||||
|
|
||||||
|
async def _search_composio_gmail(
    connector: SearchSourceConnector,
    user_id: str,
    query: str,
    max_results: int,
) -> dict[str, Any]:
    """Search Gmail through Composio and return summarised results.

    Args:
        connector: Connector whose ``config`` holds the
            ``composio_connected_account_id``.
        user_id: User identifier; passed to Composio as the entity
            ``surfsense_<user_id>``.
        query: Search query forwarded to ``get_gmail_messages``.
        max_results: Result limit forwarded to ``get_gmail_messages``.

    Returns:
        ``{"status": "error", "message": ...}`` when the connected account ID
        is missing or the service reports an error; otherwise a
        ``"success"`` dict with ``emails`` (one ``_format_gmail_summary`` per
        message), ``total``, and a ``message`` that is ``"No emails found."``
        for an empty result and ``None`` otherwise.
    """
    connected_account_id = connector.config.get("composio_connected_account_id")
    if not connected_account_id:
        return {
            "status": "error",
            "message": "Composio connected account ID not found.",
        }

    from app.services.composio_service import ComposioService

    response = await ComposioService().get_gmail_messages(
        connected_account_id=connected_account_id,
        entity_id=f"surfsense_{user_id}",
        query=query,
        max_results=max_results,
    )
    # Only the first and last items of the 4-tuple are used here.
    messages, _, _, error = response
    if error:
        return {"status": "error", "message": error}

    summaries = list(map(_format_gmail_summary, messages))
    return {
        "status": "success",
        "emails": summaries,
        "total": len(summaries),
        "message": None if summaries else "No emails found.",
    }
|
||||||
|
|
||||||
|
|
||||||
def create_search_gmail_tool(
|
def create_search_gmail_tool(
|
||||||
db_session: AsyncSession | None = None,
|
db_session: AsyncSession | None = None,
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the search_gmail tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured search_gmail tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def search_gmail(
|
async def search_gmail(
|
||||||
query: str,
|
query: str,
|
||||||
|
|
@ -90,12 +159,13 @@ def create_search_gmail_tool(
|
||||||
Dictionary with status and a list of email summaries including
|
Dictionary with status and a list of email summaries including
|
||||||
message_id, subject, from, date, snippet.
|
message_id, subject, from, date, snippet.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Gmail tool not properly configured."}
|
return {"status": "error", "message": "Gmail tool not properly configured."}
|
||||||
|
|
||||||
max_results = min(max_results, 20)
|
max_results = min(max_results, 20)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
select(SearchSourceConnector).filter(
|
select(SearchSourceConnector).filter(
|
||||||
SearchSourceConnector.search_space_id == search_space_id,
|
SearchSourceConnector.search_space_id == search_space_id,
|
||||||
|
|
@ -110,6 +180,14 @@ def create_search_gmail_tool(
|
||||||
"message": "No Gmail connector found. Please connect Gmail in your workspace settings.",
|
"message": "No Gmail connector found. Please connect Gmail in your workspace settings.",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
connector.connector_type
|
||||||
|
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
||||||
|
):
|
||||||
|
return await _search_composio_gmail(
|
||||||
|
connector, str(user_id), query, max_results
|
||||||
|
)
|
||||||
|
|
||||||
creds = _build_credentials(connector)
|
creds = _build_credentials(connector)
|
||||||
|
|
||||||
from app.connectors.google_gmail_connector import GoogleGmailConnector
|
from app.connectors.google_gmail_connector import GoogleGmailConnector
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.gmail import GmailToolMetadataService
|
from app.services.gmail import GmailToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -19,6 +20,23 @@ def create_send_gmail_email_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the send_gmail_email tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured send_gmail_email tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def send_gmail_email(
|
async def send_gmail_email(
|
||||||
to: str,
|
to: str,
|
||||||
|
|
@ -58,20 +76,23 @@ def create_send_gmail_email_tool(
|
||||||
"""
|
"""
|
||||||
logger.info(f"send_gmail_email called: to='{to}', subject='{subject}'")
|
logger.info(f"send_gmail_email called: to='{to}', subject='{subject}'")
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Gmail tool not properly configured. Please contact support.",
|
"message": "Gmail tool not properly configured. Please contact support.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = GmailToolMetadataService(db_session)
|
metadata_service = GmailToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_creation_context(
|
context = await metadata_service.get_creation_context(
|
||||||
search_space_id, user_id
|
search_space_id, user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
if "error" in context:
|
if "error" in context:
|
||||||
logger.error(f"Failed to fetch creation context: {context['error']}")
|
logger.error(
|
||||||
|
f"Failed to fetch creation context: {context['error']}"
|
||||||
|
)
|
||||||
return {"status": "error", "message": context["error"]}
|
return {"status": "error", "message": context["error"]}
|
||||||
|
|
||||||
accounts = context.get("accounts", [])
|
accounts = context.get("accounts", [])
|
||||||
|
|
@ -158,16 +179,13 @@ def create_send_gmail_email_tool(
|
||||||
f"Sending Gmail email: to='{final_to}', subject='{final_subject}', connector={actual_connector_id}"
|
f"Sending Gmail email: to='{final_to}', subject='{final_subject}', connector={actual_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
is_composio_gmail = (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
||||||
):
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
if is_composio_gmail:
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
creds = build_composio_credentials(cca_id)
|
|
||||||
else:
|
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Composio connected account ID not found for this Gmail connector.",
|
"message": "Composio connected account ID not found for this Gmail connector.",
|
||||||
|
|
@ -187,13 +205,17 @@ def create_send_gmail_email_tool(
|
||||||
config_data["token"]
|
config_data["token"]
|
||||||
)
|
)
|
||||||
if config_data.get("refresh_token"):
|
if config_data.get("refresh_token"):
|
||||||
config_data["refresh_token"] = token_encryption.decrypt_token(
|
config_data["refresh_token"] = (
|
||||||
|
token_encryption.decrypt_token(
|
||||||
config_data["refresh_token"]
|
config_data["refresh_token"]
|
||||||
)
|
)
|
||||||
|
)
|
||||||
if config_data.get("client_secret"):
|
if config_data.get("client_secret"):
|
||||||
config_data["client_secret"] = token_encryption.decrypt_token(
|
config_data["client_secret"] = (
|
||||||
|
token_encryption.decrypt_token(
|
||||||
config_data["client_secret"]
|
config_data["client_secret"]
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
exp = config_data.get("expiry", "")
|
exp = config_data.get("expiry", "")
|
||||||
if exp:
|
if exp:
|
||||||
|
|
@ -209,10 +231,6 @@ def create_send_gmail_email_tool(
|
||||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
from googleapiclient.discovery import build
|
|
||||||
|
|
||||||
gmail_service = build("gmail", "v1", credentials=creds)
|
|
||||||
|
|
||||||
message = MIMEText(final_body)
|
message = MIMEText(final_body)
|
||||||
message["to"] = final_to
|
message["to"] = final_to
|
||||||
message["subject"] = final_subject
|
message["subject"] = final_subject
|
||||||
|
|
@ -223,6 +241,34 @@ def create_send_gmail_email_tool(
|
||||||
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
|
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if is_composio_gmail:
|
||||||
|
from app.agents.new_chat.tools.gmail.composio_helpers import (
|
||||||
|
execute_composio_gmail_tool,
|
||||||
|
split_recipients,
|
||||||
|
)
|
||||||
|
|
||||||
|
sent, error = await execute_composio_gmail_tool(
|
||||||
|
connector,
|
||||||
|
user_id,
|
||||||
|
"GMAIL_SEND_EMAIL",
|
||||||
|
{
|
||||||
|
"user_id": "me",
|
||||||
|
"recipient_email": final_to,
|
||||||
|
"subject": final_subject,
|
||||||
|
"body": final_body,
|
||||||
|
"cc": split_recipients(final_cc),
|
||||||
|
"bcc": split_recipients(final_bcc),
|
||||||
|
"is_html": False,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if error:
|
||||||
|
raise RuntimeError(error)
|
||||||
|
if not isinstance(sent, dict):
|
||||||
|
sent = {}
|
||||||
|
else:
|
||||||
|
from googleapiclient.discovery import build
|
||||||
|
|
||||||
|
gmail_service = build("gmail", "v1", credentials=creds)
|
||||||
sent = await asyncio.get_event_loop().run_in_executor(
|
sent = await asyncio.get_event_loop().run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: (
|
lambda: (
|
||||||
|
|
@ -286,7 +332,9 @@ def create_send_gmail_email_tool(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
)
|
)
|
||||||
if kb_result["status"] == "success":
|
if kb_result["status"] == "success":
|
||||||
kb_message_suffix = " Your knowledge base has also been updated."
|
kb_message_suffix = (
|
||||||
|
" Your knowledge base has also been updated."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
kb_message_suffix = " This email will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This email will be added to your knowledge base in the next scheduled sync."
|
||||||
except Exception as kb_err:
|
except Exception as kb_err:
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.gmail import GmailToolMetadataService
|
from app.services.gmail import GmailToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -17,6 +18,23 @@ def create_trash_gmail_email_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the trash_gmail_email tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured trash_gmail_email tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def trash_gmail_email(
|
async def trash_gmail_email(
|
||||||
email_subject_or_id: str,
|
email_subject_or_id: str,
|
||||||
|
|
@ -55,13 +73,14 @@ def create_trash_gmail_email_tool(
|
||||||
f"trash_gmail_email called: email_subject_or_id='{email_subject_or_id}', delete_from_kb={delete_from_kb}"
|
f"trash_gmail_email called: email_subject_or_id='{email_subject_or_id}', delete_from_kb={delete_from_kb}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Gmail tool not properly configured. Please contact support.",
|
"message": "Gmail tool not properly configured. Please contact support.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = GmailToolMetadataService(db_session)
|
metadata_service = GmailToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_trash_context(
|
context = await metadata_service.get_trash_context(
|
||||||
search_space_id, user_id, email_subject_or_id
|
search_space_id, user_id, email_subject_or_id
|
||||||
|
|
@ -122,7 +141,9 @@ def create_trash_gmail_email_tool(
|
||||||
final_connector_id = result.params.get(
|
final_connector_id = result.params.get(
|
||||||
"connector_id", connector_id_from_context
|
"connector_id", connector_id_from_context
|
||||||
)
|
)
|
||||||
final_delete_from_kb = result.params.get("delete_from_kb", delete_from_kb)
|
final_delete_from_kb = result.params.get(
|
||||||
|
"delete_from_kb", delete_from_kb
|
||||||
|
)
|
||||||
|
|
||||||
if not final_connector_id:
|
if not final_connector_id:
|
||||||
return {
|
return {
|
||||||
|
|
@ -158,16 +179,13 @@ def create_trash_gmail_email_tool(
|
||||||
f"Trashing Gmail email: message_id='{final_message_id}', connector={final_connector_id}"
|
f"Trashing Gmail email: message_id='{final_message_id}', connector={final_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
is_composio_gmail = (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
||||||
):
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
if is_composio_gmail:
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
creds = build_composio_credentials(cca_id)
|
|
||||||
else:
|
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Composio connected account ID not found for this Gmail connector.",
|
"message": "Composio connected account ID not found for this Gmail connector.",
|
||||||
|
|
@ -187,13 +205,17 @@ def create_trash_gmail_email_tool(
|
||||||
config_data["token"]
|
config_data["token"]
|
||||||
)
|
)
|
||||||
if config_data.get("refresh_token"):
|
if config_data.get("refresh_token"):
|
||||||
config_data["refresh_token"] = token_encryption.decrypt_token(
|
config_data["refresh_token"] = (
|
||||||
|
token_encryption.decrypt_token(
|
||||||
config_data["refresh_token"]
|
config_data["refresh_token"]
|
||||||
)
|
)
|
||||||
|
)
|
||||||
if config_data.get("client_secret"):
|
if config_data.get("client_secret"):
|
||||||
config_data["client_secret"] = token_encryption.decrypt_token(
|
config_data["client_secret"] = (
|
||||||
|
token_encryption.decrypt_token(
|
||||||
config_data["client_secret"]
|
config_data["client_secret"]
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
exp = config_data.get("expiry", "")
|
exp = config_data.get("expiry", "")
|
||||||
if exp:
|
if exp:
|
||||||
|
|
@ -209,11 +231,24 @@ def create_trash_gmail_email_tool(
|
||||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if is_composio_gmail:
|
||||||
|
from app.agents.new_chat.tools.gmail.composio_helpers import (
|
||||||
|
execute_composio_gmail_tool,
|
||||||
|
)
|
||||||
|
|
||||||
|
_trashed, error = await execute_composio_gmail_tool(
|
||||||
|
connector,
|
||||||
|
user_id,
|
||||||
|
"GMAIL_MOVE_TO_TRASH",
|
||||||
|
{"user_id": "me", "message_id": final_message_id},
|
||||||
|
)
|
||||||
|
if error:
|
||||||
|
raise RuntimeError(error)
|
||||||
|
else:
|
||||||
from googleapiclient.discovery import build
|
from googleapiclient.discovery import build
|
||||||
|
|
||||||
gmail_service = build("gmail", "v1", credentials=creds)
|
gmail_service = build("gmail", "v1", credentials=creds)
|
||||||
|
|
||||||
try:
|
|
||||||
await asyncio.get_event_loop().run_in_executor(
|
await asyncio.get_event_loop().run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: (
|
lambda: (
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.gmail import GmailToolMetadataService
|
from app.services.gmail import GmailToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -19,6 +20,23 @@ def create_update_gmail_draft_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the update_gmail_draft tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured update_gmail_draft tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def update_gmail_draft(
|
async def update_gmail_draft(
|
||||||
draft_subject_or_id: str,
|
draft_subject_or_id: str,
|
||||||
|
|
@ -76,13 +94,14 @@ def create_update_gmail_draft_tool(
|
||||||
f"update_gmail_draft called: draft_subject_or_id='{draft_subject_or_id}'"
|
f"update_gmail_draft called: draft_subject_or_id='{draft_subject_or_id}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Gmail tool not properly configured. Please contact support.",
|
"message": "Gmail tool not properly configured. Please contact support.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = GmailToolMetadataService(db_session)
|
metadata_service = GmailToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_update_context(
|
context = await metadata_service.get_update_context(
|
||||||
search_space_id, user_id, draft_subject_or_id
|
search_space_id, user_id, draft_subject_or_id
|
||||||
|
|
@ -188,16 +207,13 @@ def create_update_gmail_draft_tool(
|
||||||
f"Updating Gmail draft: subject='{final_subject}', connector={final_connector_id}"
|
f"Updating Gmail draft: subject='{final_subject}', connector={final_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
is_composio_gmail = (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
||||||
):
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
if is_composio_gmail:
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
creds = build_composio_credentials(cca_id)
|
|
||||||
else:
|
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Composio connected account ID not found for this Gmail connector.",
|
"message": "Composio connected account ID not found for this Gmail connector.",
|
||||||
|
|
@ -217,13 +233,17 @@ def create_update_gmail_draft_tool(
|
||||||
config_data["token"]
|
config_data["token"]
|
||||||
)
|
)
|
||||||
if config_data.get("refresh_token"):
|
if config_data.get("refresh_token"):
|
||||||
config_data["refresh_token"] = token_encryption.decrypt_token(
|
config_data["refresh_token"] = (
|
||||||
|
token_encryption.decrypt_token(
|
||||||
config_data["refresh_token"]
|
config_data["refresh_token"]
|
||||||
)
|
)
|
||||||
|
)
|
||||||
if config_data.get("client_secret"):
|
if config_data.get("client_secret"):
|
||||||
config_data["client_secret"] = token_encryption.decrypt_token(
|
config_data["client_secret"] = (
|
||||||
|
token_encryption.decrypt_token(
|
||||||
config_data["client_secret"]
|
config_data["client_secret"]
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
exp = config_data.get("expiry", "")
|
exp = config_data.get("expiry", "")
|
||||||
if exp:
|
if exp:
|
||||||
|
|
@ -239,15 +259,19 @@ def create_update_gmail_draft_tool(
|
||||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
from googleapiclient.discovery import build
|
|
||||||
|
|
||||||
gmail_service = build("gmail", "v1", credentials=creds)
|
|
||||||
|
|
||||||
# Resolve draft_id if not already available
|
# Resolve draft_id if not already available
|
||||||
if not final_draft_id:
|
if not final_draft_id:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"draft_id not in metadata, looking up via drafts.list for message_id={message_id}"
|
f"draft_id not in metadata, looking up via drafts.list for message_id={message_id}"
|
||||||
)
|
)
|
||||||
|
if is_composio_gmail:
|
||||||
|
final_draft_id = await _find_composio_draft_id_by_message(
|
||||||
|
connector, user_id, message_id
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
from googleapiclient.discovery import build
|
||||||
|
|
||||||
|
gmail_service = build("gmail", "v1", credentials=creds)
|
||||||
final_draft_id = await _find_draft_id_by_message(
|
final_draft_id = await _find_draft_id_by_message(
|
||||||
gmail_service, message_id
|
gmail_service, message_id
|
||||||
)
|
)
|
||||||
|
|
@ -272,6 +296,35 @@ def create_update_gmail_draft_tool(
|
||||||
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
|
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if is_composio_gmail:
|
||||||
|
from app.agents.new_chat.tools.gmail.composio_helpers import (
|
||||||
|
execute_composio_gmail_tool,
|
||||||
|
split_recipients,
|
||||||
|
)
|
||||||
|
|
||||||
|
updated, error = await execute_composio_gmail_tool(
|
||||||
|
connector,
|
||||||
|
user_id,
|
||||||
|
"GMAIL_UPDATE_DRAFT",
|
||||||
|
{
|
||||||
|
"user_id": "me",
|
||||||
|
"draft_id": final_draft_id,
|
||||||
|
"recipient_email": final_to,
|
||||||
|
"subject": final_subject,
|
||||||
|
"body": final_body,
|
||||||
|
"cc": split_recipients(final_cc),
|
||||||
|
"bcc": split_recipients(final_bcc),
|
||||||
|
"is_html": False,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if error:
|
||||||
|
raise RuntimeError(error)
|
||||||
|
if not isinstance(updated, dict):
|
||||||
|
updated = {}
|
||||||
|
else:
|
||||||
|
from googleapiclient.discovery import build
|
||||||
|
|
||||||
|
gmail_service = build("gmail", "v1", credentials=creds)
|
||||||
updated = await asyncio.get_event_loop().run_in_executor(
|
updated = await asyncio.get_event_loop().run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: (
|
lambda: (
|
||||||
|
|
@ -408,3 +461,35 @@ async def _find_draft_id_by_message(gmail_service: Any, message_id: str) -> str
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to look up draft by message_id: {e}")
|
logger.warning(f"Failed to look up draft by message_id: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _find_composio_draft_id_by_message(
    connector: Any, user_id: str, message_id: str
) -> str | None:
    """Find the draft whose message id equals ``message_id`` via Composio.

    Pages through the ``GMAIL_LIST_DRAFTS`` Composio tool, 100 drafts per
    request, until a match is found or no further page token is returned.

    Args:
        connector: Connector object, forwarded unchanged to
            ``execute_composio_gmail_tool``.
        user_id: User id, forwarded unchanged to
            ``execute_composio_gmail_tool``.
        message_id: Message id compared against each draft's ``message.id``.

    Returns:
        The ``id`` of the first matching draft, or ``None`` when the tool
        reports an error, returns a non-dict payload, or the listing is
        exhausted without a match.
    """
    # Imported lazily, mirroring the call sites in this module.
    from app.agents.new_chat.tools.gmail.composio_helpers import (
        execute_composio_gmail_tool,
    )

    page_token = ""
    while True:
        params: dict[str, Any] = {
            "user_id": "me",
            "max_results": 100,
            "verbose": False,
        }
        if page_token:
            params["page_token"] = page_token

        data, error = await execute_composio_gmail_tool(
            connector, user_id, "GMAIL_LIST_DRAFTS", params
        )
        if error or not isinstance(data, dict):
            return None

        # ``or`` instead of a .get() default: a key that is present but null
        # would otherwise raise on iteration / attribute access.
        for draft in data.get("drafts") or []:
            message = draft.get("message") or {}
            if message.get("id") == message_id:
                return draft.get("id")

        # Accept both camelCase and snake_case spellings of the page token.
        page_token = data.get("nextPageToken") or data.get("next_page_token") or ""
        if not page_token:
            return None
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.google_calendar import GoogleCalendarToolMetadataService
|
from app.services.google_calendar import GoogleCalendarToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -19,6 +20,23 @@ def create_create_calendar_event_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the create_calendar_event tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured create_calendar_event tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_calendar_event(
|
async def create_calendar_event(
|
||||||
summary: str,
|
summary: str,
|
||||||
|
|
@ -60,20 +78,23 @@ def create_create_calendar_event_tool(
|
||||||
f"create_calendar_event called: summary='{summary}', start='{start_datetime}', end='{end_datetime}'"
|
f"create_calendar_event called: summary='{summary}', start='{start_datetime}', end='{end_datetime}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Google Calendar tool not properly configured. Please contact support.",
|
"message": "Google Calendar tool not properly configured. Please contact support.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = GoogleCalendarToolMetadataService(db_session)
|
metadata_service = GoogleCalendarToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_creation_context(
|
context = await metadata_service.get_creation_context(
|
||||||
search_space_id, user_id
|
search_space_id, user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
if "error" in context:
|
if "error" in context:
|
||||||
logger.error(f"Failed to fetch creation context: {context['error']}")
|
logger.error(
|
||||||
|
f"Failed to fetch creation context: {context['error']}"
|
||||||
|
)
|
||||||
return {"status": "error", "message": context["error"]}
|
return {"status": "error", "message": context["error"]}
|
||||||
|
|
||||||
accounts = context.get("accounts", [])
|
accounts = context.get("accounts", [])
|
||||||
|
|
@ -113,7 +134,9 @@ def create_create_calendar_event_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
final_summary = result.params.get("summary", summary)
|
final_summary = result.params.get("summary", summary)
|
||||||
final_start_datetime = result.params.get("start_datetime", start_datetime)
|
final_start_datetime = result.params.get(
|
||||||
|
"start_datetime", start_datetime
|
||||||
|
)
|
||||||
final_end_datetime = result.params.get("end_datetime", end_datetime)
|
final_end_datetime = result.params.get("end_datetime", end_datetime)
|
||||||
final_description = result.params.get("description", description)
|
final_description = result.params.get("description", description)
|
||||||
final_location = result.params.get("location", location)
|
final_location = result.params.get("location", location)
|
||||||
|
|
@ -121,7 +144,10 @@ def create_create_calendar_event_tool(
|
||||||
final_connector_id = result.params.get("connector_id")
|
final_connector_id = result.params.get("connector_id")
|
||||||
|
|
||||||
if not final_summary or not final_summary.strip():
|
if not final_summary or not final_summary.strip():
|
||||||
return {"status": "error", "message": "Event summary cannot be empty."}
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"message": "Event summary cannot be empty.",
|
||||||
|
}
|
||||||
|
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
|
|
||||||
|
|
@ -168,16 +194,13 @@ def create_create_calendar_event_tool(
|
||||||
f"Creating calendar event: summary='{final_summary}', connector={actual_connector_id}"
|
f"Creating calendar event: summary='{final_summary}', connector={actual_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
is_composio_calendar = (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
||||||
):
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
if is_composio_calendar:
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
creds = build_composio_credentials(cca_id)
|
|
||||||
else:
|
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Composio connected account ID not found for this connector.",
|
"message": "Composio connected account ID not found for this connector.",
|
||||||
|
|
@ -211,10 +234,6 @@ def create_create_calendar_event_tool(
|
||||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
service = await asyncio.get_event_loop().run_in_executor(
|
|
||||||
None, lambda: build("calendar", "v3", credentials=creds)
|
|
||||||
)
|
|
||||||
|
|
||||||
tz = context.get("timezone", "UTC")
|
tz = context.get("timezone", "UTC")
|
||||||
event_body: dict[str, Any] = {
|
event_body: dict[str, Any] = {
|
||||||
"summary": final_summary,
|
"summary": final_summary,
|
||||||
|
|
@ -231,6 +250,43 @@ def create_create_calendar_event_tool(
|
||||||
]
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if is_composio_calendar:
|
||||||
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
|
composio_params = {
|
||||||
|
"calendar_id": "primary",
|
||||||
|
"summary": final_summary,
|
||||||
|
"start_datetime": final_start_datetime,
|
||||||
|
"end_datetime": final_end_datetime,
|
||||||
|
"timezone": tz,
|
||||||
|
"attendees": final_attendees or [],
|
||||||
|
}
|
||||||
|
if final_description:
|
||||||
|
composio_params["description"] = final_description
|
||||||
|
if final_location:
|
||||||
|
composio_params["location"] = final_location
|
||||||
|
|
||||||
|
composio_result = await ComposioService().execute_tool(
|
||||||
|
connected_account_id=cca_id,
|
||||||
|
tool_name="GOOGLECALENDAR_CREATE_EVENT",
|
||||||
|
params=composio_params,
|
||||||
|
entity_id=f"surfsense_{user_id}",
|
||||||
|
)
|
||||||
|
if not composio_result.get("success"):
|
||||||
|
raise RuntimeError(
|
||||||
|
composio_result.get(
|
||||||
|
"error", "Unknown Composio Calendar error"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
created = composio_result.get("data", {})
|
||||||
|
if isinstance(created, dict):
|
||||||
|
created = created.get("data", created)
|
||||||
|
if isinstance(created, dict):
|
||||||
|
created = created.get("response_data", created)
|
||||||
|
else:
|
||||||
|
service = await asyncio.get_event_loop().run_in_executor(
|
||||||
|
None, lambda: build("calendar", "v3", credentials=creds)
|
||||||
|
)
|
||||||
created = await asyncio.get_event_loop().run_in_executor(
|
created = await asyncio.get_event_loop().run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: (
|
lambda: (
|
||||||
|
|
@ -295,7 +351,9 @@ def create_create_calendar_event_tool(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
)
|
)
|
||||||
if kb_result["status"] == "success":
|
if kb_result["status"] == "success":
|
||||||
kb_message_suffix = " Your knowledge base has also been updated."
|
kb_message_suffix = (
|
||||||
|
" Your knowledge base has also been updated."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
kb_message_suffix = " This event will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This event will be added to your knowledge base in the next scheduled sync."
|
||||||
except Exception as kb_err:
|
except Exception as kb_err:
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.google_calendar import GoogleCalendarToolMetadataService
|
from app.services.google_calendar import GoogleCalendarToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -19,6 +20,23 @@ def create_delete_calendar_event_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the delete_calendar_event tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured delete_calendar_event tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def delete_calendar_event(
|
async def delete_calendar_event(
|
||||||
event_title_or_id: str,
|
event_title_or_id: str,
|
||||||
|
|
@ -54,13 +72,14 @@ def create_delete_calendar_event_tool(
|
||||||
f"delete_calendar_event called: event_ref='{event_title_or_id}', delete_from_kb={delete_from_kb}"
|
f"delete_calendar_event called: event_ref='{event_title_or_id}', delete_from_kb={delete_from_kb}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Google Calendar tool not properly configured. Please contact support.",
|
"message": "Google Calendar tool not properly configured. Please contact support.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = GoogleCalendarToolMetadataService(db_session)
|
metadata_service = GoogleCalendarToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_deletion_context(
|
context = await metadata_service.get_deletion_context(
|
||||||
search_space_id, user_id, event_title_or_id
|
search_space_id, user_id, event_title_or_id
|
||||||
|
|
@ -121,7 +140,9 @@ def create_delete_calendar_event_tool(
|
||||||
final_connector_id = result.params.get(
|
final_connector_id = result.params.get(
|
||||||
"connector_id", connector_id_from_context
|
"connector_id", connector_id_from_context
|
||||||
)
|
)
|
||||||
final_delete_from_kb = result.params.get("delete_from_kb", delete_from_kb)
|
final_delete_from_kb = result.params.get(
|
||||||
|
"delete_from_kb", delete_from_kb
|
||||||
|
)
|
||||||
|
|
||||||
if not final_connector_id:
|
if not final_connector_id:
|
||||||
return {
|
return {
|
||||||
|
|
@ -159,16 +180,13 @@ def create_delete_calendar_event_tool(
|
||||||
f"Deleting calendar event: event_id='{final_event_id}', connector={actual_connector_id}"
|
f"Deleting calendar event: event_id='{final_event_id}', connector={actual_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
is_composio_calendar = (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
||||||
):
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
if is_composio_calendar:
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
creds = build_composio_credentials(cca_id)
|
|
||||||
else:
|
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Composio connected account ID not found for this connector.",
|
"message": "Composio connected account ID not found for this connector.",
|
||||||
|
|
@ -202,11 +220,29 @@ def create_delete_calendar_event_tool(
|
||||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if is_composio_calendar:
|
||||||
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
|
composio_result = await ComposioService().execute_tool(
|
||||||
|
connected_account_id=cca_id,
|
||||||
|
tool_name="GOOGLECALENDAR_DELETE_EVENT",
|
||||||
|
params={
|
||||||
|
"calendar_id": "primary",
|
||||||
|
"event_id": final_event_id,
|
||||||
|
},
|
||||||
|
entity_id=f"surfsense_{user_id}",
|
||||||
|
)
|
||||||
|
if not composio_result.get("success"):
|
||||||
|
raise RuntimeError(
|
||||||
|
composio_result.get(
|
||||||
|
"error", "Unknown Composio Calendar error"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
service = await asyncio.get_event_loop().run_in_executor(
|
service = await asyncio.get_event_loop().run_in_executor(
|
||||||
None, lambda: build("calendar", "v3", credentials=creds)
|
None, lambda: build("calendar", "v3", credentials=creds)
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
|
||||||
await asyncio.get_event_loop().run_in_executor(
|
await asyncio.get_event_loop().run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: (
|
lambda: (
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
|
|
||||||
from app.agents.new_chat.tools.gmail.search_emails import _build_credentials
|
from app.agents.new_chat.tools.gmail.search_emails import _build_credentials
|
||||||
from app.db import SearchSourceConnector, SearchSourceConnectorType
|
from app.db import SearchSourceConnector, SearchSourceConnectorType, async_session_maker
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -16,11 +16,57 @@ _CALENDAR_TYPES = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _to_calendar_boundary(value: str, *, is_end: bool) -> str:
|
||||||
|
if "T" in value:
|
||||||
|
return value
|
||||||
|
time = "23:59:59" if is_end else "00:00:00"
|
||||||
|
return f"{value}T{time}Z"
|
||||||
|
|
||||||
|
|
||||||
|
def _format_calendar_events(events_raw: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||||
|
events = []
|
||||||
|
for ev in events_raw:
|
||||||
|
start = ev.get("start", {})
|
||||||
|
end = ev.get("end", {})
|
||||||
|
attendees_raw = ev.get("attendees", [])
|
||||||
|
events.append(
|
||||||
|
{
|
||||||
|
"event_id": ev.get("id"),
|
||||||
|
"summary": ev.get("summary", "No Title"),
|
||||||
|
"start": start.get("dateTime") or start.get("date", ""),
|
||||||
|
"end": end.get("dateTime") or end.get("date", ""),
|
||||||
|
"location": ev.get("location", ""),
|
||||||
|
"description": ev.get("description", ""),
|
||||||
|
"html_link": ev.get("htmlLink", ""),
|
||||||
|
"attendees": [a.get("email", "") for a in attendees_raw[:10]],
|
||||||
|
"status": ev.get("status", ""),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return events
|
||||||
|
|
||||||
|
|
||||||
def create_search_calendar_events_tool(
|
def create_search_calendar_events_tool(
|
||||||
db_session: AsyncSession | None = None,
|
db_session: AsyncSession | None = None,
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the search_calendar_events tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured search_calendar_events tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def search_calendar_events(
|
async def search_calendar_events(
|
||||||
start_date: str,
|
start_date: str,
|
||||||
|
|
@ -38,7 +84,7 @@ def create_search_calendar_events_tool(
|
||||||
Dictionary with status and a list of events including
|
Dictionary with status and a list of events including
|
||||||
event_id, summary, start, end, location, attendees.
|
event_id, summary, start, end, location, attendees.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Calendar tool not properly configured.",
|
"message": "Calendar tool not properly configured.",
|
||||||
|
|
@ -47,6 +93,7 @@ def create_search_calendar_events_tool(
|
||||||
max_results = min(max_results, 50)
|
max_results = min(max_results, 50)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
select(SearchSourceConnector).filter(
|
select(SearchSourceConnector).filter(
|
||||||
SearchSourceConnector.search_space_id == search_space_id,
|
SearchSourceConnector.search_space_id == search_space_id,
|
||||||
|
|
@ -61,9 +108,34 @@ def create_search_calendar_events_tool(
|
||||||
"message": "No Google Calendar connector found. Please connect Google Calendar in your workspace settings.",
|
"message": "No Google Calendar connector found. Please connect Google Calendar in your workspace settings.",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (
|
||||||
|
connector.connector_type
|
||||||
|
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
||||||
|
):
|
||||||
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
|
if not cca_id:
|
||||||
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"message": "Composio connected account ID not found for this connector.",
|
||||||
|
}
|
||||||
|
|
||||||
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
|
events_raw, error = await ComposioService().get_calendar_events(
|
||||||
|
connected_account_id=cca_id,
|
||||||
|
entity_id=f"surfsense_{user_id}",
|
||||||
|
time_min=_to_calendar_boundary(start_date, is_end=False),
|
||||||
|
time_max=_to_calendar_boundary(end_date, is_end=True),
|
||||||
|
max_results=max_results,
|
||||||
|
)
|
||||||
|
if not events_raw and not error:
|
||||||
|
error = "No events found in the specified date range."
|
||||||
|
else:
|
||||||
creds = _build_credentials(connector)
|
creds = _build_credentials(connector)
|
||||||
|
|
||||||
from app.connectors.google_calendar_connector import GoogleCalendarConnector
|
from app.connectors.google_calendar_connector import (
|
||||||
|
GoogleCalendarConnector,
|
||||||
|
)
|
||||||
|
|
||||||
cal = GoogleCalendarConnector(
|
cal = GoogleCalendarConnector(
|
||||||
credentials=creds,
|
credentials=creds,
|
||||||
|
|
@ -97,24 +169,7 @@ def create_search_calendar_events_tool(
|
||||||
}
|
}
|
||||||
return {"status": "error", "message": error}
|
return {"status": "error", "message": error}
|
||||||
|
|
||||||
events = []
|
events = _format_calendar_events(events_raw)
|
||||||
for ev in events_raw:
|
|
||||||
start = ev.get("start", {})
|
|
||||||
end = ev.get("end", {})
|
|
||||||
attendees_raw = ev.get("attendees", [])
|
|
||||||
events.append(
|
|
||||||
{
|
|
||||||
"event_id": ev.get("id"),
|
|
||||||
"summary": ev.get("summary", "No Title"),
|
|
||||||
"start": start.get("dateTime") or start.get("date", ""),
|
|
||||||
"end": end.get("dateTime") or end.get("date", ""),
|
|
||||||
"location": ev.get("location", ""),
|
|
||||||
"description": ev.get("description", ""),
|
|
||||||
"html_link": ev.get("htmlLink", ""),
|
|
||||||
"attendees": [a.get("email", "") for a in attendees_raw[:10]],
|
|
||||||
"status": ev.get("status", ""),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return {"status": "success", "events": events, "total": len(events)}
|
return {"status": "success", "events": events, "total": len(events)}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.google_calendar import GoogleCalendarToolMetadataService
|
from app.services.google_calendar import GoogleCalendarToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -33,6 +34,23 @@ def create_update_calendar_event_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the update_calendar_event tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured update_calendar_event tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def update_calendar_event(
|
async def update_calendar_event(
|
||||||
event_title_or_id: str,
|
event_title_or_id: str,
|
||||||
|
|
@ -74,13 +92,14 @@ def create_update_calendar_event_tool(
|
||||||
"""
|
"""
|
||||||
logger.info(f"update_calendar_event called: event_ref='{event_title_or_id}'")
|
logger.info(f"update_calendar_event called: event_ref='{event_title_or_id}'")
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Google Calendar tool not properly configured. Please contact support.",
|
"message": "Google Calendar tool not properly configured. Please contact support.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = GoogleCalendarToolMetadataService(db_session)
|
metadata_service = GoogleCalendarToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_update_context(
|
context = await metadata_service.get_update_context(
|
||||||
search_space_id, user_id, event_title_or_id
|
search_space_id, user_id, event_title_or_id
|
||||||
|
|
@ -192,16 +211,13 @@ def create_update_calendar_event_tool(
|
||||||
f"Updating calendar event: event_id='{final_event_id}', connector={actual_connector_id}"
|
f"Updating calendar event: event_id='{final_event_id}', connector={actual_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
is_composio_calendar = (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
||||||
):
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
if is_composio_calendar:
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
creds = build_composio_credentials(cca_id)
|
|
||||||
else:
|
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Composio connected account ID not found for this connector.",
|
"message": "Composio connected account ID not found for this connector.",
|
||||||
|
|
@ -235,10 +251,6 @@ def create_update_calendar_event_tool(
|
||||||
expiry=datetime.fromisoformat(exp) if exp else None,
|
expiry=datetime.fromisoformat(exp) if exp else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
service = await asyncio.get_event_loop().run_in_executor(
|
|
||||||
None, lambda: build("calendar", "v3", credentials=creds)
|
|
||||||
)
|
|
||||||
|
|
||||||
update_body: dict[str, Any] = {}
|
update_body: dict[str, Any] = {}
|
||||||
if final_new_summary is not None:
|
if final_new_summary is not None:
|
||||||
update_body["summary"] = final_new_summary
|
update_body["summary"] = final_new_summary
|
||||||
|
|
@ -247,7 +259,9 @@ def create_update_calendar_event_tool(
|
||||||
final_new_start_datetime, context
|
final_new_start_datetime, context
|
||||||
)
|
)
|
||||||
if final_new_end_datetime is not None:
|
if final_new_end_datetime is not None:
|
||||||
update_body["end"] = _build_time_body(final_new_end_datetime, context)
|
update_body["end"] = _build_time_body(
|
||||||
|
final_new_end_datetime, context
|
||||||
|
)
|
||||||
if final_new_description is not None:
|
if final_new_description is not None:
|
||||||
update_body["description"] = final_new_description
|
update_body["description"] = final_new_description
|
||||||
if final_new_location is not None:
|
if final_new_location is not None:
|
||||||
|
|
@ -264,6 +278,53 @@ def create_update_calendar_event_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
if is_composio_calendar:
|
||||||
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
|
composio_params: dict[str, Any] = {
|
||||||
|
"calendar_id": "primary",
|
||||||
|
"event_id": final_event_id,
|
||||||
|
}
|
||||||
|
if final_new_summary is not None:
|
||||||
|
composio_params["summary"] = final_new_summary
|
||||||
|
if final_new_start_datetime is not None:
|
||||||
|
composio_params["start_time"] = final_new_start_datetime
|
||||||
|
if final_new_end_datetime is not None:
|
||||||
|
composio_params["end_time"] = final_new_end_datetime
|
||||||
|
if final_new_description is not None:
|
||||||
|
composio_params["description"] = final_new_description
|
||||||
|
if final_new_location is not None:
|
||||||
|
composio_params["location"] = final_new_location
|
||||||
|
if final_new_attendees is not None:
|
||||||
|
composio_params["attendees"] = [
|
||||||
|
e.strip() for e in final_new_attendees if e.strip()
|
||||||
|
]
|
||||||
|
if not _is_date_only(
|
||||||
|
final_new_start_datetime or final_new_end_datetime or ""
|
||||||
|
):
|
||||||
|
composio_params["timezone"] = context.get("timezone", "UTC")
|
||||||
|
|
||||||
|
composio_result = await ComposioService().execute_tool(
|
||||||
|
connected_account_id=cca_id,
|
||||||
|
tool_name="GOOGLECALENDAR_PATCH_EVENT",
|
||||||
|
params=composio_params,
|
||||||
|
entity_id=f"surfsense_{user_id}",
|
||||||
|
)
|
||||||
|
if not composio_result.get("success"):
|
||||||
|
raise RuntimeError(
|
||||||
|
composio_result.get(
|
||||||
|
"error", "Unknown Composio Calendar error"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
updated = composio_result.get("data", {})
|
||||||
|
if isinstance(updated, dict):
|
||||||
|
updated = updated.get("data", updated)
|
||||||
|
if isinstance(updated, dict):
|
||||||
|
updated = updated.get("response_data", updated)
|
||||||
|
else:
|
||||||
|
service = await asyncio.get_event_loop().run_in_executor(
|
||||||
|
None, lambda: build("calendar", "v3", credentials=creds)
|
||||||
|
)
|
||||||
updated = await asyncio.get_event_loop().run_in_executor(
|
updated = await asyncio.get_event_loop().run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: (
|
lambda: (
|
||||||
|
|
@ -314,7 +375,9 @@ def create_update_calendar_event_tool(
|
||||||
kb_message_suffix = ""
|
kb_message_suffix = ""
|
||||||
if document_id is not None:
|
if document_id is not None:
|
||||||
try:
|
try:
|
||||||
from app.services.google_calendar import GoogleCalendarKBSyncService
|
from app.services.google_calendar import (
|
||||||
|
GoogleCalendarKBSyncService,
|
||||||
|
)
|
||||||
|
|
||||||
kb_service = GoogleCalendarKBSyncService(db_session)
|
kb_service = GoogleCalendarKBSyncService(db_session)
|
||||||
kb_result = await kb_service.sync_after_update(
|
kb_result = await kb_service.sync_after_update(
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.google_drive.client import GoogleDriveClient
|
from app.connectors.google_drive.client import GoogleDriveClient
|
||||||
from app.connectors.google_drive.file_types import GOOGLE_DOC, GOOGLE_SHEET
|
from app.connectors.google_drive.file_types import GOOGLE_DOC, GOOGLE_SHEET
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.google_drive import GoogleDriveToolMetadataService
|
from app.services.google_drive import GoogleDriveToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -23,6 +24,25 @@ def create_create_google_drive_file_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the create_google_drive_file tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
search_space_id: Search space ID to find the Google Drive connector
|
||||||
|
user_id: User ID for fetching user-specific context
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured create_google_drive_file tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_google_drive_file(
|
async def create_google_drive_file(
|
||||||
name: str,
|
name: str,
|
||||||
|
|
@ -65,7 +85,7 @@ def create_create_google_drive_file_tool(
|
||||||
f"create_google_drive_file called: name='{name}', type='{file_type}'"
|
f"create_google_drive_file called: name='{name}', type='{file_type}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Google Drive tool not properly configured. Please contact support.",
|
"message": "Google Drive tool not properly configured. Please contact support.",
|
||||||
|
|
@ -78,18 +98,23 @@ def create_create_google_drive_file_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = GoogleDriveToolMetadataService(db_session)
|
metadata_service = GoogleDriveToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_creation_context(
|
context = await metadata_service.get_creation_context(
|
||||||
search_space_id, user_id
|
search_space_id, user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
if "error" in context:
|
if "error" in context:
|
||||||
logger.error(f"Failed to fetch creation context: {context['error']}")
|
logger.error(
|
||||||
|
f"Failed to fetch creation context: {context['error']}"
|
||||||
|
)
|
||||||
return {"status": "error", "message": context["error"]}
|
return {"status": "error", "message": context["error"]}
|
||||||
|
|
||||||
accounts = context.get("accounts", [])
|
accounts = context.get("accounts", [])
|
||||||
if accounts and all(a.get("auth_expired") for a in accounts):
|
if accounts and all(a.get("auth_expired") for a in accounts):
|
||||||
logger.warning("All Google Drive accounts have expired authentication")
|
logger.warning(
|
||||||
|
"All Google Drive accounts have expired authentication"
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
"status": "auth_error",
|
"status": "auth_error",
|
||||||
"message": "All connected Google Drive accounts need re-authentication. Please re-authenticate in your connector settings.",
|
"message": "All connected Google Drive accounts need re-authentication. Please re-authenticate in your connector settings.",
|
||||||
|
|
@ -179,23 +204,53 @@ def create_create_google_drive_file_tool(
|
||||||
f"Creating Google Drive file: name='{final_name}', type='{final_file_type}', connector={actual_connector_id}"
|
f"Creating Google Drive file: name='{final_name}', type='{final_file_type}', connector={actual_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
pre_built_creds = None
|
is_composio_drive = (
|
||||||
if (
|
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
|
||||||
):
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
if is_composio_drive:
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
pre_built_creds = build_composio_credentials(cca_id)
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"message": "Composio connected account ID not found for this Drive connector.",
|
||||||
|
}
|
||||||
client = GoogleDriveClient(
|
client = GoogleDriveClient(
|
||||||
session=db_session,
|
session=db_session,
|
||||||
connector_id=actual_connector_id,
|
connector_id=actual_connector_id,
|
||||||
credentials=pre_built_creds,
|
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
|
if is_composio_drive:
|
||||||
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
|
params: dict[str, Any] = {
|
||||||
|
"name": final_name,
|
||||||
|
"mimeType": mime_type,
|
||||||
|
"fields": "id,name,webViewLink,mimeType",
|
||||||
|
}
|
||||||
|
if final_parent_folder_id:
|
||||||
|
params["parents"] = [final_parent_folder_id]
|
||||||
|
if final_content:
|
||||||
|
params["description"] = final_content[:4096]
|
||||||
|
|
||||||
|
result = await ComposioService().execute_tool(
|
||||||
|
connected_account_id=cca_id,
|
||||||
|
tool_name="GOOGLEDRIVE_CREATE_FILE",
|
||||||
|
params=params,
|
||||||
|
entity_id=f"surfsense_{user_id}",
|
||||||
|
)
|
||||||
|
if not result.get("success"):
|
||||||
|
raise RuntimeError(
|
||||||
|
result.get("error", "Unknown Composio Drive error")
|
||||||
|
)
|
||||||
|
created = result.get("data", {})
|
||||||
|
if isinstance(created, dict):
|
||||||
|
created = created.get("data", created)
|
||||||
|
if isinstance(created, dict):
|
||||||
|
created = created.get("response_data", created)
|
||||||
|
if not isinstance(created, dict):
|
||||||
|
created = {}
|
||||||
|
else:
|
||||||
created = await client.create_file(
|
created = await client.create_file(
|
||||||
name=final_name,
|
name=final_name,
|
||||||
mime_type=mime_type,
|
mime_type=mime_type,
|
||||||
|
|
@ -253,7 +308,9 @@ def create_create_google_drive_file_tool(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
)
|
)
|
||||||
if kb_result["status"] == "success":
|
if kb_result["status"] == "success":
|
||||||
kb_message_suffix = " Your knowledge base has also been updated."
|
kb_message_suffix = (
|
||||||
|
" Your knowledge base has also been updated."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync."
|
||||||
except Exception as kb_err:
|
except Exception as kb_err:
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.google_drive.client import GoogleDriveClient
|
from app.connectors.google_drive.client import GoogleDriveClient
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.google_drive import GoogleDriveToolMetadataService
|
from app.services.google_drive import GoogleDriveToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -17,6 +18,25 @@ def create_delete_google_drive_file_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the delete_google_drive_file tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
search_space_id: Search space ID to find the Google Drive connector
|
||||||
|
user_id: User ID for fetching user-specific context
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured delete_google_drive_file tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def delete_google_drive_file(
|
async def delete_google_drive_file(
|
||||||
file_name: str,
|
file_name: str,
|
||||||
|
|
@ -55,13 +75,14 @@ def create_delete_google_drive_file_tool(
|
||||||
f"delete_google_drive_file called: file_name='{file_name}', delete_from_kb={delete_from_kb}"
|
f"delete_google_drive_file called: file_name='{file_name}', delete_from_kb={delete_from_kb}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "Google Drive tool not properly configured. Please contact support.",
|
"message": "Google Drive tool not properly configured. Please contact support.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = GoogleDriveToolMetadataService(db_session)
|
metadata_service = GoogleDriveToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_trash_context(
|
context = await metadata_service.get_trash_context(
|
||||||
search_space_id, user_id, file_name
|
search_space_id, user_id, file_name
|
||||||
|
|
@ -122,7 +143,9 @@ def create_delete_google_drive_file_tool(
|
||||||
final_connector_id = result.params.get(
|
final_connector_id = result.params.get(
|
||||||
"connector_id", connector_id_from_context
|
"connector_id", connector_id_from_context
|
||||||
)
|
)
|
||||||
final_delete_from_kb = result.params.get("delete_from_kb", delete_from_kb)
|
final_delete_from_kb = result.params.get(
|
||||||
|
"delete_from_kb", delete_from_kb
|
||||||
|
)
|
||||||
|
|
||||||
if not final_connector_id:
|
if not final_connector_id:
|
||||||
return {
|
return {
|
||||||
|
|
@ -158,23 +181,37 @@ def create_delete_google_drive_file_tool(
|
||||||
f"Deleting Google Drive file: file_id='{final_file_id}', connector={final_connector_id}"
|
f"Deleting Google Drive file: file_id='{final_file_id}', connector={final_connector_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
pre_built_creds = None
|
is_composio_drive = (
|
||||||
if (
|
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
|
||||||
):
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
if is_composio_drive:
|
||||||
|
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
pre_built_creds = build_composio_credentials(cca_id)
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"message": "Composio connected account ID not found for this Drive connector.",
|
||||||
|
}
|
||||||
|
|
||||||
client = GoogleDriveClient(
|
client = GoogleDriveClient(
|
||||||
session=db_session,
|
session=db_session,
|
||||||
connector_id=connector.id,
|
connector_id=connector.id,
|
||||||
credentials=pre_built_creds,
|
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
|
if is_composio_drive:
|
||||||
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
|
result = await ComposioService().execute_tool(
|
||||||
|
connected_account_id=cca_id,
|
||||||
|
tool_name="GOOGLEDRIVE_TRASH_FILE",
|
||||||
|
params={"file_id": final_file_id},
|
||||||
|
entity_id=f"surfsense_{user_id}",
|
||||||
|
)
|
||||||
|
if not result.get("success"):
|
||||||
|
raise RuntimeError(
|
||||||
|
result.get("error", "Unknown Composio Drive error")
|
||||||
|
)
|
||||||
|
else:
|
||||||
await client.trash_file(file_id=final_file_id)
|
await client.trash_file(file_id=final_file_id)
|
||||||
except HttpError as http_err:
|
except HttpError as http_err:
|
||||||
if http_err.resp.status == 403:
|
if http_err.resp.status == 403:
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ DEFAULT_AUTO_APPROVED_TOOLS: frozenset[str] = frozenset(
|
||||||
{
|
{
|
||||||
"create_gmail_draft",
|
"create_gmail_draft",
|
||||||
"update_gmail_draft",
|
"update_gmail_draft",
|
||||||
|
"create_calendar_event",
|
||||||
"create_notion_page",
|
"create_notion_page",
|
||||||
"create_confluence_page",
|
"create_confluence_page",
|
||||||
"create_google_drive_file",
|
"create_google_drive_file",
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.jira_history import JiraHistoryConnector
|
from app.connectors.jira_history import JiraHistoryConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.jira import JiraToolMetadataService
|
from app.services.jira import JiraToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -19,6 +20,28 @@ def create_create_jira_issue_tool(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
connector_id: int | None = None,
|
connector_id: int | None = None,
|
||||||
):
|
):
|
||||||
|
"""Factory function to create the create_jira_issue tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker`. This is critical for the compiled-agent
|
||||||
|
cache: the compiled graph (and therefore this closure) is reused
|
||||||
|
across HTTP requests, so capturing a per-request session here would
|
||||||
|
surface stale/closed sessions on cache hits. Per-call sessions also
|
||||||
|
keep the request's outer transaction free of long-running Jira API
|
||||||
|
blocking.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
search_space_id: Search space ID to find the Jira connector
|
||||||
|
user_id: User ID for fetching user-specific context
|
||||||
|
connector_id: Optional specific connector ID (if known)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured create_jira_issue tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_jira_issue(
|
async def create_jira_issue(
|
||||||
project_key: str,
|
project_key: str,
|
||||||
|
|
@ -49,10 +72,11 @@ def create_create_jira_issue_tool(
|
||||||
f"create_jira_issue called: project_key='{project_key}', summary='{summary}'"
|
f"create_jira_issue called: project_key='{project_key}', summary='{summary}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Jira tool not properly configured."}
|
return {"status": "error", "message": "Jira tool not properly configured."}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = JiraToolMetadataService(db_session)
|
metadata_service = JiraToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_creation_context(
|
context = await metadata_service.get_creation_context(
|
||||||
search_space_id, user_id
|
search_space_id, user_id
|
||||||
|
|
@ -97,7 +121,10 @@ def create_create_jira_issue_tool(
|
||||||
final_connector_id = result.params.get("connector_id", connector_id)
|
final_connector_id = result.params.get("connector_id", connector_id)
|
||||||
|
|
||||||
if not final_summary or not final_summary.strip():
|
if not final_summary or not final_summary.strip():
|
||||||
return {"status": "error", "message": "Issue summary cannot be empty."}
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"message": "Issue summary cannot be empty.",
|
||||||
|
}
|
||||||
if not final_project_key:
|
if not final_project_key:
|
||||||
return {"status": "error", "message": "A project must be selected."}
|
return {"status": "error", "message": "A project must be selected."}
|
||||||
|
|
||||||
|
|
@ -117,7 +144,10 @@ def create_create_jira_issue_tool(
|
||||||
)
|
)
|
||||||
connector = result.scalars().first()
|
connector = result.scalars().first()
|
||||||
if not connector:
|
if not connector:
|
||||||
return {"status": "error", "message": "No Jira connector found."}
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"message": "No Jira connector found.",
|
||||||
|
}
|
||||||
actual_connector_id = connector.id
|
actual_connector_id = connector.id
|
||||||
else:
|
else:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
|
|
@ -188,7 +218,9 @@ def create_create_jira_issue_tool(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
)
|
)
|
||||||
if kb_result["status"] == "success":
|
if kb_result["status"] == "success":
|
||||||
kb_message_suffix = " Your knowledge base has also been updated."
|
kb_message_suffix = (
|
||||||
|
" Your knowledge base has also been updated."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
kb_message_suffix = " This issue will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This issue will be added to your knowledge base in the next scheduled sync."
|
||||||
except Exception as kb_err:
|
except Exception as kb_err:
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.jira_history import JiraHistoryConnector
|
from app.connectors.jira_history import JiraHistoryConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.jira import JiraToolMetadataService
|
from app.services.jira import JiraToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -19,6 +20,26 @@ def create_delete_jira_issue_tool(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
connector_id: int | None = None,
|
connector_id: int | None = None,
|
||||||
):
|
):
|
||||||
|
"""Factory function to create the delete_jira_issue tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker`. This is critical for the compiled-agent
|
||||||
|
cache: the compiled graph (and therefore this closure) is reused
|
||||||
|
across HTTP requests, so capturing a per-request session here would
|
||||||
|
surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
search_space_id: Search space ID to find the Jira connector
|
||||||
|
user_id: User ID for fetching user-specific context
|
||||||
|
connector_id: Optional specific connector ID (if known)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured delete_jira_issue tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def delete_jira_issue(
|
async def delete_jira_issue(
|
||||||
issue_title_or_key: str,
|
issue_title_or_key: str,
|
||||||
|
|
@ -44,10 +65,11 @@ def create_delete_jira_issue_tool(
|
||||||
f"delete_jira_issue called: issue_title_or_key='{issue_title_or_key}'"
|
f"delete_jira_issue called: issue_title_or_key='{issue_title_or_key}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Jira tool not properly configured."}
|
return {"status": "error", "message": "Jira tool not properly configured."}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = JiraToolMetadataService(db_session)
|
metadata_service = JiraToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_deletion_context(
|
context = await metadata_service.get_deletion_context(
|
||||||
search_space_id, user_id, issue_title_or_key
|
search_space_id, user_id, issue_title_or_key
|
||||||
|
|
@ -92,7 +114,9 @@ def create_delete_jira_issue_tool(
|
||||||
final_connector_id = result.params.get(
|
final_connector_id = result.params.get(
|
||||||
"connector_id", connector_id_from_context
|
"connector_id", connector_id_from_context
|
||||||
)
|
)
|
||||||
final_delete_from_kb = result.params.get("delete_from_kb", delete_from_kb)
|
final_delete_from_kb = result.params.get(
|
||||||
|
"delete_from_kb", delete_from_kb
|
||||||
|
)
|
||||||
|
|
||||||
from sqlalchemy.future import select
|
from sqlalchemy.future import select
|
||||||
|
|
||||||
|
|
@ -129,7 +153,10 @@ def create_delete_jira_issue_tool(
|
||||||
except Exception as api_err:
|
except Exception as api_err:
|
||||||
if "status code 403" in str(api_err).lower():
|
if "status code 403" in str(api_err).lower():
|
||||||
try:
|
try:
|
||||||
connector.config = {**connector.config, "auth_expired": True}
|
connector.config = {
|
||||||
|
**connector.config,
|
||||||
|
"auth_expired": True,
|
||||||
|
}
|
||||||
flag_modified(connector, "config")
|
flag_modified(connector, "config")
|
||||||
await db_session.commit()
|
await db_session.commit()
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ from sqlalchemy.orm.attributes import flag_modified
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.jira_history import JiraHistoryConnector
|
from app.connectors.jira_history import JiraHistoryConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.jira import JiraToolMetadataService
|
from app.services.jira import JiraToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -19,6 +20,26 @@ def create_update_jira_issue_tool(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
connector_id: int | None = None,
|
connector_id: int | None = None,
|
||||||
):
|
):
|
||||||
|
"""Factory function to create the update_jira_issue tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker`. This is critical for the compiled-agent
|
||||||
|
cache: the compiled graph (and therefore this closure) is reused
|
||||||
|
across HTTP requests, so capturing a per-request session here would
|
||||||
|
surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
search_space_id: Search space ID to find the Jira connector
|
||||||
|
user_id: User ID for fetching user-specific context
|
||||||
|
connector_id: Optional specific connector ID (if known)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured update_jira_issue tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def update_jira_issue(
|
async def update_jira_issue(
|
||||||
issue_title_or_key: str,
|
issue_title_or_key: str,
|
||||||
|
|
@ -48,10 +69,11 @@ def create_update_jira_issue_tool(
|
||||||
f"update_jira_issue called: issue_title_or_key='{issue_title_or_key}'"
|
f"update_jira_issue called: issue_title_or_key='{issue_title_or_key}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Jira tool not properly configured."}
|
return {"status": "error", "message": "Jira tool not properly configured."}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = JiraToolMetadataService(db_session)
|
metadata_service = JiraToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_update_context(
|
context = await metadata_service.get_update_context(
|
||||||
search_space_id, user_id, issue_title_or_key
|
search_space_id, user_id, issue_title_or_key
|
||||||
|
|
@ -97,7 +119,9 @@ def create_update_jira_issue_tool(
|
||||||
|
|
||||||
final_issue_key = result.params.get("issue_key", issue_key)
|
final_issue_key = result.params.get("issue_key", issue_key)
|
||||||
final_summary = result.params.get("new_summary", new_summary)
|
final_summary = result.params.get("new_summary", new_summary)
|
||||||
final_description = result.params.get("new_description", new_description)
|
final_description = result.params.get(
|
||||||
|
"new_description", new_description
|
||||||
|
)
|
||||||
final_priority = result.params.get("new_priority", new_priority)
|
final_priority = result.params.get("new_priority", new_priority)
|
||||||
final_connector_id = result.params.get(
|
final_connector_id = result.params.get(
|
||||||
"connector_id", connector_id_from_context
|
"connector_id", connector_id_from_context
|
||||||
|
|
@ -140,7 +164,9 @@ def create_update_jira_issue_tool(
|
||||||
"content": [
|
"content": [
|
||||||
{
|
{
|
||||||
"type": "paragraph",
|
"type": "paragraph",
|
||||||
"content": [{"type": "text", "text": final_description}],
|
"content": [
|
||||||
|
{"type": "text", "text": final_description}
|
||||||
|
],
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
@ -161,7 +187,10 @@ def create_update_jira_issue_tool(
|
||||||
except Exception as api_err:
|
except Exception as api_err:
|
||||||
if "status code 403" in str(api_err).lower():
|
if "status code 403" in str(api_err).lower():
|
||||||
try:
|
try:
|
||||||
connector.config = {**connector.config, "auth_expired": True}
|
connector.config = {
|
||||||
|
**connector.config,
|
||||||
|
"auth_expired": True,
|
||||||
|
}
|
||||||
flag_modified(connector, "config")
|
flag_modified(connector, "config")
|
||||||
await db_session.commit()
|
await db_session.commit()
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.linear_connector import LinearAPIError, LinearConnector
|
from app.connectors.linear_connector import LinearAPIError, LinearConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.linear import LinearToolMetadataService
|
from app.services.linear import LinearToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -17,11 +18,17 @@ def create_create_linear_issue_tool(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
connector_id: int | None = None,
|
connector_id: int | None = None,
|
||||||
):
|
):
|
||||||
"""
|
"""Factory function to create the create_linear_issue tool.
|
||||||
Factory function to create the create_linear_issue tool.
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker`. This is critical for the compiled-agent
|
||||||
|
cache: the compiled graph (and therefore this closure) is reused
|
||||||
|
across HTTP requests, so capturing a per-request session here would
|
||||||
|
surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db_session: Database session for accessing the Linear connector
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
search_space_id: Search space ID to find the Linear connector
|
search_space_id: Search space ID to find the Linear connector
|
||||||
user_id: User ID for fetching user-specific context
|
user_id: User ID for fetching user-specific context
|
||||||
connector_id: Optional specific connector ID (if known)
|
connector_id: Optional specific connector ID (if known)
|
||||||
|
|
@ -29,6 +36,7 @@ def create_create_linear_issue_tool(
|
||||||
Returns:
|
Returns:
|
||||||
Configured create_linear_issue tool
|
Configured create_linear_issue tool
|
||||||
"""
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_linear_issue(
|
async def create_linear_issue(
|
||||||
|
|
@ -65,7 +73,7 @@ def create_create_linear_issue_tool(
|
||||||
"""
|
"""
|
||||||
logger.info(f"create_linear_issue called: title='{title}'")
|
logger.info(f"create_linear_issue called: title='{title}'")
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Linear tool not properly configured - missing required parameters"
|
"Linear tool not properly configured - missing required parameters"
|
||||||
)
|
)
|
||||||
|
|
@ -75,13 +83,16 @@ def create_create_linear_issue_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = LinearToolMetadataService(db_session)
|
metadata_service = LinearToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_creation_context(
|
context = await metadata_service.get_creation_context(
|
||||||
search_space_id, user_id
|
search_space_id, user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
if "error" in context:
|
if "error" in context:
|
||||||
logger.error(f"Failed to fetch creation context: {context['error']}")
|
logger.error(
|
||||||
|
f"Failed to fetch creation context: {context['error']}"
|
||||||
|
)
|
||||||
return {"status": "error", "message": context["error"]}
|
return {"status": "error", "message": context["error"]}
|
||||||
|
|
||||||
workspaces = context.get("workspaces", [])
|
workspaces = context.get("workspaces", [])
|
||||||
|
|
@ -128,7 +139,10 @@ def create_create_linear_issue_tool(
|
||||||
|
|
||||||
if not final_title or not final_title.strip():
|
if not final_title or not final_title.strip():
|
||||||
logger.error("Title is empty or contains only whitespace")
|
logger.error("Title is empty or contains only whitespace")
|
||||||
return {"status": "error", "message": "Issue title cannot be empty."}
|
return {
|
||||||
|
"status": "error",
|
||||||
|
"message": "Issue title cannot be empty.",
|
||||||
|
}
|
||||||
if not final_team_id:
|
if not final_team_id:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
|
|
@ -192,7 +206,9 @@ def create_create_linear_issue_tool(
|
||||||
)
|
)
|
||||||
|
|
||||||
if result.get("status") == "error":
|
if result.get("status") == "error":
|
||||||
logger.error(f"Failed to create Linear issue: {result.get('message')}")
|
logger.error(
|
||||||
|
f"Failed to create Linear issue: {result.get('message')}"
|
||||||
|
)
|
||||||
return {"status": "error", "message": result.get("message")}
|
return {"status": "error", "message": result.get("message")}
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
@ -215,7 +231,9 @@ def create_create_linear_issue_tool(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
)
|
)
|
||||||
if kb_result["status"] == "success":
|
if kb_result["status"] == "success":
|
||||||
kb_message_suffix = " Your knowledge base has also been updated."
|
kb_message_suffix = (
|
||||||
|
" Your knowledge base has also been updated."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
kb_message_suffix = " This issue will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This issue will be added to your knowledge base in the next scheduled sync."
|
||||||
except Exception as kb_err:
|
except Exception as kb_err:
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.linear_connector import LinearAPIError, LinearConnector
|
from app.connectors.linear_connector import LinearAPIError, LinearConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.linear import LinearToolMetadataService
|
from app.services.linear import LinearToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -17,11 +18,17 @@ def create_delete_linear_issue_tool(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
connector_id: int | None = None,
|
connector_id: int | None = None,
|
||||||
):
|
):
|
||||||
"""
|
"""Factory function to create the delete_linear_issue tool.
|
||||||
Factory function to create the delete_linear_issue tool.
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker`. This is critical for the compiled-agent
|
||||||
|
cache: the compiled graph (and therefore this closure) is reused
|
||||||
|
across HTTP requests, so capturing a per-request session here would
|
||||||
|
surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db_session: Database session for accessing the Linear connector
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
search_space_id: Search space ID to find the Linear connector
|
search_space_id: Search space ID to find the Linear connector
|
||||||
user_id: User ID for finding the correct Linear connector
|
user_id: User ID for finding the correct Linear connector
|
||||||
connector_id: Optional specific connector ID (if known)
|
connector_id: Optional specific connector ID (if known)
|
||||||
|
|
@ -29,6 +36,7 @@ def create_delete_linear_issue_tool(
|
||||||
Returns:
|
Returns:
|
||||||
Configured delete_linear_issue tool
|
Configured delete_linear_issue tool
|
||||||
"""
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def delete_linear_issue(
|
async def delete_linear_issue(
|
||||||
|
|
@ -73,7 +81,7 @@ def create_delete_linear_issue_tool(
|
||||||
f"delete_linear_issue called: issue_ref='{issue_ref}', delete_from_kb={delete_from_kb}"
|
f"delete_linear_issue called: issue_ref='{issue_ref}', delete_from_kb={delete_from_kb}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Linear tool not properly configured - missing required parameters"
|
"Linear tool not properly configured - missing required parameters"
|
||||||
)
|
)
|
||||||
|
|
@ -83,6 +91,7 @@ def create_delete_linear_issue_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = LinearToolMetadataService(db_session)
|
metadata_service = LinearToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_delete_context(
|
context = await metadata_service.get_delete_context(
|
||||||
search_space_id, user_id, issue_ref
|
search_space_id, user_id, issue_ref
|
||||||
|
|
@ -136,7 +145,9 @@ def create_delete_linear_issue_tool(
|
||||||
final_connector_id = result.params.get(
|
final_connector_id = result.params.get(
|
||||||
"connector_id", connector_id_from_context
|
"connector_id", connector_id_from_context
|
||||||
)
|
)
|
||||||
final_delete_from_kb = result.params.get("delete_from_kb", delete_from_kb)
|
final_delete_from_kb = result.params.get(
|
||||||
|
"delete_from_kb", delete_from_kb
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Deleting Linear issue with final params: issue_id={final_issue_id}, "
|
f"Deleting Linear issue with final params: issue_id={final_issue_id}, "
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.linear_connector import LinearAPIError, LinearConnector
|
from app.connectors.linear_connector import LinearAPIError, LinearConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.linear import LinearKBSyncService, LinearToolMetadataService
|
from app.services.linear import LinearKBSyncService, LinearToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -17,11 +18,17 @@ def create_update_linear_issue_tool(
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
connector_id: int | None = None,
|
connector_id: int | None = None,
|
||||||
):
|
):
|
||||||
"""
|
"""Factory function to create the update_linear_issue tool.
|
||||||
Factory function to create the update_linear_issue tool.
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker`. This is critical for the compiled-agent
|
||||||
|
cache: the compiled graph (and therefore this closure) is reused
|
||||||
|
across HTTP requests, so capturing a per-request session here would
|
||||||
|
surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db_session: Database session for accessing the Linear connector
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
search_space_id: Search space ID to find the Linear connector
|
search_space_id: Search space ID to find the Linear connector
|
||||||
user_id: User ID for fetching user-specific context
|
user_id: User ID for fetching user-specific context
|
||||||
connector_id: Optional specific connector ID (if known)
|
connector_id: Optional specific connector ID (if known)
|
||||||
|
|
@ -29,6 +36,7 @@ def create_update_linear_issue_tool(
|
||||||
Returns:
|
Returns:
|
||||||
Configured update_linear_issue tool
|
Configured update_linear_issue tool
|
||||||
"""
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def update_linear_issue(
|
async def update_linear_issue(
|
||||||
|
|
@ -86,7 +94,7 @@ def create_update_linear_issue_tool(
|
||||||
"""
|
"""
|
||||||
logger.info(f"update_linear_issue called: issue_ref='{issue_ref}'")
|
logger.info(f"update_linear_issue called: issue_ref='{issue_ref}'")
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Linear tool not properly configured - missing required parameters"
|
"Linear tool not properly configured - missing required parameters"
|
||||||
)
|
)
|
||||||
|
|
@ -96,6 +104,7 @@ def create_update_linear_issue_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = LinearToolMetadataService(db_session)
|
metadata_service = LinearToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_update_context(
|
context = await metadata_service.get_update_context(
|
||||||
search_space_id, user_id, issue_ref
|
search_space_id, user_id, issue_ref
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
|
|
||||||
from ._auth import LUMA_API, get_api_key, get_luma_connector, luma_headers
|
from ._auth import LUMA_API, get_api_key, get_luma_connector, luma_headers
|
||||||
|
|
||||||
|
|
@ -17,6 +18,23 @@ def create_create_luma_event_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the create_luma_event tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured create_luma_event tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_luma_event(
|
async def create_luma_event(
|
||||||
name: str,
|
name: str,
|
||||||
|
|
@ -40,11 +58,14 @@ def create_create_luma_event_tool(
|
||||||
IMPORTANT:
|
IMPORTANT:
|
||||||
- If status is "rejected", the user explicitly declined. Do NOT retry.
|
- If status is "rejected", the user explicitly declined. Do NOT retry.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Luma tool not properly configured."}
|
return {"status": "error", "message": "Luma tool not properly configured."}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
connector = await get_luma_connector(db_session, search_space_id, user_id)
|
async with async_session_maker() as db_session:
|
||||||
|
connector = await get_luma_connector(
|
||||||
|
db_session, search_space_id, user_id
|
||||||
|
)
|
||||||
if not connector:
|
if not connector:
|
||||||
return {"status": "error", "message": "No Luma connector found."}
|
return {"status": "error", "message": "No Luma connector found."}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ import httpx
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.db import async_session_maker
|
||||||
|
|
||||||
from ._auth import LUMA_API, get_api_key, get_luma_connector, luma_headers
|
from ._auth import LUMA_API, get_api_key, get_luma_connector, luma_headers
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -15,6 +17,23 @@ def create_list_luma_events_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the list_luma_events tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured list_luma_events tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def list_luma_events(
|
async def list_luma_events(
|
||||||
max_results: int = 25,
|
max_results: int = 25,
|
||||||
|
|
@ -28,13 +47,16 @@ def create_list_luma_events_tool(
|
||||||
Dictionary with status and a list of events including
|
Dictionary with status and a list of events including
|
||||||
event_id, name, start_at, end_at, location, url.
|
event_id, name, start_at, end_at, location, url.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Luma tool not properly configured."}
|
return {"status": "error", "message": "Luma tool not properly configured."}
|
||||||
|
|
||||||
max_results = min(max_results, 50)
|
max_results = min(max_results, 50)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
connector = await get_luma_connector(db_session, search_space_id, user_id)
|
async with async_session_maker() as db_session:
|
||||||
|
connector = await get_luma_connector(
|
||||||
|
db_session, search_space_id, user_id
|
||||||
|
)
|
||||||
if not connector:
|
if not connector:
|
||||||
return {"status": "error", "message": "No Luma connector found."}
|
return {"status": "error", "message": "No Luma connector found."}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ import httpx
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.db import async_session_maker
|
||||||
|
|
||||||
from ._auth import LUMA_API, get_api_key, get_luma_connector, luma_headers
|
from ._auth import LUMA_API, get_api_key, get_luma_connector, luma_headers
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -15,6 +17,23 @@ def create_read_luma_event_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the read_luma_event tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured read_luma_event tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def read_luma_event(event_id: str) -> dict[str, Any]:
|
async def read_luma_event(event_id: str) -> dict[str, Any]:
|
||||||
"""Read detailed information about a specific Luma event.
|
"""Read detailed information about a specific Luma event.
|
||||||
|
|
@ -26,11 +45,14 @@ def create_read_luma_event_tool(
|
||||||
Dictionary with status and full event details including
|
Dictionary with status and full event details including
|
||||||
description, attendees count, meeting URL.
|
description, attendees count, meeting URL.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Luma tool not properly configured."}
|
return {"status": "error", "message": "Luma tool not properly configured."}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
connector = await get_luma_connector(db_session, search_space_id, user_id)
|
async with async_session_maker() as db_session:
|
||||||
|
connector = await get_luma_connector(
|
||||||
|
db_session, search_space_id, user_id
|
||||||
|
)
|
||||||
if not connector:
|
if not connector:
|
||||||
return {"status": "error", "message": "No Luma connector found."}
|
return {"status": "error", "message": "No Luma connector found."}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
|
from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.notion import NotionToolMetadataService
|
from app.services.notion import NotionToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -20,8 +21,17 @@ def create_create_notion_page_tool(
|
||||||
"""
|
"""
|
||||||
Factory function to create the create_notion_page tool.
|
Factory function to create the create_notion_page tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker`. This is critical for the compiled-agent
|
||||||
|
cache: the compiled graph (and therefore this closure) is reused
|
||||||
|
across HTTP requests, so capturing a per-request session here would
|
||||||
|
surface stale/closed sessions on cache hits. Per-call sessions also
|
||||||
|
keep the request's outer transaction free of long-running Notion API
|
||||||
|
blocking.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db_session: Database session for accessing Notion connector
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
search_space_id: Search space ID to find the Notion connector
|
search_space_id: Search space ID to find the Notion connector
|
||||||
user_id: User ID for fetching user-specific context
|
user_id: User ID for fetching user-specific context
|
||||||
connector_id: Optional specific connector ID (if known)
|
connector_id: Optional specific connector ID (if known)
|
||||||
|
|
@ -29,6 +39,7 @@ def create_create_notion_page_tool(
|
||||||
Returns:
|
Returns:
|
||||||
Configured create_notion_page tool
|
Configured create_notion_page tool
|
||||||
"""
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_notion_page(
|
async def create_notion_page(
|
||||||
|
|
@ -67,7 +78,7 @@ def create_create_notion_page_tool(
|
||||||
"""
|
"""
|
||||||
logger.info(f"create_notion_page called: title='{title}'")
|
logger.info(f"create_notion_page called: title='{title}'")
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Notion tool not properly configured - missing required parameters"
|
"Notion tool not properly configured - missing required parameters"
|
||||||
)
|
)
|
||||||
|
|
@ -77,13 +88,16 @@ def create_create_notion_page_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = NotionToolMetadataService(db_session)
|
metadata_service = NotionToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_creation_context(
|
context = await metadata_service.get_creation_context(
|
||||||
search_space_id, user_id
|
search_space_id, user_id
|
||||||
)
|
)
|
||||||
|
|
||||||
if "error" in context:
|
if "error" in context:
|
||||||
logger.error(f"Failed to fetch creation context: {context['error']}")
|
logger.error(
|
||||||
|
f"Failed to fetch creation context: {context['error']}"
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": context["error"],
|
"message": context["error"],
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
|
from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.notion.tool_metadata_service import NotionToolMetadataService
|
from app.services.notion.tool_metadata_service import NotionToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -20,8 +21,14 @@ def create_delete_notion_page_tool(
|
||||||
"""
|
"""
|
||||||
Factory function to create the delete_notion_page tool.
|
Factory function to create the delete_notion_page tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db_session: Database session for accessing Notion connector
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
search_space_id: Search space ID to find the Notion connector
|
search_space_id: Search space ID to find the Notion connector
|
||||||
user_id: User ID for finding the correct Notion connector
|
user_id: User ID for finding the correct Notion connector
|
||||||
connector_id: Optional specific connector ID (if known)
|
connector_id: Optional specific connector ID (if known)
|
||||||
|
|
@ -29,6 +36,7 @@ def create_delete_notion_page_tool(
|
||||||
Returns:
|
Returns:
|
||||||
Configured delete_notion_page tool
|
Configured delete_notion_page tool
|
||||||
"""
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def delete_notion_page(
|
async def delete_notion_page(
|
||||||
|
|
@ -63,7 +71,7 @@ def create_delete_notion_page_tool(
|
||||||
f"delete_notion_page called: page_title='{page_title}', delete_from_kb={delete_from_kb}"
|
f"delete_notion_page called: page_title='{page_title}', delete_from_kb={delete_from_kb}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Notion tool not properly configured - missing required parameters"
|
"Notion tool not properly configured - missing required parameters"
|
||||||
)
|
)
|
||||||
|
|
@ -73,6 +81,7 @@ def create_delete_notion_page_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
# Get page context (page_id, account, title) from indexed data
|
# Get page context (page_id, account, title) from indexed data
|
||||||
metadata_service = NotionToolMetadataService(db_session)
|
metadata_service = NotionToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_delete_context(
|
context = await metadata_service.get_delete_context(
|
||||||
|
|
@ -136,7 +145,9 @@ def create_delete_notion_page_tool(
|
||||||
final_connector_id = result.params.get(
|
final_connector_id = result.params.get(
|
||||||
"connector_id", connector_id_from_context
|
"connector_id", connector_id_from_context
|
||||||
)
|
)
|
||||||
final_delete_from_kb = result.params.get("delete_from_kb", delete_from_kb)
|
final_delete_from_kb = result.params.get(
|
||||||
|
"delete_from_kb", delete_from_kb
|
||||||
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Deleting Notion page with final params: page_id={final_page_id}, connector_id={final_connector_id}, delete_from_kb={final_delete_from_kb}"
|
f"Deleting Notion page with final params: page_id={final_page_id}, connector_id={final_connector_id}, delete_from_kb={final_delete_from_kb}"
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
|
from app.connectors.notion_history import NotionAPIError, NotionHistoryConnector
|
||||||
|
from app.db import async_session_maker
|
||||||
from app.services.notion import NotionToolMetadataService
|
from app.services.notion import NotionToolMetadataService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -20,8 +21,14 @@ def create_update_notion_page_tool(
|
||||||
"""
|
"""
|
||||||
Factory function to create the update_notion_page tool.
|
Factory function to create the update_notion_page tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache (see
|
||||||
|
``create_create_notion_page_tool`` for the full rationale).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db_session: Database session for accessing Notion connector
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
search_space_id: Search space ID to find the Notion connector
|
search_space_id: Search space ID to find the Notion connector
|
||||||
user_id: User ID for fetching user-specific context
|
user_id: User ID for fetching user-specific context
|
||||||
connector_id: Optional specific connector ID (if known)
|
connector_id: Optional specific connector ID (if known)
|
||||||
|
|
@ -29,6 +36,7 @@ def create_update_notion_page_tool(
|
||||||
Returns:
|
Returns:
|
||||||
Configured update_notion_page tool
|
Configured update_notion_page tool
|
||||||
"""
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def update_notion_page(
|
async def update_notion_page(
|
||||||
|
|
@ -71,7 +79,7 @@ def create_update_notion_page_tool(
|
||||||
f"update_notion_page called: page_title='{page_title}', content_length={len(content) if content else 0}"
|
f"update_notion_page called: page_title='{page_title}', content_length={len(content) if content else 0}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Notion tool not properly configured - missing required parameters"
|
"Notion tool not properly configured - missing required parameters"
|
||||||
)
|
)
|
||||||
|
|
@ -88,6 +96,7 @@ def create_update_notion_page_tool(
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
metadata_service = NotionToolMetadataService(db_session)
|
metadata_service = NotionToolMetadataService(db_session)
|
||||||
context = await metadata_service.get_update_context(
|
context = await metadata_service.get_update_context(
|
||||||
search_space_id, user_id, page_title
|
search_space_id, user_id, page_title
|
||||||
|
|
@ -204,7 +213,9 @@ def create_update_notion_page_tool(
|
||||||
if result.get("status") == "success" and document_id is not None:
|
if result.get("status") == "success" and document_id is not None:
|
||||||
from app.services.notion import NotionKBSyncService
|
from app.services.notion import NotionKBSyncService
|
||||||
|
|
||||||
logger.info(f"Updating knowledge base for document {document_id}...")
|
logger.info(
|
||||||
|
f"Updating knowledge base for document {document_id}..."
|
||||||
|
)
|
||||||
kb_service = NotionKBSyncService(db_session)
|
kb_service = NotionKBSyncService(db_session)
|
||||||
kb_result = await kb_service.sync_after_update(
|
kb_result = await kb_service.sync_after_update(
|
||||||
document_id=document_id,
|
document_id=document_id,
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from sqlalchemy.future import select
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
from app.connectors.onedrive.client import OneDriveClient
|
from app.connectors.onedrive.client import OneDriveClient
|
||||||
from app.db import SearchSourceConnector, SearchSourceConnectorType
|
from app.db import SearchSourceConnector, SearchSourceConnectorType, async_session_maker
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -48,6 +48,23 @@ def create_create_onedrive_file_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the create_onedrive_file tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured create_onedrive_file tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def create_onedrive_file(
|
async def create_onedrive_file(
|
||||||
name: str,
|
name: str,
|
||||||
|
|
@ -70,13 +87,14 @@ def create_create_onedrive_file_tool(
|
||||||
"""
|
"""
|
||||||
logger.info(f"create_onedrive_file called: name='{name}'")
|
logger.info(f"create_onedrive_file called: name='{name}'")
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "OneDrive tool not properly configured.",
|
"message": "OneDrive tool not properly configured.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
select(SearchSourceConnector).filter(
|
select(SearchSourceConnector).filter(
|
||||||
SearchSourceConnector.search_space_id == search_space_id,
|
SearchSourceConnector.search_space_id == search_space_id,
|
||||||
|
|
@ -136,7 +154,9 @@ def create_create_onedrive_file_tool(
|
||||||
]
|
]
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Error fetching folders for connector %s", cid, exc_info=True
|
"Error fetching folders for connector %s",
|
||||||
|
cid,
|
||||||
|
exc_info=True,
|
||||||
)
|
)
|
||||||
parent_folders[cid] = []
|
parent_folders[cid] = []
|
||||||
|
|
||||||
|
|
@ -223,7 +243,9 @@ def create_create_onedrive_file_tool(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
)
|
)
|
||||||
if kb_result["status"] == "success":
|
if kb_result["status"] == "success":
|
||||||
kb_message_suffix = " Your knowledge base has also been updated."
|
kb_message_suffix = (
|
||||||
|
" Your knowledge base has also been updated."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync."
|
kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync."
|
||||||
except Exception as kb_err:
|
except Exception as kb_err:
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ from app.db import (
|
||||||
DocumentType,
|
DocumentType,
|
||||||
SearchSourceConnector,
|
SearchSourceConnector,
|
||||||
SearchSourceConnectorType,
|
SearchSourceConnectorType,
|
||||||
|
async_session_maker,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -23,6 +24,23 @@ def create_delete_onedrive_file_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the delete_onedrive_file tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured delete_onedrive_file tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def delete_onedrive_file(
|
async def delete_onedrive_file(
|
||||||
file_name: str,
|
file_name: str,
|
||||||
|
|
@ -56,13 +74,14 @@ def create_delete_onedrive_file_tool(
|
||||||
f"delete_onedrive_file called: file_name='{file_name}', delete_from_kb={delete_from_kb}"
|
f"delete_onedrive_file called: file_name='{file_name}', delete_from_kb={delete_from_kb}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": "OneDrive tool not properly configured.",
|
"message": "OneDrive tool not properly configured.",
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
doc_result = await db_session.execute(
|
doc_result = await db_session.execute(
|
||||||
select(Document)
|
select(Document)
|
||||||
.join(
|
.join(
|
||||||
|
|
@ -95,7 +114,9 @@ def create_delete_onedrive_file_tool(
|
||||||
Document.document_type == DocumentType.ONEDRIVE_FILE,
|
Document.document_type == DocumentType.ONEDRIVE_FILE,
|
||||||
func.lower(
|
func.lower(
|
||||||
cast(
|
cast(
|
||||||
Document.document_metadata["onedrive_file_name"],
|
Document.document_metadata[
|
||||||
|
"onedrive_file_name"
|
||||||
|
],
|
||||||
String,
|
String,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
@ -193,14 +214,17 @@ def create_delete_onedrive_file_tool(
|
||||||
|
|
||||||
final_file_id = result.params.get("file_id", file_id)
|
final_file_id = result.params.get("file_id", file_id)
|
||||||
final_connector_id = result.params.get("connector_id", connector.id)
|
final_connector_id = result.params.get("connector_id", connector.id)
|
||||||
final_delete_from_kb = result.params.get("delete_from_kb", delete_from_kb)
|
final_delete_from_kb = result.params.get(
|
||||||
|
"delete_from_kb", delete_from_kb
|
||||||
|
)
|
||||||
|
|
||||||
if final_connector_id != connector.id:
|
if final_connector_id != connector.id:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
select(SearchSourceConnector).filter(
|
select(SearchSourceConnector).filter(
|
||||||
and_(
|
and_(
|
||||||
SearchSourceConnector.id == final_connector_id,
|
SearchSourceConnector.id == final_connector_id,
|
||||||
SearchSourceConnector.search_space_id == search_space_id,
|
SearchSourceConnector.search_space_id
|
||||||
|
== search_space_id,
|
||||||
SearchSourceConnector.user_id == user_id,
|
SearchSourceConnector.user_id == user_id,
|
||||||
SearchSourceConnector.connector_type
|
SearchSourceConnector.connector_type
|
||||||
== SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
|
== SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
|
||||||
|
|
|
||||||
|
|
@ -824,13 +824,22 @@ async def build_tools_async(
|
||||||
"""Async version of build_tools that also loads MCP tools from database.
|
"""Async version of build_tools that also loads MCP tools from database.
|
||||||
|
|
||||||
Design Note:
|
Design Note:
|
||||||
This function exists because MCP tools require database queries to load user configs,
|
This function exists because MCP tools require database queries to load
|
||||||
while built-in tools are created synchronously from static code.
|
user configs, while built-in tools are created synchronously from static
|
||||||
|
code.
|
||||||
|
|
||||||
Alternative: We could make build_tools() itself async and always query the database,
|
Alternative: We could make build_tools() itself async and always query
|
||||||
but that would force async everywhere even when only using built-in tools. The current
|
the database, but that would force async everywhere even when only using
|
||||||
design keeps the simple case (static tools only) synchronous while supporting dynamic
|
built-in tools. The current design keeps the simple case (static tools
|
||||||
database-loaded tools through this async wrapper.
|
only) synchronous while supporting dynamic database-loaded tools through
|
||||||
|
this async wrapper.
|
||||||
|
|
||||||
|
Phase 1.3: built-in tool construction (CPU; runs in a thread pool to
|
||||||
|
avoid event-loop stalls) and MCP tool loading (HTTP/DB I/O; runs on
|
||||||
|
the event loop) are kicked off concurrently. Cold-path savings are
|
||||||
|
bounded by the slower of the two — typically MCP at ~200ms-1.7s —
|
||||||
|
so the parallelization recovers the ~50-200ms previously spent
|
||||||
|
serially on built-in construction.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
dependencies: Dict containing all possible dependencies
|
dependencies: Dict containing all possible dependencies
|
||||||
|
|
@ -843,33 +852,70 @@ async def build_tools_async(
|
||||||
List of configured tool instances ready for the agent, including MCP tools.
|
List of configured tool instances ready for the agent, including MCP tools.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
import asyncio
|
||||||
import time
|
import time
|
||||||
|
|
||||||
_perf_log = logging.getLogger("surfsense.perf")
|
_perf_log = logging.getLogger("surfsense.perf")
|
||||||
_perf_log.setLevel(logging.DEBUG)
|
_perf_log.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
|
can_load_mcp = (
|
||||||
|
include_mcp_tools
|
||||||
|
and "db_session" in dependencies
|
||||||
|
and "search_space_id" in dependencies
|
||||||
|
)
|
||||||
|
|
||||||
|
# Built-in tool construction is synchronous + CPU-only. Off-loop it so
|
||||||
|
# MCP's HTTP/DB I/O can fire concurrently. ``build_tools`` is pure
|
||||||
|
# function over its inputs — safe to thread-shift.
|
||||||
_t0 = time.perf_counter()
|
_t0 = time.perf_counter()
|
||||||
tools = build_tools(dependencies, enabled_tools, disabled_tools, additional_tools)
|
builtin_task = asyncio.create_task(
|
||||||
|
asyncio.to_thread(
|
||||||
|
build_tools, dependencies, enabled_tools, disabled_tools, additional_tools
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
mcp_task: asyncio.Task | None = None
|
||||||
|
if can_load_mcp:
|
||||||
|
mcp_task = asyncio.create_task(
|
||||||
|
load_mcp_tools(
|
||||||
|
dependencies["db_session"],
|
||||||
|
dependencies["search_space_id"],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Surface failures from each task independently so a flaky MCP
|
||||||
|
# endpoint never poisons built-in tool registration. ``return_exceptions``
|
||||||
|
# gives us per-task exceptions instead of dropping the second result
|
||||||
|
# when the first raises.
|
||||||
|
if mcp_task is not None:
|
||||||
|
builtin_result, mcp_result = await asyncio.gather(
|
||||||
|
builtin_task, mcp_task, return_exceptions=True
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
builtin_result = await builtin_task
|
||||||
|
mcp_result = None
|
||||||
|
|
||||||
|
if isinstance(builtin_result, BaseException):
|
||||||
|
raise builtin_result # built-in registration failure is non-recoverable
|
||||||
|
tools: list[BaseTool] = builtin_result
|
||||||
_perf_log.info(
|
_perf_log.info(
|
||||||
"[build_tools_async] Built-in tools in %.3fs (%d tools)",
|
"[build_tools_async] Built-in tools in %.3fs (%d tools, parallel)",
|
||||||
time.perf_counter() - _t0,
|
time.perf_counter() - _t0,
|
||||||
len(tools),
|
len(tools),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Load MCP tools if requested and dependencies are available
|
if mcp_task is not None:
|
||||||
if (
|
if isinstance(mcp_result, BaseException):
|
||||||
include_mcp_tools
|
# ``return_exceptions=True`` captures the exception out-of-band,
|
||||||
and "db_session" in dependencies
|
# so ``sys.exc_info()`` is empty here. Pass the captured
|
||||||
and "search_space_id" in dependencies
|
# exception via ``exc_info=`` to get a real traceback.
|
||||||
):
|
logging.error(
|
||||||
try:
|
"Failed to load MCP tools: %s", mcp_result, exc_info=mcp_result
|
||||||
_t0 = time.perf_counter()
|
|
||||||
mcp_tools = await load_mcp_tools(
|
|
||||||
dependencies["db_session"],
|
|
||||||
dependencies["search_space_id"],
|
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
mcp_tools = mcp_result or []
|
||||||
_perf_log.info(
|
_perf_log.info(
|
||||||
"[build_tools_async] MCP tools loaded in %.3fs (%d tools)",
|
"[build_tools_async] MCP tools loaded in %.3fs (%d tools, parallel)",
|
||||||
time.perf_counter() - _t0,
|
time.perf_counter() - _t0,
|
||||||
len(mcp_tools),
|
len(mcp_tools),
|
||||||
)
|
)
|
||||||
|
|
@ -879,8 +925,6 @@ async def build_tools_async(
|
||||||
len(mcp_tools),
|
len(mcp_tools),
|
||||||
[t.name for t in mcp_tools],
|
[t.name for t in mcp_tools],
|
||||||
)
|
)
|
||||||
except Exception as e:
|
|
||||||
logging.exception("Failed to load MCP tools: %s", e)
|
|
||||||
|
|
||||||
logging.info(
|
logging.info(
|
||||||
"Total tools for agent: %d — %s",
|
"Total tools for agent: %d — %s",
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument
|
from app.db import SurfsenseDocsChunk, SurfsenseDocsDocument, async_session_maker
|
||||||
from app.utils.document_converters import embed_text
|
from app.utils.document_converters import embed_text
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -124,12 +124,19 @@ def create_search_surfsense_docs_tool(db_session: AsyncSession):
|
||||||
"""
|
"""
|
||||||
Factory function to create the search_surfsense_docs tool.
|
Factory function to create the search_surfsense_docs tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db_session: Database session for executing queries
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A configured tool function for searching Surfsense documentation
|
A configured tool function for searching Surfsense documentation
|
||||||
"""
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def search_surfsense_docs(query: str, top_k: int = 10) -> str:
|
async def search_surfsense_docs(query: str, top_k: int = 10) -> str:
|
||||||
|
|
@ -155,6 +162,7 @@ def create_search_surfsense_docs_tool(db_session: AsyncSession):
|
||||||
Returns:
|
Returns:
|
||||||
Relevant documentation content formatted with chunk IDs for citations
|
Relevant documentation content formatted with chunk IDs for citations
|
||||||
"""
|
"""
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
return await search_surfsense_docs_async(
|
return await search_surfsense_docs_async(
|
||||||
query=query,
|
query=query,
|
||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ import httpx
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.db import async_session_maker
|
||||||
|
|
||||||
from ._auth import GRAPH_API, get_access_token, get_teams_connector
|
from ._auth import GRAPH_API, get_access_token, get_teams_connector
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -15,6 +17,23 @@ def create_list_teams_channels_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the list_teams_channels tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured list_teams_channels tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def list_teams_channels() -> dict[str, Any]:
|
async def list_teams_channels() -> dict[str, Any]:
|
||||||
"""List all Microsoft Teams and their channels the user has access to.
|
"""List all Microsoft Teams and their channels the user has access to.
|
||||||
|
|
@ -23,11 +42,14 @@ def create_list_teams_channels_tool(
|
||||||
Dictionary with status and a list of teams, each containing
|
Dictionary with status and a list of teams, each containing
|
||||||
team_id, team_name, and a list of channels (id, name).
|
team_id, team_name, and a list of channels (id, name).
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Teams tool not properly configured."}
|
return {"status": "error", "message": "Teams tool not properly configured."}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
connector = await get_teams_connector(db_session, search_space_id, user_id)
|
async with async_session_maker() as db_session:
|
||||||
|
connector = await get_teams_connector(
|
||||||
|
db_session, search_space_id, user_id
|
||||||
|
)
|
||||||
if not connector:
|
if not connector:
|
||||||
return {"status": "error", "message": "No Teams connector found."}
|
return {"status": "error", "message": "No Teams connector found."}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@ import httpx
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.db import async_session_maker
|
||||||
|
|
||||||
from ._auth import GRAPH_API, get_access_token, get_teams_connector
|
from ._auth import GRAPH_API, get_access_token, get_teams_connector
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -15,6 +17,23 @@ def create_read_teams_messages_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the read_teams_messages tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured read_teams_messages tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def read_teams_messages(
|
async def read_teams_messages(
|
||||||
team_id: str,
|
team_id: str,
|
||||||
|
|
@ -32,13 +51,16 @@ def create_read_teams_messages_tool(
|
||||||
Dictionary with status and a list of messages including
|
Dictionary with status and a list of messages including
|
||||||
id, sender, content, timestamp.
|
id, sender, content, timestamp.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Teams tool not properly configured."}
|
return {"status": "error", "message": "Teams tool not properly configured."}
|
||||||
|
|
||||||
limit = min(limit, 50)
|
limit = min(limit, 50)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
connector = await get_teams_connector(db_session, search_space_id, user_id)
|
async with async_session_maker() as db_session:
|
||||||
|
connector = await get_teams_connector(
|
||||||
|
db_session, search_space_id, user_id
|
||||||
|
)
|
||||||
if not connector:
|
if not connector:
|
||||||
return {"status": "error", "message": "No Teams connector found."}
|
return {"status": "error", "message": "No Teams connector found."}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.agents.new_chat.tools.hitl import request_approval
|
from app.agents.new_chat.tools.hitl import request_approval
|
||||||
|
from app.db import async_session_maker
|
||||||
|
|
||||||
from ._auth import GRAPH_API, get_access_token, get_teams_connector
|
from ._auth import GRAPH_API, get_access_token, get_teams_connector
|
||||||
|
|
||||||
|
|
@ -17,6 +18,23 @@ def create_send_teams_message_tool(
|
||||||
search_space_id: int | None = None,
|
search_space_id: int | None = None,
|
||||||
user_id: str | None = None,
|
user_id: str | None = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
Factory function to create the send_teams_message tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured send_teams_message tool
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def send_teams_message(
|
async def send_teams_message(
|
||||||
team_id: str,
|
team_id: str,
|
||||||
|
|
@ -39,11 +57,14 @@ def create_send_teams_message_tool(
|
||||||
IMPORTANT:
|
IMPORTANT:
|
||||||
- If status is "rejected", the user explicitly declined. Do NOT retry.
|
- If status is "rejected", the user explicitly declined. Do NOT retry.
|
||||||
"""
|
"""
|
||||||
if db_session is None or search_space_id is None or user_id is None:
|
if search_space_id is None or user_id is None:
|
||||||
return {"status": "error", "message": "Teams tool not properly configured."}
|
return {"status": "error", "message": "Teams tool not properly configured."}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
connector = await get_teams_connector(db_session, search_space_id, user_id)
|
async with async_session_maker() as db_session:
|
||||||
|
connector = await get_teams_connector(
|
||||||
|
db_session, search_space_id, user_id
|
||||||
|
)
|
||||||
if not connector:
|
if not connector:
|
||||||
return {"status": "error", "message": "No Teams connector found."}
|
return {"status": "error", "message": "No Teams connector found."}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ from langchain_core.tools import tool
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
from app.db import SearchSpace, User
|
from app.db import SearchSpace, User, async_session_maker
|
||||||
from app.utils.content_utils import extract_text_content
|
from app.utils.content_utils import extract_text_content
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -302,6 +302,25 @@ def create_update_memory_tool(
|
||||||
db_session: AsyncSession,
|
db_session: AsyncSession,
|
||||||
llm: Any | None = None,
|
llm: Any | None = None,
|
||||||
):
|
):
|
||||||
|
"""Factory function to create the user-memory update tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
The session's bound ``commit``/``rollback`` methods are captured at
|
||||||
|
call time, after ``async with`` has bound ``db_session`` locally.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
user_id: ID of the user whose memory document is being updated.
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
llm: Optional LLM for the forced-rewrite path.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured update_memory tool for the user-memory scope.
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
uid = UUID(user_id) if isinstance(user_id, str) else user_id
|
uid = UUID(user_id) if isinstance(user_id, str) else user_id
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
|
|
@ -318,6 +337,7 @@ def create_update_memory_tool(
|
||||||
updated_memory: The FULL updated markdown document (not a diff).
|
updated_memory: The FULL updated markdown document (not a diff).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
result = await db_session.execute(select(User).where(User.id == uid))
|
result = await db_session.execute(select(User).where(User.id == uid))
|
||||||
user = result.scalars().first()
|
user = result.scalars().first()
|
||||||
if not user:
|
if not user:
|
||||||
|
|
@ -337,7 +357,6 @@ def create_update_memory_tool(
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("Failed to update user memory: %s", e)
|
logger.exception("Failed to update user memory: %s", e)
|
||||||
await db_session.rollback()
|
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": f"Failed to update memory: {e}",
|
"message": f"Failed to update memory: {e}",
|
||||||
|
|
@ -351,6 +370,27 @@ def create_update_team_memory_tool(
|
||||||
db_session: AsyncSession,
|
db_session: AsyncSession,
|
||||||
llm: Any | None = None,
|
llm: Any | None = None,
|
||||||
):
|
):
|
||||||
|
"""Factory function to create the team-memory update tool.
|
||||||
|
|
||||||
|
The tool acquires its own short-lived ``AsyncSession`` per call via
|
||||||
|
:data:`async_session_maker` so the closure is safe to share across
|
||||||
|
HTTP requests by the compiled-agent cache. Capturing a per-request
|
||||||
|
session here would surface stale/closed sessions on cache hits.
|
||||||
|
The session's bound ``commit``/``rollback`` methods are captured at
|
||||||
|
call time, after ``async with`` has bound ``db_session`` locally.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
search_space_id: ID of the search space whose team memory is being
|
||||||
|
updated.
|
||||||
|
db_session: Reserved for registry compatibility. Per-call sessions
|
||||||
|
are opened via :data:`async_session_maker` inside the tool body.
|
||||||
|
llm: Optional LLM for the forced-rewrite path.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Configured update_memory tool for the team-memory scope.
|
||||||
|
"""
|
||||||
|
del db_session # per-call session — see docstring
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
async def update_memory(updated_memory: str) -> dict[str, Any]:
|
async def update_memory(updated_memory: str) -> dict[str, Any]:
|
||||||
"""Update the team's shared memory document for this search space.
|
"""Update the team's shared memory document for this search space.
|
||||||
|
|
@ -366,6 +406,7 @@ def create_update_team_memory_tool(
|
||||||
updated_memory: The FULL updated markdown document (not a diff).
|
updated_memory: The FULL updated markdown document (not a diff).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
async with async_session_maker() as db_session:
|
||||||
result = await db_session.execute(
|
result = await db_session.execute(
|
||||||
select(SearchSpace).where(SearchSpace.id == search_space_id)
|
select(SearchSpace).where(SearchSpace.id == search_space_id)
|
||||||
)
|
)
|
||||||
|
|
@ -379,7 +420,9 @@ def create_update_team_memory_tool(
|
||||||
updated_memory=updated_memory,
|
updated_memory=updated_memory,
|
||||||
old_memory=old_memory,
|
old_memory=old_memory,
|
||||||
llm=llm,
|
llm=llm,
|
||||||
apply_fn=lambda content: setattr(space, "shared_memory_md", content),
|
apply_fn=lambda content: setattr(
|
||||||
|
space, "shared_memory_md", content
|
||||||
|
),
|
||||||
commit_fn=db_session.commit,
|
commit_fn=db_session.commit,
|
||||||
rollback_fn=db_session.rollback,
|
rollback_fn=db_session.rollback,
|
||||||
label="team memory",
|
label="team memory",
|
||||||
|
|
@ -387,7 +430,6 @@ def create_update_team_memory_tool(
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.exception("Failed to update team memory: %s", e)
|
logger.exception("Failed to update team memory: %s", e)
|
||||||
await db_session.rollback()
|
|
||||||
return {
|
return {
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"message": f"Failed to update team memory: {e}",
|
"message": f"Failed to update team memory: {e}",
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ from app.config import (
|
||||||
initialize_image_gen_router,
|
initialize_image_gen_router,
|
||||||
initialize_llm_router,
|
initialize_llm_router,
|
||||||
initialize_openrouter_integration,
|
initialize_openrouter_integration,
|
||||||
|
initialize_pricing_registration,
|
||||||
initialize_vision_llm_router,
|
initialize_vision_llm_router,
|
||||||
)
|
)
|
||||||
from app.db import User, create_db_and_tables, get_async_session
|
from app.db import User, create_db_and_tables, get_async_session
|
||||||
|
|
@ -420,6 +421,135 @@ def _stop_openrouter_background_refresh() -> None:
|
||||||
OpenRouterIntegrationService.get_instance().stop_background_refresh()
|
OpenRouterIntegrationService.get_instance().stop_background_refresh()
|
||||||
|
|
||||||
|
|
||||||
|
def _run_agent_warmup_sync() -> None:
    """Compile one throwaway agent so the process-wide JIT caches are primed.

    This is the blocking half of :func:`_warm_agent_jit_caches`. It is plain
    synchronous CPU work (imports, Pydantic schema generation, graph
    compilation) and is meant to be executed in a worker thread rather than
    on the event loop.

    Every failure is logged and swallowed; the function never raises
    ``Exception`` subclasses to its caller.
    """
    import time as _time

    logger = logging.getLogger(__name__)
    t0 = _time.perf_counter()
    try:
        from langchain.agents import create_agent
        from langchain.agents.middleware import (
            ModelCallLimitMiddleware,
            TodoListMiddleware,
            ToolCallLimitMiddleware,
        )
        from langchain_core.language_models.fake_chat_models import (
            FakeListChatModel,
        )
        from langchain_core.tools import tool

        from app.agents.new_chat.context import SurfSenseContextSchema

        # Minimal LLM stub. ``FakeListChatModel`` satisfies
        # ``BaseChatModel`` without any network or auth — perfect for
        # exercising the compile path without side effects.
        stub_llm = FakeListChatModel(responses=["warmup-response"])

        # Two trivial tools with arg + return schemas — exercises the
        # Pydantic v2 schema JIT path. Without at least one tool the
        # graph compile skips the tool-loop bytecode generation that
        # accounts for ~30-50% of cold compile cost.
        @tool
        def _warmup_tool_a(query: str, limit: int = 5) -> str:
            """Warmup tool A — never actually invoked."""
            return query[:limit]

        @tool
        def _warmup_tool_b(name: str, value: float | None = None) -> dict[str, object]:
            """Warmup tool B — never actually invoked."""
            return {"name": name, "value": value}

        # A handful of common middleware so the compile pre-pays the
        # ``AgentMiddleware`` resolver path. These instances never run
        # because the throwaway agent is immediately collected.
        # ``SubAgentMiddleware`` is the single heaviest line in cold
        # ``create_surfsense_deep_agent`` (1.5-2s of CPU per call to
        # compile its general-purpose subagent's full inner graph),
        # so we include it here to make sure that compile path is JIT'd.
        warmup_middleware: list = [
            TodoListMiddleware(),
            ModelCallLimitMiddleware(
                thread_limit=120, run_limit=80, exit_behavior="end"
            ),
            ToolCallLimitMiddleware(
                thread_limit=300, run_limit=80, exit_behavior="continue"
            ),
        ]
        try:
            from deepagents import SubAgentMiddleware
            from deepagents.backends import StateBackend
            from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT

            gp_warmup_spec = {  # type: ignore[var-annotated]
                **GENERAL_PURPOSE_SUBAGENT,
                "model": stub_llm,
                "tools": [_warmup_tool_a],
                "middleware": [TodoListMiddleware()],
            }
            warmup_middleware.append(
                SubAgentMiddleware(backend=StateBackend, subagents=[gp_warmup_spec])
            )
        except Exception:
            # Deepagents missing/incompatible — middleware-only warmup
            # still produces a useful (smaller) speedup.
            logger.debug("[startup] SubAgentMiddleware warmup skipped", exc_info=True)

        compiled = create_agent(
            stub_llm,
            tools=[_warmup_tool_a, _warmup_tool_b],
            system_prompt="You are a warmup stub.",
            middleware=warmup_middleware,
            context_schema=SurfSenseContextSchema,
            checkpointer=None,
        )

        # Touch the compiled graph's nodes so any remaining lazy schema
        # work fires now instead of on first real invocation.
        _ = list(getattr(compiled, "nodes", {}).keys())

        del compiled
        logger.info(
            "[startup] Agent JIT warmup completed in %.3fs",
            _time.perf_counter() - t0,
        )
    except Exception:
        logger.warning(
            "[startup] Agent JIT warmup failed in %.3fs (non-fatal — first "
            "real request will pay the full compile cost)",
            _time.perf_counter() - t0,
            exc_info=True,
        )


async def _warm_agent_jit_caches() -> None:
    """Pay the LangChain / LangGraph / Deepagents JIT cost at startup.

    Why
    ----
    A cold ``create_agent`` + ``StateGraph.compile()`` + Pydantic schema
    generation chain takes 1.5-2 seconds of pure CPU on first invocation
    inside any Python process: the graph compiler builds reducers,
    Pydantic v2 generates and JITs validator schemas, deepagents
    eagerly compiles its general-purpose subagent, etc. Subsequent
    compiles in the same process pay only ~50% of that cost (the lazy
    JIT bits are cached in module-level dicts).

    Doing one throwaway compile during ``lifespan`` startup pre-pays
    that cost so the *first real request* doesn't. We do NOT prime
    :mod:`agent_cache` because the cache key requires real
    ``thread_id`` / ``user_id`` / ``search_space_id`` / etc. — the
    throwaway agent is genuinely thrown away and immediately collected.

    Safety
    ------
    * No DB access. We construct a stub LLM (no real keys), pass two
      trivial in-memory tools, and pass ``checkpointer=None`` so we never
      touch Postgres.
    * The compile is blocking CPU work with no ``await`` points, so it is
      run in a worker thread via ``asyncio.to_thread``. That keeps the
      event loop responsive and lets a caller's ``asyncio.wait_for``
      timeout actually fire; run inline, the coroutine would hold the
      loop until it finished and the timeout could never trigger. After
      a timeout the worker thread simply finishes in the background
      (threads cannot be cancelled).
    * On any failure, we log + swallow — the worst case is the first
      real request pays the full cold cost (i.e. pre-warmup behaviour).
    """
    await asyncio.to_thread(_run_agent_warmup_sync)
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
# Tune GC: lower gen-2 threshold so long-lived garbage is collected
|
# Tune GC: lower gen-2 threshold so long-lived garbage is collected
|
||||||
|
|
@ -432,6 +562,7 @@ async def lifespan(app: FastAPI):
|
||||||
await setup_checkpointer_tables()
|
await setup_checkpointer_tables()
|
||||||
initialize_openrouter_integration()
|
initialize_openrouter_integration()
|
||||||
_start_openrouter_background_refresh()
|
_start_openrouter_background_refresh()
|
||||||
|
initialize_pricing_registration()
|
||||||
initialize_llm_router()
|
initialize_llm_router()
|
||||||
initialize_image_gen_router()
|
initialize_image_gen_router()
|
||||||
initialize_vision_llm_router()
|
initialize_vision_llm_router()
|
||||||
|
|
@ -443,6 +574,18 @@ async def lifespan(app: FastAPI):
|
||||||
"Docs will be indexed on the next restart."
|
"Docs will be indexed on the next restart."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Phase 1.7 — JIT warmup. Bounded so a stuck warmup never delays
|
||||||
|
# worker readiness. ``shield`` so Uvicorn cancelling startup
|
||||||
|
# doesn't leave half-warmed Pydantic schemas in an inconsistent
|
||||||
|
# state.
|
||||||
|
try:
|
||||||
|
await asyncio.wait_for(asyncio.shield(_warm_agent_jit_caches()), timeout=20)
|
||||||
|
except (TimeoutError, Exception): # pragma: no cover - defensive
|
||||||
|
logging.getLogger(__name__).warning(
|
||||||
|
"[startup] Agent JIT warmup hit timeout/error — skipping; "
|
||||||
|
"first real request will pay the full compile cost."
|
||||||
|
)
|
||||||
|
|
||||||
log_system_snapshot("startup_complete")
|
log_system_snapshot("startup_complete")
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
@ -452,6 +595,23 @@ async def lifespan(app: FastAPI):
|
||||||
|
|
||||||
|
|
||||||
def registration_allowed():
|
def registration_allowed():
|
||||||
|
"""Master auth kill switch keyed on the REGISTRATION_ENABLED env var.
|
||||||
|
|
||||||
|
Despite the name, this dependency does NOT only gate registration. When
|
||||||
|
REGISTRATION_ENABLED is FALSE it intentionally blocks every auth surface
|
||||||
|
that could mint or refresh a session for an attacker:
|
||||||
|
|
||||||
|
* email/password ``POST /auth/register``
|
||||||
|
* email/password ``POST /auth/jwt/login``
|
||||||
|
* the Google OAuth router (``/auth/google/authorize`` and the shared
|
||||||
|
``/auth/google/callback`` handles both new signups and login for
|
||||||
|
existing users, so flipping this off locks both)
|
||||||
|
* the bespoke ``/auth/google/authorize-redirect`` helper used by the UI
|
||||||
|
|
||||||
|
Use it as a temporary "freeze all new sessions" lever during incident
|
||||||
|
response. It is not a way to disable signup while keeping login working;
|
||||||
|
for that, override ``UserManager.oauth_callback`` instead.
|
||||||
|
"""
|
||||||
if not config.REGISTRATION_ENABLED:
|
if not config.REGISTRATION_ENABLED:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_403_FORBIDDEN, detail="Registration is disabled"
|
status_code=status.HTTP_403_FORBIDDEN, detail="Registration is disabled"
|
||||||
|
|
@ -596,32 +756,45 @@ app.add_middleware(
|
||||||
allow_headers=["*"], # Allows all headers
|
allow_headers=["*"], # Allows all headers
|
||||||
)
|
)
|
||||||
|
|
||||||
app.include_router(
|
# Password / email-based auth routers are only mounted when not running in
|
||||||
|
# Google-OAuth-only mode. Mounting them in OAuth-only prod previously left
|
||||||
|
# POST /auth/register reachable, which is the bypass that allowed bots to
|
||||||
|
# create non-OAuth users in spite of AUTH_TYPE=GOOGLE.
|
||||||
|
if config.AUTH_TYPE != "GOOGLE":
|
||||||
|
app.include_router(
|
||||||
fastapi_users.get_auth_router(auth_backend),
|
fastapi_users.get_auth_router(auth_backend),
|
||||||
prefix="/auth/jwt",
|
prefix="/auth/jwt",
|
||||||
tags=["auth"],
|
tags=["auth"],
|
||||||
dependencies=[Depends(rate_limit_login)],
|
dependencies=[
|
||||||
)
|
Depends(rate_limit_login),
|
||||||
app.include_router(
|
Depends(
|
||||||
|
registration_allowed
|
||||||
|
), # honour REGISTRATION_ENABLED kill switch on login too
|
||||||
|
],
|
||||||
|
)
|
||||||
|
app.include_router(
|
||||||
fastapi_users.get_register_router(UserRead, UserCreate),
|
fastapi_users.get_register_router(UserRead, UserCreate),
|
||||||
prefix="/auth",
|
prefix="/auth",
|
||||||
tags=["auth"],
|
tags=["auth"],
|
||||||
dependencies=[
|
dependencies=[
|
||||||
Depends(rate_limit_register),
|
Depends(rate_limit_register),
|
||||||
Depends(registration_allowed), # blocks registration when disabled
|
Depends(registration_allowed),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
app.include_router(
|
app.include_router(
|
||||||
fastapi_users.get_reset_password_router(),
|
fastapi_users.get_reset_password_router(),
|
||||||
prefix="/auth",
|
prefix="/auth",
|
||||||
tags=["auth"],
|
tags=["auth"],
|
||||||
dependencies=[Depends(rate_limit_password_reset)],
|
dependencies=[Depends(rate_limit_password_reset)],
|
||||||
)
|
)
|
||||||
app.include_router(
|
app.include_router(
|
||||||
fastapi_users.get_verify_router(UserRead),
|
fastapi_users.get_verify_router(UserRead),
|
||||||
prefix="/auth",
|
prefix="/auth",
|
||||||
tags=["auth"],
|
tags=["auth"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# /users/me (read/update profile) is needed in every auth mode, so it stays
|
||||||
|
# mounted unconditionally.
|
||||||
app.include_router(
|
app.include_router(
|
||||||
fastapi_users.get_users_router(UserRead, UserUpdate),
|
fastapi_users.get_users_router(UserRead, UserUpdate),
|
||||||
prefix="/users",
|
prefix="/users",
|
||||||
|
|
@ -679,16 +852,25 @@ if config.AUTH_TYPE == "GOOGLE":
|
||||||
),
|
),
|
||||||
prefix="/auth/google",
|
prefix="/auth/google",
|
||||||
tags=["auth"],
|
tags=["auth"],
|
||||||
dependencies=[
|
# REGISTRATION_ENABLED is a master auth kill switch: when set to FALSE
|
||||||
Depends(registration_allowed)
|
# it blocks BOTH new OAuth signups AND login of existing OAuth users
|
||||||
], # blocks OAuth registration when disabled
|
# (the fastapi-users OAuth router shares one callback for create+login,
|
||||||
|
# so this dependency closes both paths together).
|
||||||
|
dependencies=[Depends(registration_allowed)],
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add a redirect-based authorize endpoint for Firefox/Safari compatibility
|
# Add a redirect-based authorize endpoint for Firefox/Safari compatibility
|
||||||
# This endpoint performs a server-side redirect instead of returning JSON
|
# This endpoint performs a server-side redirect instead of returning JSON
|
||||||
# which fixes cross-site cookie issues where browsers don't send cookies
|
# which fixes cross-site cookie issues where browsers don't send cookies
|
||||||
# set via cross-origin fetch requests on subsequent redirects
|
# set via cross-origin fetch requests on subsequent redirects.
|
||||||
@app.get("/auth/google/authorize-redirect", tags=["auth"])
|
# The registration_allowed dependency mirrors the OAuth router above so
|
||||||
|
# the kill switch fails fast here instead of bouncing users to Google
|
||||||
|
# only to 403 on the callback.
|
||||||
|
@app.get(
|
||||||
|
"/auth/google/authorize-redirect",
|
||||||
|
tags=["auth"],
|
||||||
|
dependencies=[Depends(registration_allowed)],
|
||||||
|
)
|
||||||
async def google_authorize_redirect(
|
async def google_authorize_redirect(
|
||||||
request: Request,
|
request: Request,
|
||||||
):
|
):
|
||||||
|
|
|
||||||
|
|
@ -22,10 +22,12 @@ def init_worker(**kwargs):
|
||||||
initialize_image_gen_router,
|
initialize_image_gen_router,
|
||||||
initialize_llm_router,
|
initialize_llm_router,
|
||||||
initialize_openrouter_integration,
|
initialize_openrouter_integration,
|
||||||
|
initialize_pricing_registration,
|
||||||
initialize_vision_llm_router,
|
initialize_vision_llm_router,
|
||||||
)
|
)
|
||||||
|
|
||||||
initialize_openrouter_integration()
|
initialize_openrouter_integration()
|
||||||
|
initialize_pricing_registration()
|
||||||
initialize_llm_router()
|
initialize_llm_router()
|
||||||
initialize_image_gen_router()
|
initialize_image_gen_router()
|
||||||
initialize_vision_llm_router()
|
initialize_vision_llm_router()
|
||||||
|
|
|
||||||
|
|
@ -47,11 +47,37 @@ def load_global_llm_configs():
|
||||||
data = yaml.safe_load(f)
|
data = yaml.safe_load(f)
|
||||||
configs = data.get("global_llm_configs", [])
|
configs = data.get("global_llm_configs", [])
|
||||||
|
|
||||||
|
# Lazy import keeps the `app.config` -> `app.services` edge one-way
|
||||||
|
# and matches the `provider_api_base` pattern used elsewhere.
|
||||||
|
from app.services.provider_capabilities import derive_supports_image_input
|
||||||
|
|
||||||
seen_slugs: dict[str, int] = {}
|
seen_slugs: dict[str, int] = {}
|
||||||
for cfg in configs:
|
for cfg in configs:
|
||||||
cfg.setdefault("billing_tier", "free")
|
cfg.setdefault("billing_tier", "free")
|
||||||
cfg.setdefault("anonymous_enabled", False)
|
cfg.setdefault("anonymous_enabled", False)
|
||||||
cfg.setdefault("seo_enabled", False)
|
cfg.setdefault("seo_enabled", False)
|
||||||
|
# Capability flag: explicit YAML override always wins. When the
|
||||||
|
# operator has not annotated the model, defer to LiteLLM's
|
||||||
|
# authoritative model map (`supports_vision`) which already
|
||||||
|
# knows GPT-5.x / GPT-4o / Claude 3.x / Gemini 2.x are
|
||||||
|
# vision-capable. Unknown / unmapped models default-allow so
|
||||||
|
# we don't lock the user out of a freshly added third-party
|
||||||
|
# entry; the streaming-task safety net (driven by
|
||||||
|
# `is_known_text_only_chat_model`) is the only place a False
|
||||||
|
# actually blocks a request.
|
||||||
|
if "supports_image_input" not in cfg:
|
||||||
|
litellm_params = cfg.get("litellm_params") or {}
|
||||||
|
base_model = (
|
||||||
|
litellm_params.get("base_model")
|
||||||
|
if isinstance(litellm_params, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
cfg["supports_image_input"] = derive_supports_image_input(
|
||||||
|
provider=cfg.get("provider"),
|
||||||
|
model_name=cfg.get("model_name"),
|
||||||
|
base_model=base_model,
|
||||||
|
custom_provider=cfg.get("custom_provider"),
|
||||||
|
)
|
||||||
|
|
||||||
if cfg.get("seo_enabled") and cfg.get("seo_slug"):
|
if cfg.get("seo_enabled") and cfg.get("seo_slug"):
|
||||||
slug = cfg["seo_slug"]
|
slug = cfg["seo_slug"]
|
||||||
|
|
@ -138,7 +164,11 @@ def load_global_image_gen_configs():
|
||||||
try:
|
try:
|
||||||
with open(global_config_file, encoding="utf-8") as f:
|
with open(global_config_file, encoding="utf-8") as f:
|
||||||
data = yaml.safe_load(f)
|
data = yaml.safe_load(f)
|
||||||
return data.get("global_image_generation_configs", [])
|
configs = data.get("global_image_generation_configs", []) or []
|
||||||
|
for cfg in configs:
|
||||||
|
if isinstance(cfg, dict):
|
||||||
|
cfg.setdefault("billing_tier", "free")
|
||||||
|
return configs
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: Failed to load global image generation configs: {e}")
|
print(f"Warning: Failed to load global image generation configs: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
@ -153,7 +183,11 @@ def load_global_vision_llm_configs():
|
||||||
try:
|
try:
|
||||||
with open(global_config_file, encoding="utf-8") as f:
|
with open(global_config_file, encoding="utf-8") as f:
|
||||||
data = yaml.safe_load(f)
|
data = yaml.safe_load(f)
|
||||||
return data.get("global_vision_llm_configs", [])
|
configs = data.get("global_vision_llm_configs", []) or []
|
||||||
|
for cfg in configs:
|
||||||
|
if isinstance(cfg, dict):
|
||||||
|
cfg.setdefault("billing_tier", "free")
|
||||||
|
return configs
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: Failed to load global vision LLM configs: {e}")
|
print(f"Warning: Failed to load global vision LLM configs: {e}")
|
||||||
return []
|
return []
|
||||||
|
|
@ -254,6 +288,15 @@ def load_openrouter_integration_settings() -> dict | None:
|
||||||
"anonymous_enabled_free", settings["anonymous_enabled"]
|
"anonymous_enabled_free", settings["anonymous_enabled"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Image generation + vision LLM emission are opt-in (issue L).
|
||||||
|
# OpenRouter's catalogue contains hundreds of image / vision
|
||||||
|
# capable models; auto-injecting all of them into every
|
||||||
|
# deployment would explode the model selector and surprise
|
||||||
|
# operators upgrading from prior versions. Default to False so
|
||||||
|
# admins must explicitly turn them on.
|
||||||
|
settings.setdefault("image_generation_enabled", False)
|
||||||
|
settings.setdefault("vision_enabled", False)
|
||||||
|
|
||||||
return settings
|
return settings
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: Failed to load OpenRouter integration settings: {e}")
|
print(f"Warning: Failed to load OpenRouter integration settings: {e}")
|
||||||
|
|
@ -296,10 +339,60 @@ def initialize_openrouter_integration():
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
print("Info: OpenRouter integration enabled but no models fetched")
|
print("Info: OpenRouter integration enabled but no models fetched")
|
||||||
|
|
||||||
|
# Image generation + vision LLM emissions are opt-in (issue L).
|
||||||
|
# Both reuse the catalogue already cached by ``service.initialize``
|
||||||
|
# so we don't make additional network calls here.
|
||||||
|
if settings.get("image_generation_enabled"):
|
||||||
|
try:
|
||||||
|
image_configs = service.get_image_generation_configs()
|
||||||
|
if image_configs:
|
||||||
|
config.GLOBAL_IMAGE_GEN_CONFIGS.extend(image_configs)
|
||||||
|
print(
|
||||||
|
f"Info: OpenRouter integration added {len(image_configs)} "
|
||||||
|
f"image-generation models"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Failed to inject OpenRouter image-gen configs: {e}")
|
||||||
|
|
||||||
|
if settings.get("vision_enabled"):
|
||||||
|
try:
|
||||||
|
vision_configs = service.get_vision_llm_configs()
|
||||||
|
if vision_configs:
|
||||||
|
config.GLOBAL_VISION_LLM_CONFIGS.extend(vision_configs)
|
||||||
|
print(
|
||||||
|
f"Info: OpenRouter integration added {len(vision_configs)} "
|
||||||
|
f"vision LLM models"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Failed to inject OpenRouter vision-LLM configs: {e}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Warning: Failed to initialize OpenRouter integration: {e}")
|
print(f"Warning: Failed to initialize OpenRouter integration: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_pricing_registration():
|
||||||
|
"""
|
||||||
|
Teach LiteLLM the per-token cost of every deployment in
|
||||||
|
``config.GLOBAL_LLM_CONFIGS`` (OpenRouter dynamic models pulled
|
||||||
|
from the OpenRouter catalogue + any operator-declared YAML pricing).
|
||||||
|
|
||||||
|
Must run AFTER ``initialize_openrouter_integration()`` so the
|
||||||
|
OpenRouter catalogue is populated and BEFORE the first LLM call so
|
||||||
|
``response_cost`` is available in ``TokenTrackingCallback``.
|
||||||
|
|
||||||
|
Failures are logged but never raised — startup must not be blocked
|
||||||
|
by a missing pricing entry; the worst case is that the model debits 0.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from app.services.pricing_registration import (
|
||||||
|
register_pricing_from_global_configs,
|
||||||
|
)
|
||||||
|
|
||||||
|
register_pricing_from_global_configs()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Warning: Failed to register LiteLLM pricing: {e}")
|
||||||
|
|
||||||
|
|
||||||
def initialize_llm_router():
|
def initialize_llm_router():
|
||||||
"""
|
"""
|
||||||
Initialize the LLM Router service for Auto mode.
|
Initialize the LLM Router service for Auto mode.
|
||||||
|
|
@ -444,14 +537,54 @@ class Config:
|
||||||
os.getenv("STRIPE_RECONCILIATION_BATCH_SIZE", "100")
|
os.getenv("STRIPE_RECONCILIATION_BATCH_SIZE", "100")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Premium token quota settings
|
# Premium credit (micro-USD) quota settings.
|
||||||
PREMIUM_TOKEN_LIMIT = int(os.getenv("PREMIUM_TOKEN_LIMIT", "3000000"))
|
#
|
||||||
|
# Storage unit is integer micro-USD (1_000_000 = $1.00). The legacy
|
||||||
|
# ``PREMIUM_TOKEN_LIMIT`` and ``STRIPE_TOKENS_PER_UNIT`` env vars are
|
||||||
|
# still honoured for one release as fall-back values — the prior
|
||||||
|
# $1-per-1M-tokens Stripe price means every existing value maps 1:1
|
||||||
|
# to micros, so operators upgrading without changing their .env still
|
||||||
|
# get correct behaviour. A startup deprecation warning fires below if
|
||||||
|
# they're set.
|
||||||
|
PREMIUM_CREDIT_MICROS_LIMIT = int(
|
||||||
|
os.getenv("PREMIUM_CREDIT_MICROS_LIMIT")
|
||||||
|
or os.getenv("PREMIUM_TOKEN_LIMIT", "5000000")
|
||||||
|
)
|
||||||
STRIPE_PREMIUM_TOKEN_PRICE_ID = os.getenv("STRIPE_PREMIUM_TOKEN_PRICE_ID")
|
STRIPE_PREMIUM_TOKEN_PRICE_ID = os.getenv("STRIPE_PREMIUM_TOKEN_PRICE_ID")
|
||||||
STRIPE_TOKENS_PER_UNIT = int(os.getenv("STRIPE_TOKENS_PER_UNIT", "1000000"))
|
STRIPE_CREDIT_MICROS_PER_UNIT = int(
|
||||||
|
os.getenv("STRIPE_CREDIT_MICROS_PER_UNIT")
|
||||||
|
or os.getenv("STRIPE_TOKENS_PER_UNIT", "1000000")
|
||||||
|
)
|
||||||
STRIPE_TOKEN_BUYING_ENABLED = (
|
STRIPE_TOKEN_BUYING_ENABLED = (
|
||||||
os.getenv("STRIPE_TOKEN_BUYING_ENABLED", "FALSE").upper() == "TRUE"
|
os.getenv("STRIPE_TOKEN_BUYING_ENABLED", "FALSE").upper() == "TRUE"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Safety ceiling on the per-call premium reservation. ``stream_new_chat``
|
||||||
|
# estimates an upper-bound cost from ``litellm.get_model_info`` times the
|
||||||
|
# config's ``quota_reserve_tokens`` and clamps the result to this value
|
||||||
|
# so a misconfigured "$1000/M" model can't lock the user's whole balance
|
||||||
|
# on one call. Default $1.00 covers realistic worst-cases (Opus + 4K
|
||||||
|
# reserve_tokens ≈ $0.36) with headroom.
|
||||||
|
QUOTA_MAX_RESERVE_MICROS = int(os.getenv("QUOTA_MAX_RESERVE_MICROS", "1000000"))
|
||||||
|
|
||||||
|
if os.getenv("PREMIUM_TOKEN_LIMIT") and not os.getenv(
|
||||||
|
"PREMIUM_CREDIT_MICROS_LIMIT"
|
||||||
|
):
|
||||||
|
print(
|
||||||
|
"Warning: PREMIUM_TOKEN_LIMIT is deprecated; rename to "
|
||||||
|
"PREMIUM_CREDIT_MICROS_LIMIT (1:1 numerical mapping under the "
|
||||||
|
"current Stripe price). The old key will be removed in a "
|
||||||
|
"future release."
|
||||||
|
)
|
||||||
|
if os.getenv("STRIPE_TOKENS_PER_UNIT") and not os.getenv(
|
||||||
|
"STRIPE_CREDIT_MICROS_PER_UNIT"
|
||||||
|
):
|
||||||
|
print(
|
||||||
|
"Warning: STRIPE_TOKENS_PER_UNIT is deprecated; rename to "
|
||||||
|
"STRIPE_CREDIT_MICROS_PER_UNIT (1:1 numerical mapping). "
|
||||||
|
"The old key will be removed in a future release."
|
||||||
|
)
|
||||||
|
|
||||||
# Anonymous / no-login mode settings
|
# Anonymous / no-login mode settings
|
||||||
NOLOGIN_MODE_ENABLED = os.getenv("NOLOGIN_MODE_ENABLED", "FALSE").upper() == "TRUE"
|
NOLOGIN_MODE_ENABLED = os.getenv("NOLOGIN_MODE_ENABLED", "FALSE").upper() == "TRUE"
|
||||||
ANON_TOKEN_LIMIT = int(os.getenv("ANON_TOKEN_LIMIT", "500000"))
|
ANON_TOKEN_LIMIT = int(os.getenv("ANON_TOKEN_LIMIT", "500000"))
|
||||||
|
|
@ -464,6 +597,35 @@ class Config:
|
||||||
# Default quota reserve tokens when not specified per-model
|
# Default quota reserve tokens when not specified per-model
|
||||||
QUOTA_MAX_RESERVE_PER_CALL = int(os.getenv("QUOTA_MAX_RESERVE_PER_CALL", "8000"))
|
QUOTA_MAX_RESERVE_PER_CALL = int(os.getenv("QUOTA_MAX_RESERVE_PER_CALL", "8000"))
|
||||||
|
|
||||||
|
# Per-image reservation (in micro-USD) used by ``billable_call`` for the
|
||||||
|
# ``POST /image-generations`` endpoint when the global config does not
|
||||||
|
# override it. $0.05 covers realistic worst-cases for current OpenAI /
|
||||||
|
# OpenRouter image-gen pricing. Bypassed entirely for free configs.
|
||||||
|
QUOTA_DEFAULT_IMAGE_RESERVE_MICROS = int(
|
||||||
|
os.getenv("QUOTA_DEFAULT_IMAGE_RESERVE_MICROS", "50000")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Per-podcast reservation (in micro-USD). One agent LLM call generating
|
||||||
|
# a transcript, typically 5k-20k completion tokens. $0.20 covers a long
|
||||||
|
# premium-model run. Tune via env.
|
||||||
|
QUOTA_DEFAULT_PODCAST_RESERVE_MICROS = int(
|
||||||
|
os.getenv("QUOTA_DEFAULT_PODCAST_RESERVE_MICROS", "200000")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Per-video-presentation reservation (in micro-USD). Fan-out of N
|
||||||
|
# slide-scene generations (up to ``VIDEO_PRESENTATION_MAX_SLIDES=30``)
|
||||||
|
# plus refine retries; can produce many premium completions. $1.00
|
||||||
|
# covers worst-case. Tune via env.
|
||||||
|
#
|
||||||
|
# NOTE: this equals the existing ``QUOTA_MAX_RESERVE_MICROS`` default of
|
||||||
|
# 1_000_000. The override path in ``billable_call`` bypasses the
|
||||||
|
# per-call clamp in ``estimate_call_reserve_micros``, so this is the
|
||||||
|
# *actual* hold — raising it via env is fine but means a single video
|
||||||
|
# task can lock $1+ of credit.
|
||||||
|
QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS = int(
|
||||||
|
os.getenv("QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS", "1000000")
|
||||||
|
)
|
||||||
|
|
||||||
# Abuse prevention: concurrent stream cap and CAPTCHA
|
# Abuse prevention: concurrent stream cap and CAPTCHA
|
||||||
ANON_MAX_CONCURRENT_STREAMS = int(os.getenv("ANON_MAX_CONCURRENT_STREAMS", "2"))
|
ANON_MAX_CONCURRENT_STREAMS = int(os.getenv("ANON_MAX_CONCURRENT_STREAMS", "2"))
|
||||||
ANON_CAPTCHA_REQUEST_THRESHOLD = int(
|
ANON_CAPTCHA_REQUEST_THRESHOLD = int(
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,24 @@
|
||||||
# Structure matches NewLLMConfig:
|
# Structure matches NewLLMConfig:
|
||||||
# - Model configuration (provider, model_name, api_key, etc.)
|
# - Model configuration (provider, model_name, api_key, etc.)
|
||||||
# - Prompt configuration (system_instructions, citations_enabled)
|
# - Prompt configuration (system_instructions, citations_enabled)
|
||||||
|
#
|
||||||
|
# COST-BASED PREMIUM CREDITS:
|
||||||
|
# Each premium config bills the user's USD-credit balance based on the
|
||||||
|
# actual provider cost reported by LiteLLM. For models LiteLLM already
|
||||||
|
# knows (most OpenAI/Anthropic/etc. names) you don't need to do anything.
|
||||||
|
# For custom Azure deployment names (e.g. an in-house "gpt-5.4" deployment)
|
||||||
|
# or any model LiteLLM doesn't have in its built-in pricing table, declare
|
||||||
|
# per-token costs inline so they bill correctly:
|
||||||
|
#
|
||||||
|
# litellm_params:
|
||||||
|
# base_model: "my-custom-azure-deploy"
|
||||||
|
# # USD per token; e.g. 0.000003 == $3.00 per million input tokens
|
||||||
|
# input_cost_per_token: 0.000003
|
||||||
|
# output_cost_per_token: 0.000015
|
||||||
|
#
|
||||||
|
# OpenRouter dynamic models pull pricing automatically from OpenRouter's
|
||||||
|
# API — no inline declaration needed. Models without resolvable pricing
|
||||||
|
# debit $0 from the user's balance and log a WARNING.
|
||||||
|
|
||||||
# Router Settings for Auto Mode
|
# Router Settings for Auto Mode
|
||||||
# These settings control how the LiteLLM Router distributes requests across models
|
# These settings control how the LiteLLM Router distributes requests across models
|
||||||
|
|
@ -292,6 +310,17 @@ openrouter_integration:
|
||||||
free_rpm: 20
|
free_rpm: 20
|
||||||
free_tpm: 100000
|
free_tpm: 100000
|
||||||
|
|
||||||
|
# Image generation + vision LLM emission are OPT-IN. OpenRouter's catalogue
|
||||||
|
# contains hundreds of image- and vision-capable models; turning these on
|
||||||
|
# injects them into the global Image-Generation / Vision-LLM model
|
||||||
|
# selectors alongside any static configs. Tier (free/premium) is derived
|
||||||
|
# per model the same way it is for chat (`:free` suffix or zero pricing).
|
||||||
|
# When a user picks a premium image/vision model the call debits the
|
||||||
|
# shared $5 USD-cost-based premium credit pool — so leaving these off
|
||||||
|
# avoids surprise quota burn on existing deployments. Default: false.
|
||||||
|
image_generation_enabled: false
|
||||||
|
vision_enabled: false
|
||||||
|
|
||||||
litellm_params:
|
litellm_params:
|
||||||
max_tokens: 16384
|
max_tokens: 16384
|
||||||
system_instructions: ""
|
system_instructions: ""
|
||||||
|
|
|
||||||
|
|
@ -731,6 +731,7 @@ class TokenUsage(BaseModel, TimestampMixin):
|
||||||
prompt_tokens = Column(Integer, nullable=False, default=0)
|
prompt_tokens = Column(Integer, nullable=False, default=0)
|
||||||
completion_tokens = Column(Integer, nullable=False, default=0)
|
completion_tokens = Column(Integer, nullable=False, default=0)
|
||||||
total_tokens = Column(Integer, nullable=False, default=0)
|
total_tokens = Column(Integer, nullable=False, default=0)
|
||||||
|
cost_micros = Column(BigInteger, nullable=False, default=0, server_default="0")
|
||||||
model_breakdown = Column(JSONB, nullable=True)
|
model_breakdown = Column(JSONB, nullable=True)
|
||||||
call_details = Column(JSONB, nullable=True)
|
call_details = Column(JSONB, nullable=True)
|
||||||
|
|
||||||
|
|
@ -1793,7 +1794,15 @@ class PagePurchase(Base, TimestampMixin):
|
||||||
|
|
||||||
|
|
||||||
class PremiumTokenPurchase(Base, TimestampMixin):
|
class PremiumTokenPurchase(Base, TimestampMixin):
|
||||||
"""Tracks Stripe checkout sessions used to grant additional premium token credits."""
|
"""Tracks Stripe checkout sessions used to grant additional premium credit (USD micro-units).
|
||||||
|
|
||||||
|
Note: the table name is preserved (``premium_token_purchases``) for
|
||||||
|
operational continuity even though the unit is now USD micro-credits
|
||||||
|
instead of raw tokens. The ``credit_micros_granted`` column replaced
|
||||||
|
the legacy ``tokens_granted`` in migration 140; the stored values
|
||||||
|
were not transformed because the prior $1 = 1M tokens Stripe price
|
||||||
|
makes the unit conversion 1:1 numerically.
|
||||||
|
"""
|
||||||
|
|
||||||
__tablename__ = "premium_token_purchases"
|
__tablename__ = "premium_token_purchases"
|
||||||
__allow_unmapped__ = True
|
__allow_unmapped__ = True
|
||||||
|
|
@ -1810,7 +1819,7 @@ class PremiumTokenPurchase(Base, TimestampMixin):
|
||||||
)
|
)
|
||||||
stripe_payment_intent_id = Column(String(255), nullable=True, index=True)
|
stripe_payment_intent_id = Column(String(255), nullable=True, index=True)
|
||||||
quantity = Column(Integer, nullable=False)
|
quantity = Column(Integer, nullable=False)
|
||||||
tokens_granted = Column(BigInteger, nullable=False)
|
credit_micros_granted = Column(BigInteger, nullable=False)
|
||||||
amount_total = Column(Integer, nullable=True)
|
amount_total = Column(Integer, nullable=True)
|
||||||
currency = Column(String(10), nullable=True)
|
currency = Column(String(10), nullable=True)
|
||||||
status = Column(
|
status = Column(
|
||||||
|
|
@ -2109,16 +2118,16 @@ if config.AUTH_TYPE == "GOOGLE":
|
||||||
)
|
)
|
||||||
pages_used = Column(Integer, nullable=False, default=0, server_default="0")
|
pages_used = Column(Integer, nullable=False, default=0, server_default="0")
|
||||||
|
|
||||||
premium_tokens_limit = Column(
|
premium_credit_micros_limit = Column(
|
||||||
BigInteger,
|
BigInteger,
|
||||||
nullable=False,
|
nullable=False,
|
||||||
default=config.PREMIUM_TOKEN_LIMIT,
|
default=config.PREMIUM_CREDIT_MICROS_LIMIT,
|
||||||
server_default=str(config.PREMIUM_TOKEN_LIMIT),
|
server_default=str(config.PREMIUM_CREDIT_MICROS_LIMIT),
|
||||||
)
|
)
|
||||||
premium_tokens_used = Column(
|
premium_credit_micros_used = Column(
|
||||||
BigInteger, nullable=False, default=0, server_default="0"
|
BigInteger, nullable=False, default=0, server_default="0"
|
||||||
)
|
)
|
||||||
premium_tokens_reserved = Column(
|
premium_credit_micros_reserved = Column(
|
||||||
BigInteger, nullable=False, default=0, server_default="0"
|
BigInteger, nullable=False, default=0, server_default="0"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -2241,16 +2250,16 @@ else:
|
||||||
)
|
)
|
||||||
pages_used = Column(Integer, nullable=False, default=0, server_default="0")
|
pages_used = Column(Integer, nullable=False, default=0, server_default="0")
|
||||||
|
|
||||||
premium_tokens_limit = Column(
|
premium_credit_micros_limit = Column(
|
||||||
BigInteger,
|
BigInteger,
|
||||||
nullable=False,
|
nullable=False,
|
||||||
default=config.PREMIUM_TOKEN_LIMIT,
|
default=config.PREMIUM_CREDIT_MICROS_LIMIT,
|
||||||
server_default=str(config.PREMIUM_TOKEN_LIMIT),
|
server_default=str(config.PREMIUM_CREDIT_MICROS_LIMIT),
|
||||||
)
|
)
|
||||||
premium_tokens_used = Column(
|
premium_credit_micros_used = Column(
|
||||||
BigInteger, nullable=False, default=0, server_default="0"
|
BigInteger, nullable=False, default=0, server_default="0"
|
||||||
)
|
)
|
||||||
premium_tokens_reserved = Column(
|
premium_credit_micros_reserved = Column(
|
||||||
BigInteger, nullable=False, default=0, server_default="0"
|
BigInteger, nullable=False, default=0, server_default="0"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,20 @@ class EtlPipelineService:
|
||||||
etl_service="VISION_LLM",
|
etl_service="VISION_LLM",
|
||||||
content_type="image",
|
content_type="image",
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception as exc:
|
||||||
|
# Special-case quota exhaustion so we log a clearer message
|
||||||
|
# — the vision LLM didn't "fail", the user just ran out of
|
||||||
|
# premium credit. Falling through to the document parser
|
||||||
|
# is a graceful degradation: OCR/Unstructured still
|
||||||
|
# extracts text from the image without burning credit.
|
||||||
|
from app.services.billable_calls import QuotaInsufficientError
|
||||||
|
|
||||||
|
if isinstance(exc, QuotaInsufficientError):
|
||||||
|
logging.info(
|
||||||
|
"Vision LLM quota exhausted for %s; falling back to document parser",
|
||||||
|
request.filename,
|
||||||
|
)
|
||||||
|
else:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
"Vision LLM failed for %s, falling back to document parser",
|
"Vision LLM failed for %s, falling back to document parser",
|
||||||
request.filename,
|
request.filename,
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ from fastapi import APIRouter, Depends
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
|
from app.agents.new_chat.feature_flags import AgentFeatureFlags, get_flags
|
||||||
|
from app.config import config
|
||||||
from app.db import User
|
from app.db import User
|
||||||
from app.users import current_active_user
|
from app.users import current_active_user
|
||||||
|
|
||||||
|
|
@ -58,10 +59,15 @@ class AgentFeatureFlagsRead(BaseModel):
|
||||||
|
|
||||||
enable_otel: bool
|
enable_otel: bool
|
||||||
|
|
||||||
|
enable_desktop_local_filesystem: bool
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_flags(cls, flags: AgentFeatureFlags) -> AgentFeatureFlagsRead:
|
def from_flags(cls, flags: AgentFeatureFlags) -> AgentFeatureFlagsRead:
|
||||||
# asdict() avoids missing-field bugs when AgentFeatureFlags grows.
|
# asdict() avoids missing-field bugs when AgentFeatureFlags grows.
|
||||||
return cls(**asdict(flags))
|
return cls(
|
||||||
|
**asdict(flags),
|
||||||
|
enable_desktop_local_filesystem=config.ENABLE_DESKTOP_LOCAL_FILESYSTEM,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@router.get("/agent/flags", response_model=AgentFeatureFlagsRead)
|
@router.get("/agent/flags", response_model=AgentFeatureFlagsRead)
|
||||||
|
|
|
||||||
|
|
@ -649,13 +649,9 @@ async def list_composio_drive_folders(
|
||||||
"""
|
"""
|
||||||
List folders AND files in user's Google Drive via Composio.
|
List folders AND files in user's Google Drive via Composio.
|
||||||
|
|
||||||
Uses the same GoogleDriveClient / list_folder_contents path as the native
|
Uses Composio's Google Drive tool execution path so managed OAuth tokens
|
||||||
connector, with Composio-sourced credentials. This means auth errors
|
do not need to be exposed through connected account state.
|
||||||
propagate identically (Google returns 401 → exception → auth_expired flag).
|
|
||||||
"""
|
"""
|
||||||
from app.connectors.google_drive import GoogleDriveClient, list_folder_contents
|
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
|
||||||
|
|
||||||
if not ComposioService.is_enabled():
|
if not ComposioService.is_enabled():
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=503,
|
status_code=503,
|
||||||
|
|
@ -689,10 +685,37 @@ async def list_composio_drive_folders(
|
||||||
detail="Composio connected account not found. Please reconnect the connector.",
|
detail="Composio connected account not found. Please reconnect the connector.",
|
||||||
)
|
)
|
||||||
|
|
||||||
credentials = build_composio_credentials(composio_connected_account_id)
|
service = ComposioService()
|
||||||
drive_client = GoogleDriveClient(session, connector_id, credentials=credentials)
|
entity_id = f"surfsense_{user.id}"
|
||||||
|
items = []
|
||||||
|
page_token = None
|
||||||
|
error = None
|
||||||
|
|
||||||
items, error = await list_folder_contents(drive_client, parent_id=parent_id)
|
while True:
|
||||||
|
page_items, next_token, page_error = await service.get_drive_files(
|
||||||
|
connected_account_id=composio_connected_account_id,
|
||||||
|
entity_id=entity_id,
|
||||||
|
folder_id=parent_id,
|
||||||
|
page_token=page_token,
|
||||||
|
page_size=100,
|
||||||
|
)
|
||||||
|
if page_error:
|
||||||
|
error = page_error
|
||||||
|
break
|
||||||
|
|
||||||
|
items.extend(page_items)
|
||||||
|
if not next_token:
|
||||||
|
break
|
||||||
|
page_token = next_token
|
||||||
|
|
||||||
|
for item in items:
|
||||||
|
item["isFolder"] = (
|
||||||
|
item.get("mimeType") == "application/vnd.google-apps.folder"
|
||||||
|
)
|
||||||
|
|
||||||
|
items.sort(
|
||||||
|
key=lambda item: (not item["isFolder"], item.get("name", "").lower())
|
||||||
|
)
|
||||||
|
|
||||||
if error:
|
if error:
|
||||||
error_lower = error.lower()
|
error_lower = error.lower()
|
||||||
|
|
|
||||||
|
|
@ -36,11 +36,17 @@ from app.schemas import (
|
||||||
ImageGenerationListRead,
|
ImageGenerationListRead,
|
||||||
ImageGenerationRead,
|
ImageGenerationRead,
|
||||||
)
|
)
|
||||||
|
from app.services.billable_calls import (
|
||||||
|
DEFAULT_IMAGE_RESERVE_MICROS,
|
||||||
|
QuotaInsufficientError,
|
||||||
|
billable_call,
|
||||||
|
)
|
||||||
from app.services.image_gen_router_service import (
|
from app.services.image_gen_router_service import (
|
||||||
IMAGE_GEN_AUTO_MODE_ID,
|
IMAGE_GEN_AUTO_MODE_ID,
|
||||||
ImageGenRouterService,
|
ImageGenRouterService,
|
||||||
is_image_gen_auto_mode,
|
is_image_gen_auto_mode,
|
||||||
)
|
)
|
||||||
|
from app.services.provider_api_base import resolve_api_base
|
||||||
from app.users import current_active_user
|
from app.users import current_active_user
|
||||||
from app.utils.rbac import check_permission
|
from app.utils.rbac import check_permission
|
||||||
from app.utils.signed_image_urls import verify_image_token
|
from app.utils.signed_image_urls import verify_image_token
|
||||||
|
|
@ -82,14 +88,62 @@ def _get_global_image_gen_config(config_id: int) -> dict | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_provider_prefix(provider: str, custom_provider: str | None) -> str:
|
||||||
|
"""Resolve the LiteLLM provider prefix used in model strings."""
|
||||||
|
if custom_provider:
|
||||||
|
return custom_provider
|
||||||
|
return _PROVIDER_MAP.get(provider.upper(), provider.lower())
|
||||||
|
|
||||||
|
|
||||||
def _build_model_string(
|
def _build_model_string(
|
||||||
provider: str, model_name: str, custom_provider: str | None
|
provider: str, model_name: str, custom_provider: str | None
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Build a litellm model string from provider + model_name."""
|
"""Build a litellm model string from provider + model_name."""
|
||||||
if custom_provider:
|
return f"{_resolve_provider_prefix(provider, custom_provider)}/{model_name}"
|
||||||
return f"{custom_provider}/{model_name}"
|
|
||||||
prefix = _PROVIDER_MAP.get(provider.upper(), provider.lower())
|
|
||||||
return f"{prefix}/{model_name}"
|
async def _resolve_billing_for_image_gen(
|
||||||
|
session: AsyncSession,
|
||||||
|
config_id: int | None,
|
||||||
|
search_space: SearchSpace,
|
||||||
|
) -> tuple[str, str, int]:
|
||||||
|
"""Resolve ``(billing_tier, base_model, reserve_micros)`` for a request.
|
||||||
|
|
||||||
|
The resolution mirrors ``_execute_image_generation``'s lookup tree but
|
||||||
|
only extracts the fields needed for billing — we do this *before*
|
||||||
|
``billable_call`` so the reservation is correctly sized for the
|
||||||
|
config that will actually run, and so we don't open an
|
||||||
|
``ImageGeneration`` row for a request that's about to 402.
|
||||||
|
|
||||||
|
User-owned (positive ID) BYOK configs are always free — they cost
|
||||||
|
the user nothing on our side. Auto mode is currently treated as free
|
||||||
|
because the underlying router can dispatch to either premium or
|
||||||
|
free YAML configs and we don't surface the resolved deployment up
|
||||||
|
here yet. Bringing Auto under premium billing would require
|
||||||
|
threading the chosen deployment back from ``ImageGenRouterService``.
|
||||||
|
"""
|
||||||
|
resolved_id = config_id
|
||||||
|
if resolved_id is None:
|
||||||
|
resolved_id = search_space.image_generation_config_id or IMAGE_GEN_AUTO_MODE_ID
|
||||||
|
|
||||||
|
if is_image_gen_auto_mode(resolved_id):
|
||||||
|
return ("free", "auto", DEFAULT_IMAGE_RESERVE_MICROS)
|
||||||
|
|
||||||
|
if resolved_id < 0:
|
||||||
|
cfg = _get_global_image_gen_config(resolved_id) or {}
|
||||||
|
billing_tier = str(cfg.get("billing_tier", "free")).lower()
|
||||||
|
base_model = _build_model_string(
|
||||||
|
cfg.get("provider", ""),
|
||||||
|
cfg.get("model_name", ""),
|
||||||
|
cfg.get("custom_provider"),
|
||||||
|
)
|
||||||
|
reserve_micros = int(
|
||||||
|
cfg.get("quota_reserve_micros") or DEFAULT_IMAGE_RESERVE_MICROS
|
||||||
|
)
|
||||||
|
return (billing_tier, base_model, reserve_micros)
|
||||||
|
|
||||||
|
# Positive ID = user-owned BYOK image-gen config — always free.
|
||||||
|
return ("free", "user_byok", DEFAULT_IMAGE_RESERVE_MICROS)
|
||||||
|
|
||||||
|
|
||||||
async def _execute_image_generation(
|
async def _execute_image_generation(
|
||||||
|
|
@ -138,12 +192,18 @@ async def _execute_image_generation(
|
||||||
if not cfg:
|
if not cfg:
|
||||||
raise ValueError(f"Global image generation config {config_id} not found")
|
raise ValueError(f"Global image generation config {config_id} not found")
|
||||||
|
|
||||||
model_string = _build_model_string(
|
provider_prefix = _resolve_provider_prefix(
|
||||||
cfg.get("provider", ""), cfg["model_name"], cfg.get("custom_provider")
|
cfg.get("provider", ""), cfg.get("custom_provider")
|
||||||
)
|
)
|
||||||
|
model_string = f"{provider_prefix}/{cfg['model_name']}"
|
||||||
gen_kwargs["api_key"] = cfg.get("api_key")
|
gen_kwargs["api_key"] = cfg.get("api_key")
|
||||||
if cfg.get("api_base"):
|
api_base = resolve_api_base(
|
||||||
gen_kwargs["api_base"] = cfg["api_base"]
|
provider=cfg.get("provider"),
|
||||||
|
provider_prefix=provider_prefix,
|
||||||
|
config_api_base=cfg.get("api_base"),
|
||||||
|
)
|
||||||
|
if api_base:
|
||||||
|
gen_kwargs["api_base"] = api_base
|
||||||
if cfg.get("api_version"):
|
if cfg.get("api_version"):
|
||||||
gen_kwargs["api_version"] = cfg["api_version"]
|
gen_kwargs["api_version"] = cfg["api_version"]
|
||||||
if cfg.get("litellm_params"):
|
if cfg.get("litellm_params"):
|
||||||
|
|
@ -165,12 +225,18 @@ async def _execute_image_generation(
|
||||||
if not db_cfg:
|
if not db_cfg:
|
||||||
raise ValueError(f"Image generation config {config_id} not found")
|
raise ValueError(f"Image generation config {config_id} not found")
|
||||||
|
|
||||||
model_string = _build_model_string(
|
provider_prefix = _resolve_provider_prefix(
|
||||||
db_cfg.provider.value, db_cfg.model_name, db_cfg.custom_provider
|
db_cfg.provider.value, db_cfg.custom_provider
|
||||||
)
|
)
|
||||||
|
model_string = f"{provider_prefix}/{db_cfg.model_name}"
|
||||||
gen_kwargs["api_key"] = db_cfg.api_key
|
gen_kwargs["api_key"] = db_cfg.api_key
|
||||||
if db_cfg.api_base:
|
api_base = resolve_api_base(
|
||||||
gen_kwargs["api_base"] = db_cfg.api_base
|
provider=db_cfg.provider.value,
|
||||||
|
provider_prefix=provider_prefix,
|
||||||
|
config_api_base=db_cfg.api_base,
|
||||||
|
)
|
||||||
|
if api_base:
|
||||||
|
gen_kwargs["api_base"] = api_base
|
||||||
if db_cfg.api_version:
|
if db_cfg.api_version:
|
||||||
gen_kwargs["api_version"] = db_cfg.api_version
|
gen_kwargs["api_version"] = db_cfg.api_version
|
||||||
if db_cfg.litellm_params:
|
if db_cfg.litellm_params:
|
||||||
|
|
@ -225,10 +291,15 @@ async def get_global_image_gen_configs(
|
||||||
"litellm_params": {},
|
"litellm_params": {},
|
||||||
"is_global": True,
|
"is_global": True,
|
||||||
"is_auto_mode": True,
|
"is_auto_mode": True,
|
||||||
|
# Auto mode is currently treated as free until per-deployment
|
||||||
|
# billing-tier surfacing lands (see _resolve_billing_for_image_gen).
|
||||||
|
"billing_tier": "free",
|
||||||
|
"is_premium": False,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
for cfg in global_configs:
|
for cfg in global_configs:
|
||||||
|
billing_tier = str(cfg.get("billing_tier", "free")).lower()
|
||||||
safe_configs.append(
|
safe_configs.append(
|
||||||
{
|
{
|
||||||
"id": cfg.get("id"),
|
"id": cfg.get("id"),
|
||||||
|
|
@ -241,6 +312,12 @@ async def get_global_image_gen_configs(
|
||||||
"api_version": cfg.get("api_version") or None,
|
"api_version": cfg.get("api_version") or None,
|
||||||
"litellm_params": cfg.get("litellm_params", {}),
|
"litellm_params": cfg.get("litellm_params", {}),
|
||||||
"is_global": True,
|
"is_global": True,
|
||||||
|
"billing_tier": billing_tier,
|
||||||
|
# Mirror chat (``new_llm_config_routes``) so the new-chat
|
||||||
|
# selector's premium badge logic keys off the same
|
||||||
|
# field across chat / image / vision tabs.
|
||||||
|
"is_premium": billing_tier == "premium",
|
||||||
|
"quota_reserve_micros": cfg.get("quota_reserve_micros"),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -454,7 +531,26 @@ async def create_image_generation(
|
||||||
session: AsyncSession = Depends(get_async_session),
|
session: AsyncSession = Depends(get_async_session),
|
||||||
user: User = Depends(current_active_user),
|
user: User = Depends(current_active_user),
|
||||||
):
|
):
|
||||||
"""Create and execute an image generation request."""
|
"""Create and execute an image generation request.
|
||||||
|
|
||||||
|
Premium configs are gated by the user's shared premium credit pool.
|
||||||
|
The flow is:
|
||||||
|
|
||||||
|
1. Permission check + load the search space (cheap, no provider call).
|
||||||
|
2. Resolve which config will run so we know its billing tier and the
|
||||||
|
worst-case reservation size *before* opening any DB rows.
|
||||||
|
3. Wrap the entire ImageGeneration row insert + provider call in
|
||||||
|
``billable_call``. If quota is denied, ``billable_call`` raises
|
||||||
|
``QuotaInsufficientError`` *before* we flush a row, which we
|
||||||
|
translate to HTTP 402 (no orphaned rows on the user's account,
|
||||||
|
no inserted error rows for "you ran out of credit").
|
||||||
|
4. On success, the actual ``response_cost`` flows through the
|
||||||
|
LiteLLM callback into the accumulator, and ``billable_call``
|
||||||
|
finalizes the debit at exit. Inner ``try/except`` still catches
|
||||||
|
provider errors and stores them on ``error_message`` (HTTP 200
|
||||||
|
with ``error_message`` set is preserved for failed-but-not-quota
|
||||||
|
scenarios — clients already know how to surface those).
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
await check_permission(
|
await check_permission(
|
||||||
session,
|
session,
|
||||||
|
|
@ -471,6 +567,25 @@ async def create_image_generation(
|
||||||
if not search_space:
|
if not search_space:
|
||||||
raise HTTPException(status_code=404, detail="Search space not found")
|
raise HTTPException(status_code=404, detail="Search space not found")
|
||||||
|
|
||||||
|
billing_tier, base_model, reserve_micros = await _resolve_billing_for_image_gen(
|
||||||
|
session, data.image_generation_config_id, search_space
|
||||||
|
)
|
||||||
|
|
||||||
|
# billable_call runs OUTSIDE the inner try/except so QuotaInsufficientError
|
||||||
|
# propagates to the outer ``except QuotaInsufficientError`` handler
|
||||||
|
# below as HTTP 402 — it is intentionally NOT swallowed into
|
||||||
|
# ``error_message`` because that would (1) imply a successful row
|
||||||
|
# exists when none does, and (2) return HTTP 200 to a client
|
||||||
|
# whose request was actively *denied* (issue K).
|
||||||
|
async with billable_call(
|
||||||
|
user_id=search_space.user_id,
|
||||||
|
search_space_id=data.search_space_id,
|
||||||
|
billing_tier=billing_tier,
|
||||||
|
base_model=base_model,
|
||||||
|
quota_reserve_micros_override=reserve_micros,
|
||||||
|
usage_type="image_generation",
|
||||||
|
call_details={"model": base_model, "prompt": data.prompt[:100]},
|
||||||
|
):
|
||||||
db_image_gen = ImageGeneration(
|
db_image_gen = ImageGeneration(
|
||||||
prompt=data.prompt,
|
prompt=data.prompt,
|
||||||
model=data.model,
|
model=data.model,
|
||||||
|
|
@ -498,6 +613,24 @@ async def create_image_generation(
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
|
except QuotaInsufficientError as exc:
|
||||||
|
# The user's premium credit pool is empty. No DB row is created
|
||||||
|
# because ``billable_call`` denies before yielding (issue K).
|
||||||
|
await session.rollback()
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=402,
|
||||||
|
detail={
|
||||||
|
"error_code": "premium_quota_exhausted",
|
||||||
|
"usage_type": exc.usage_type,
|
||||||
|
"used_micros": exc.used_micros,
|
||||||
|
"limit_micros": exc.limit_micros,
|
||||||
|
"remaining_micros": exc.remaining_micros,
|
||||||
|
"message": (
|
||||||
|
"Out of premium credits for image generation. "
|
||||||
|
"Purchase additional credits or switch to a free model."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
) from exc
|
||||||
except SQLAlchemyError:
|
except SQLAlchemyError:
|
||||||
await session.rollback()
|
await session.rollback()
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
|
||||||
|
|
@ -1366,7 +1366,11 @@ async def append_message(
|
||||||
# flush assigns the PK/defaults without a round-trip SELECT
|
# flush assigns the PK/defaults without a round-trip SELECT
|
||||||
await session.flush()
|
await session.flush()
|
||||||
|
|
||||||
# Persist token usage if provided (for assistant messages)
|
# Persist token usage if provided (for assistant messages).
|
||||||
|
# ``cost_micros`` is the provider USD cost reported by LiteLLM,
|
||||||
|
# forwarded by the FE through the appendMessage round-trip so
|
||||||
|
# the historical TokenUsage row matches the credit debit applied
|
||||||
|
# at finalize time.
|
||||||
token_usage_data = raw_body.get("token_usage")
|
token_usage_data = raw_body.get("token_usage")
|
||||||
if token_usage_data and message_role == NewChatMessageRole.ASSISTANT:
|
if token_usage_data and message_role == NewChatMessageRole.ASSISTANT:
|
||||||
await record_token_usage(
|
await record_token_usage(
|
||||||
|
|
@ -1377,6 +1381,7 @@ async def append_message(
|
||||||
prompt_tokens=token_usage_data.get("prompt_tokens", 0),
|
prompt_tokens=token_usage_data.get("prompt_tokens", 0),
|
||||||
completion_tokens=token_usage_data.get("completion_tokens", 0),
|
completion_tokens=token_usage_data.get("completion_tokens", 0),
|
||||||
total_tokens=token_usage_data.get("total_tokens", 0),
|
total_tokens=token_usage_data.get("total_tokens", 0),
|
||||||
|
cost_micros=token_usage_data.get("cost_micros", 0),
|
||||||
model_breakdown=token_usage_data.get("usage"),
|
model_breakdown=token_usage_data.get("usage"),
|
||||||
call_details=token_usage_data.get("call_details"),
|
call_details=token_usage_data.get("call_details"),
|
||||||
thread_id=thread_id,
|
thread_id=thread_id,
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ from app.schemas import (
|
||||||
NewLLMConfigUpdate,
|
NewLLMConfigUpdate,
|
||||||
)
|
)
|
||||||
from app.services.llm_service import validate_llm_config
|
from app.services.llm_service import validate_llm_config
|
||||||
|
from app.services.provider_capabilities import derive_supports_image_input
|
||||||
from app.users import current_active_user
|
from app.users import current_active_user
|
||||||
from app.utils.rbac import check_permission
|
from app.utils.rbac import check_permission
|
||||||
|
|
||||||
|
|
@ -36,6 +37,39 @@ router = APIRouter()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _serialize_byok_config(config: NewLLMConfig) -> NewLLMConfigRead:
    """Build the API read model for a BYOK chat config row.

    ``supports_image_input`` has no DB column, so it is computed here by
    ``derive_supports_image_input`` from the row's provider, model name,
    optional ``litellm_params["base_model"]`` and custom provider, and then
    layered onto the validated read model. Sending the list / detail /
    create / update responses through this single helper keeps the response
    shape the same everywhere without each call site setting the field.

    Args:
        config: The BYOK chat config row to serialize.

    Returns:
        A ``NewLLMConfigRead`` whose ``supports_image_input`` carries the
        derived value.
    """
    # Enum-like providers expose ``.value``; anything else is stringified.
    if hasattr(config.provider, "value"):
        provider_value = config.provider.value
    else:
        provider_value = str(config.provider)

    # ``litellm_params`` may be empty/None or (defensively) not a dict at all.
    params = config.litellm_params or {}
    base_model = None
    if isinstance(params, dict):
        base_model = params.get("base_model")

    image_capable = derive_supports_image_input(
        provider=provider_value,
        model_name=config.model_name,
        base_model=base_model,
        custom_provider=config.custom_provider,
    )

    # Validate straight from the ORM attributes (the schema enables
    # ``from_attributes``), then return a copy carrying the derived flag so
    # the validated instance itself is never mutated.
    validated = NewLLMConfigRead.model_validate(config)
    overrides = {"supports_image_input": image_capable}
    return validated.model_copy(update=overrides)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Global Configs Routes
|
# Global Configs Routes
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -84,11 +118,41 @@ async def get_global_new_llm_configs(
|
||||||
"seo_title": None,
|
"seo_title": None,
|
||||||
"seo_description": None,
|
"seo_description": None,
|
||||||
"quota_reserve_tokens": None,
|
"quota_reserve_tokens": None,
|
||||||
|
# Auto routes across the configured pool, which usually
|
||||||
|
# includes at least one vision-capable deployment, so
|
||||||
|
# treat Auto as image-capable. The router itself will
|
||||||
|
# still pick a vision-capable deployment for messages
|
||||||
|
# carrying image_url blocks (LiteLLM Router falls back
|
||||||
|
# on ``404`` per its ``allowed_fails`` policy).
|
||||||
|
"supports_image_input": True,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add individual global configs
|
# Add individual global configs
|
||||||
for cfg in global_configs:
|
for cfg in global_configs:
|
||||||
|
# Capability resolution: an explicit value (a YAML override or the
|
||||||
|
# `_supports_image_input(model)` payload baked in by the
|
||||||
|
# OpenRouter integration service) wins. Fall back to the
|
||||||
|
# LiteLLM-driven helper which default-allows on unknown so
|
||||||
|
# we don't hide vision-capable models that happen to lack a
|
||||||
|
# YAML annotation. The streaming task safety net is the
|
||||||
|
# only place a False ever blocks.
|
||||||
|
if "supports_image_input" in cfg:
|
||||||
|
supports_image_input = bool(cfg.get("supports_image_input"))
|
||||||
|
else:
|
||||||
|
cfg_litellm_params = cfg.get("litellm_params") or {}
|
||||||
|
cfg_base_model = (
|
||||||
|
cfg_litellm_params.get("base_model")
|
||||||
|
if isinstance(cfg_litellm_params, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
supports_image_input = derive_supports_image_input(
|
||||||
|
provider=cfg.get("provider"),
|
||||||
|
model_name=cfg.get("model_name"),
|
||||||
|
base_model=cfg_base_model,
|
||||||
|
custom_provider=cfg.get("custom_provider"),
|
||||||
|
)
|
||||||
|
|
||||||
safe_config = {
|
safe_config = {
|
||||||
"id": cfg.get("id"),
|
"id": cfg.get("id"),
|
||||||
"name": cfg.get("name"),
|
"name": cfg.get("name"),
|
||||||
|
|
@ -113,6 +177,7 @@ async def get_global_new_llm_configs(
|
||||||
"seo_title": cfg.get("seo_title"),
|
"seo_title": cfg.get("seo_title"),
|
||||||
"seo_description": cfg.get("seo_description"),
|
"seo_description": cfg.get("seo_description"),
|
||||||
"quota_reserve_tokens": cfg.get("quota_reserve_tokens"),
|
"quota_reserve_tokens": cfg.get("quota_reserve_tokens"),
|
||||||
|
"supports_image_input": supports_image_input,
|
||||||
}
|
}
|
||||||
safe_configs.append(safe_config)
|
safe_configs.append(safe_config)
|
||||||
|
|
||||||
|
|
@ -171,7 +236,7 @@ async def create_new_llm_config(
|
||||||
await session.commit()
|
await session.commit()
|
||||||
await session.refresh(db_config)
|
await session.refresh(db_config)
|
||||||
|
|
||||||
return db_config
|
return _serialize_byok_config(db_config)
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
|
|
@ -213,7 +278,7 @@ async def list_new_llm_configs(
|
||||||
.limit(limit)
|
.limit(limit)
|
||||||
)
|
)
|
||||||
|
|
||||||
return result.scalars().all()
|
return [_serialize_byok_config(cfg) for cfg in result.scalars().all()]
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
|
|
@ -268,7 +333,7 @@ async def get_new_llm_config(
|
||||||
"You don't have permission to view LLM configurations in this search space",
|
"You don't have permission to view LLM configurations in this search space",
|
||||||
)
|
)
|
||||||
|
|
||||||
return config
|
return _serialize_byok_config(config)
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
|
|
@ -360,7 +425,7 @@ async def update_new_llm_config(
|
||||||
await session.commit()
|
await session.commit()
|
||||||
await session.refresh(config)
|
await session.refresh(config)
|
||||||
|
|
||||||
return config
|
return _serialize_byok_config(config)
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
|
|
|
||||||
|
|
@ -591,6 +591,7 @@ async def _get_image_gen_config_by_id(
|
||||||
"model_name": "auto",
|
"model_name": "auto",
|
||||||
"is_global": True,
|
"is_global": True,
|
||||||
"is_auto_mode": True,
|
"is_auto_mode": True,
|
||||||
|
"billing_tier": "free",
|
||||||
}
|
}
|
||||||
|
|
||||||
if config_id < 0:
|
if config_id < 0:
|
||||||
|
|
@ -607,6 +608,7 @@ async def _get_image_gen_config_by_id(
|
||||||
"api_version": cfg.get("api_version") or None,
|
"api_version": cfg.get("api_version") or None,
|
||||||
"litellm_params": cfg.get("litellm_params", {}),
|
"litellm_params": cfg.get("litellm_params", {}),
|
||||||
"is_global": True,
|
"is_global": True,
|
||||||
|
"billing_tier": cfg.get("billing_tier", "free"),
|
||||||
}
|
}
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -649,6 +651,7 @@ async def _get_vision_llm_config_by_id(
|
||||||
"model_name": "auto",
|
"model_name": "auto",
|
||||||
"is_global": True,
|
"is_global": True,
|
||||||
"is_auto_mode": True,
|
"is_auto_mode": True,
|
||||||
|
"billing_tier": "free",
|
||||||
}
|
}
|
||||||
|
|
||||||
if config_id < 0:
|
if config_id < 0:
|
||||||
|
|
@ -665,6 +668,7 @@ async def _get_vision_llm_config_by_id(
|
||||||
"api_version": cfg.get("api_version") or None,
|
"api_version": cfg.get("api_version") or None,
|
||||||
"litellm_params": cfg.get("litellm_params", {}),
|
"litellm_params": cfg.get("litellm_params", {}),
|
||||||
"is_global": True,
|
"is_global": True,
|
||||||
|
"billing_tier": cfg.get("billing_tier", "free"),
|
||||||
}
|
}
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -251,9 +251,16 @@ async def _fulfill_completed_token_purchase(
|
||||||
metadata = _get_metadata(checkout_session)
|
metadata = _get_metadata(checkout_session)
|
||||||
user_id = metadata.get("user_id")
|
user_id = metadata.get("user_id")
|
||||||
quantity = int(metadata.get("quantity", "0"))
|
quantity = int(metadata.get("quantity", "0"))
|
||||||
tokens_per_unit = int(metadata.get("tokens_per_unit", "0"))
|
# Read the new metadata key first, fall back to the legacy one so
|
||||||
|
# in-flight checkout sessions created before the cost-credits
|
||||||
|
# release still fulfil correctly (the unit is numerically the
|
||||||
|
# same: $1 buys 1_000_000 micro-USD == 1_000_000 tokens).
|
||||||
|
credit_micros_per_unit = int(
|
||||||
|
metadata.get("credit_micros_per_unit")
|
||||||
|
or metadata.get("tokens_per_unit", "0")
|
||||||
|
)
|
||||||
|
|
||||||
if not user_id or quantity <= 0 or tokens_per_unit <= 0:
|
if not user_id or quantity <= 0 or credit_micros_per_unit <= 0:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Skipping token fulfillment for session %s: incomplete metadata %s",
|
"Skipping token fulfillment for session %s: incomplete metadata %s",
|
||||||
checkout_session_id,
|
checkout_session_id,
|
||||||
|
|
@ -268,7 +275,7 @@ async def _fulfill_completed_token_purchase(
|
||||||
getattr(checkout_session, "payment_intent", None)
|
getattr(checkout_session, "payment_intent", None)
|
||||||
),
|
),
|
||||||
quantity=quantity,
|
quantity=quantity,
|
||||||
tokens_granted=quantity * tokens_per_unit,
|
credit_micros_granted=quantity * credit_micros_per_unit,
|
||||||
amount_total=getattr(checkout_session, "amount_total", None),
|
amount_total=getattr(checkout_session, "amount_total", None),
|
||||||
currency=getattr(checkout_session, "currency", None),
|
currency=getattr(checkout_session, "currency", None),
|
||||||
status=PremiumTokenPurchaseStatus.PENDING,
|
status=PremiumTokenPurchaseStatus.PENDING,
|
||||||
|
|
@ -303,9 +310,14 @@ async def _fulfill_completed_token_purchase(
|
||||||
purchase.stripe_payment_intent_id = _normalize_optional_string(
|
purchase.stripe_payment_intent_id = _normalize_optional_string(
|
||||||
getattr(checkout_session, "payment_intent", None)
|
getattr(checkout_session, "payment_intent", None)
|
||||||
)
|
)
|
||||||
user.premium_tokens_limit = (
|
# Top up the user's credit balance by the granted micro-USD amount.
|
||||||
max(user.premium_tokens_used, user.premium_tokens_limit)
|
# ``max(used, limit)`` clamps the case where the legacy code wrote a
|
||||||
+ purchase.tokens_granted
|
# used value above the limit (e.g. underbilling rounding) so adding
|
||||||
|
# ``credit_micros_granted`` always lifts the limit by the full pack
|
||||||
|
# size rather than disappearing into past overuse.
|
||||||
|
user.premium_credit_micros_limit = (
|
||||||
|
max(user.premium_credit_micros_used, user.premium_credit_micros_limit)
|
||||||
|
+ purchase.credit_micros_granted
|
||||||
)
|
)
|
||||||
|
|
||||||
await db_session.commit()
|
await db_session.commit()
|
||||||
|
|
@ -532,12 +544,18 @@ async def create_token_checkout_session(
|
||||||
user: User = Depends(current_active_user),
|
user: User = Depends(current_active_user),
|
||||||
db_session: AsyncSession = Depends(get_async_session),
|
db_session: AsyncSession = Depends(get_async_session),
|
||||||
):
|
):
|
||||||
"""Create a Stripe Checkout Session for buying premium token packs."""
|
"""Create a Stripe Checkout Session for buying premium credit packs.
|
||||||
|
|
||||||
|
Each pack grants ``STRIPE_CREDIT_MICROS_PER_UNIT`` micro-USD of
|
||||||
|
credit (default 1_000_000 = $1.00). The user's balance is debited
|
||||||
|
at the actual provider cost reported by LiteLLM at finalize time,
|
||||||
|
so $1 of credit always buys $1 worth of provider usage at cost.
|
||||||
|
"""
|
||||||
_ensure_token_buying_enabled()
|
_ensure_token_buying_enabled()
|
||||||
stripe_client = get_stripe_client()
|
stripe_client = get_stripe_client()
|
||||||
price_id = _get_required_token_price_id()
|
price_id = _get_required_token_price_id()
|
||||||
success_url, cancel_url = _get_token_checkout_urls(body.search_space_id)
|
success_url, cancel_url = _get_token_checkout_urls(body.search_space_id)
|
||||||
tokens_granted = body.quantity * config.STRIPE_TOKENS_PER_UNIT
|
credit_micros_granted = body.quantity * config.STRIPE_CREDIT_MICROS_PER_UNIT
|
||||||
|
|
||||||
try:
|
try:
|
||||||
checkout_session = stripe_client.v1.checkout.sessions.create(
|
checkout_session = stripe_client.v1.checkout.sessions.create(
|
||||||
|
|
@ -556,8 +574,8 @@ async def create_token_checkout_session(
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"user_id": str(user.id),
|
"user_id": str(user.id),
|
||||||
"quantity": str(body.quantity),
|
"quantity": str(body.quantity),
|
||||||
"tokens_per_unit": str(config.STRIPE_TOKENS_PER_UNIT),
|
"credit_micros_per_unit": str(config.STRIPE_CREDIT_MICROS_PER_UNIT),
|
||||||
"purchase_type": "premium_tokens",
|
"purchase_type": "premium_credit",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
@ -583,7 +601,7 @@ async def create_token_checkout_session(
|
||||||
getattr(checkout_session, "payment_intent", None)
|
getattr(checkout_session, "payment_intent", None)
|
||||||
),
|
),
|
||||||
quantity=body.quantity,
|
quantity=body.quantity,
|
||||||
tokens_granted=tokens_granted,
|
credit_micros_granted=credit_micros_granted,
|
||||||
amount_total=getattr(checkout_session, "amount_total", None),
|
amount_total=getattr(checkout_session, "amount_total", None),
|
||||||
currency=getattr(checkout_session, "currency", None),
|
currency=getattr(checkout_session, "currency", None),
|
||||||
status=PremiumTokenPurchaseStatus.PENDING,
|
status=PremiumTokenPurchaseStatus.PENDING,
|
||||||
|
|
@ -598,14 +616,19 @@ async def create_token_checkout_session(
|
||||||
async def get_token_status(
|
async def get_token_status(
|
||||||
user: User = Depends(current_active_user),
|
user: User = Depends(current_active_user),
|
||||||
):
|
):
|
||||||
"""Return token-buying availability and current premium quota for frontend."""
|
"""Return token-buying availability and current premium credit quota for frontend.
|
||||||
used = user.premium_tokens_used
|
|
||||||
limit = user.premium_tokens_limit
|
Values are in micro-USD (1_000_000 = $1.00); the FE divides by 1M
|
||||||
|
when displaying. The route name is preserved for back-compat with
|
||||||
|
pinned client deployments.
|
||||||
|
"""
|
||||||
|
used = user.premium_credit_micros_used
|
||||||
|
limit = user.premium_credit_micros_limit
|
||||||
return TokenStripeStatusResponse(
|
return TokenStripeStatusResponse(
|
||||||
token_buying_enabled=config.STRIPE_TOKEN_BUYING_ENABLED,
|
token_buying_enabled=config.STRIPE_TOKEN_BUYING_ENABLED,
|
||||||
premium_tokens_used=used,
|
premium_credit_micros_used=used,
|
||||||
premium_tokens_limit=limit,
|
premium_credit_micros_limit=limit,
|
||||||
premium_tokens_remaining=max(0, limit - used),
|
premium_credit_micros_remaining=max(0, limit - used),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -82,10 +82,15 @@ async def get_global_vision_llm_configs(
|
||||||
"litellm_params": {},
|
"litellm_params": {},
|
||||||
"is_global": True,
|
"is_global": True,
|
||||||
"is_auto_mode": True,
|
"is_auto_mode": True,
|
||||||
|
# Auto mode treated as free until per-deployment billing-tier
|
||||||
|
# surfacing lands; see ``get_vision_llm`` for parity.
|
||||||
|
"billing_tier": "free",
|
||||||
|
"is_premium": False,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
for cfg in global_configs:
|
for cfg in global_configs:
|
||||||
|
billing_tier = str(cfg.get("billing_tier", "free")).lower()
|
||||||
safe_configs.append(
|
safe_configs.append(
|
||||||
{
|
{
|
||||||
"id": cfg.get("id"),
|
"id": cfg.get("id"),
|
||||||
|
|
@ -98,6 +103,14 @@ async def get_global_vision_llm_configs(
|
||||||
"api_version": cfg.get("api_version") or None,
|
"api_version": cfg.get("api_version") or None,
|
||||||
"litellm_params": cfg.get("litellm_params", {}),
|
"litellm_params": cfg.get("litellm_params", {}),
|
||||||
"is_global": True,
|
"is_global": True,
|
||||||
|
"billing_tier": billing_tier,
|
||||||
|
# Mirror chat (``new_llm_config_routes``) so the new-chat
|
||||||
|
# selector's premium badge logic keys off the same
|
||||||
|
# field across chat / image / vision tabs.
|
||||||
|
"is_premium": billing_tier == "premium",
|
||||||
|
"quota_reserve_tokens": cfg.get("quota_reserve_tokens"),
|
||||||
|
"input_cost_per_token": cfg.get("input_cost_per_token"),
|
||||||
|
"output_cost_per_token": cfg.get("output_cost_per_token"),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -215,6 +215,12 @@ class GlobalImageGenConfigRead(BaseModel):
|
||||||
Schema for reading global image generation configs from YAML.
|
Schema for reading global image generation configs from YAML.
|
||||||
Global configs have negative IDs. API key is hidden.
|
Global configs have negative IDs. API key is hidden.
|
||||||
ID 0 is reserved for Auto mode (LiteLLM Router load balancing).
|
ID 0 is reserved for Auto mode (LiteLLM Router load balancing).
|
||||||
|
|
||||||
|
The ``billing_tier`` field allows the frontend to show a Premium/Free
|
||||||
|
badge and (more importantly) tells the backend whether to debit the
|
||||||
|
user's premium credit pool when this config is used. ``"free"`` is
|
||||||
|
the default for backward compatibility — admins must explicitly opt
|
||||||
|
a global config into ``"premium"``.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
id: int = Field(
|
id: int = Field(
|
||||||
|
|
@ -231,3 +237,24 @@ class GlobalImageGenConfigRead(BaseModel):
|
||||||
litellm_params: dict[str, Any] | None = None
|
litellm_params: dict[str, Any] | None = None
|
||||||
is_global: bool = True
|
is_global: bool = True
|
||||||
is_auto_mode: bool = False
|
is_auto_mode: bool = False
|
||||||
|
billing_tier: str = Field(
|
||||||
|
default="free",
|
||||||
|
description="'free' or 'premium'. Premium debits the user's premium credit pool (USD-cost-based).",
|
||||||
|
)
|
||||||
|
is_premium: bool = Field(
|
||||||
|
default=False,
|
||||||
|
description=(
|
||||||
|
"Convenience boolean derived server-side from "
|
||||||
|
"``billing_tier == 'premium'``. The new-chat model selector "
|
||||||
|
"keys its Free/Premium badge off this field for parity with "
|
||||||
|
"chat (`GlobalLLMConfigRead.is_premium`)."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
quota_reserve_micros: int | None = Field(
|
||||||
|
default=None,
|
||||||
|
description=(
|
||||||
|
"Optional override for the reservation amount (in micro-USD) used when "
|
||||||
|
"this image generation is premium. Falls back to "
|
||||||
|
"QUOTA_DEFAULT_IMAGE_RESERVE_MICROS when omitted."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@ class TokenUsageSummary(BaseModel):
|
||||||
prompt_tokens: int = 0
|
prompt_tokens: int = 0
|
||||||
completion_tokens: int = 0
|
completion_tokens: int = 0
|
||||||
total_tokens: int = 0
|
total_tokens: int = 0
|
||||||
|
cost_micros: int = 0
|
||||||
model_breakdown: dict | None = None
|
model_breakdown: dict | None = None
|
||||||
model_config = ConfigDict(from_attributes=True)
|
model_config = ConfigDict(from_attributes=True)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -92,6 +92,20 @@ class NewLLMConfigRead(NewLLMConfigBase):
|
||||||
created_at: datetime
|
created_at: datetime
|
||||||
search_space_id: int
|
search_space_id: int
|
||||||
user_id: uuid.UUID
|
user_id: uuid.UUID
|
||||||
|
# Capability flag derived at the API boundary (no DB column). Default
|
||||||
|
# True matches the conservative-allow stance — a BYOK row that the
|
||||||
|
# route forgot to augment is not pre-judged. The streaming-task
|
||||||
|
# safety net is the only place a False actually blocks a request.
|
||||||
|
supports_image_input: bool = Field(
|
||||||
|
default=True,
|
||||||
|
description=(
|
||||||
|
"Whether the BYOK chat config can accept image inputs. Derived "
|
||||||
|
"at the route boundary from LiteLLM's authoritative model map "
|
||||||
|
"(``litellm.supports_vision``) — there is no DB column. "
|
||||||
|
"Default True is the conservative-allow stance for unknown / "
|
||||||
|
"unmapped models."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
model_config = ConfigDict(from_attributes=True)
|
model_config = ConfigDict(from_attributes=True)
|
||||||
|
|
||||||
|
|
@ -121,6 +135,15 @@ class NewLLMConfigPublic(BaseModel):
|
||||||
created_at: datetime
|
created_at: datetime
|
||||||
search_space_id: int
|
search_space_id: int
|
||||||
user_id: uuid.UUID
|
user_id: uuid.UUID
|
||||||
|
# Capability flag derived at the API boundary (see NewLLMConfigRead).
|
||||||
|
supports_image_input: bool = Field(
|
||||||
|
default=True,
|
||||||
|
description=(
|
||||||
|
"Whether the BYOK chat config can accept image inputs. Derived "
|
||||||
|
"at the route boundary from LiteLLM's authoritative model map. "
|
||||||
|
"Default True is the conservative-allow stance."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
model_config = ConfigDict(from_attributes=True)
|
model_config = ConfigDict(from_attributes=True)
|
||||||
|
|
||||||
|
|
@ -172,6 +195,19 @@ class GlobalNewLLMConfigRead(BaseModel):
|
||||||
seo_title: str | None = None
|
seo_title: str | None = None
|
||||||
seo_description: str | None = None
|
seo_description: str | None = None
|
||||||
quota_reserve_tokens: int | None = None
|
quota_reserve_tokens: int | None = None
|
||||||
|
supports_image_input: bool = Field(
|
||||||
|
default=True,
|
||||||
|
description=(
|
||||||
|
"Whether the model accepts image inputs (multimodal vision). "
|
||||||
|
"Derived server-side: OpenRouter dynamic configs use "
|
||||||
|
"``architecture.input_modalities``; YAML / BYOK use LiteLLM's "
|
||||||
|
"authoritative model map (``litellm.supports_vision``). The "
|
||||||
|
"new-chat selector hints with a 'No image' badge when this is "
|
||||||
|
"False and there are pending image attachments. The streaming "
|
||||||
|
"task fails fast only when LiteLLM *explicitly* marks a model "
|
||||||
|
"as text-only — unknown / unmapped models default-allow."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
|
||||||
|
|
@ -70,13 +70,17 @@ class CreateTokenCheckoutSessionResponse(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class TokenPurchaseRead(BaseModel):
|
class TokenPurchaseRead(BaseModel):
|
||||||
"""Serialized premium token purchase record."""
|
"""Serialized premium credit purchase record.
|
||||||
|
|
||||||
|
``credit_micros_granted`` is in micro-USD (1_000_000 = $1.00). The
|
||||||
|
schema name kept ``Token`` for API back-compat with pinned clients.
|
||||||
|
"""
|
||||||
|
|
||||||
id: uuid.UUID
|
id: uuid.UUID
|
||||||
stripe_checkout_session_id: str
|
stripe_checkout_session_id: str
|
||||||
stripe_payment_intent_id: str | None = None
|
stripe_payment_intent_id: str | None = None
|
||||||
quantity: int
|
quantity: int
|
||||||
tokens_granted: int
|
credit_micros_granted: int
|
||||||
amount_total: int | None = None
|
amount_total: int | None = None
|
||||||
currency: str | None = None
|
currency: str | None = None
|
||||||
status: str
|
status: str
|
||||||
|
|
@ -87,15 +91,19 @@ class TokenPurchaseRead(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class TokenPurchaseHistoryResponse(BaseModel):
|
class TokenPurchaseHistoryResponse(BaseModel):
|
||||||
"""Response containing the user's premium token purchases."""
|
"""Response containing the user's premium credit purchases."""
|
||||||
|
|
||||||
purchases: list[TokenPurchaseRead]
|
purchases: list[TokenPurchaseRead]
|
||||||
|
|
||||||
|
|
||||||
class TokenStripeStatusResponse(BaseModel):
|
class TokenStripeStatusResponse(BaseModel):
|
||||||
"""Response describing token-buying availability and current quota."""
|
"""Response describing premium-credit-buying availability and balance.
|
||||||
|
|
||||||
|
All ``premium_credit_micros_*`` fields are in micro-USD; the FE
|
||||||
|
divides by 1_000_000 to display USD.
|
||||||
|
"""
|
||||||
|
|
||||||
token_buying_enabled: bool
|
token_buying_enabled: bool
|
||||||
premium_tokens_used: int = 0
|
premium_credit_micros_used: int = 0
|
||||||
premium_tokens_limit: int = 0
|
premium_credit_micros_limit: int = 0
|
||||||
premium_tokens_remaining: int = 0
|
premium_credit_micros_remaining: int = 0
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,15 @@ class VisionLLMConfigPublic(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class GlobalVisionLLMConfigRead(BaseModel):
|
class GlobalVisionLLMConfigRead(BaseModel):
|
||||||
|
"""Schema for reading global vision LLM configs from YAML.
|
||||||
|
|
||||||
|
The ``billing_tier`` field allows the frontend to show a Premium/Free
|
||||||
|
badge and (more importantly) tells the backend whether to debit the
|
||||||
|
user's premium credit pool when this config is used. ``"free"`` is
|
||||||
|
the default for backward compatibility — admins must explicitly opt
|
||||||
|
a global config into ``"premium"``.
|
||||||
|
"""
|
||||||
|
|
||||||
id: int = Field(...)
|
id: int = Field(...)
|
||||||
name: str
|
name: str
|
||||||
description: str | None = None
|
description: str | None = None
|
||||||
|
|
@ -73,3 +82,35 @@ class GlobalVisionLLMConfigRead(BaseModel):
|
||||||
litellm_params: dict[str, Any] | None = None
|
litellm_params: dict[str, Any] | None = None
|
||||||
is_global: bool = True
|
is_global: bool = True
|
||||||
is_auto_mode: bool = False
|
is_auto_mode: bool = False
|
||||||
|
billing_tier: str = Field(
|
||||||
|
default="free",
|
||||||
|
description="'free' or 'premium'. Premium debits the user's premium credit pool (USD-cost-based).",
|
||||||
|
)
|
||||||
|
is_premium: bool = Field(
|
||||||
|
default=False,
|
||||||
|
description=(
|
||||||
|
"Convenience boolean derived server-side from "
|
||||||
|
"``billing_tier == 'premium'``. The new-chat model selector "
|
||||||
|
"keys its Free/Premium badge off this field for parity with "
|
||||||
|
"chat (`GlobalLLMConfigRead.is_premium`)."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
quota_reserve_tokens: int | None = Field(
|
||||||
|
default=None,
|
||||||
|
description=(
|
||||||
|
"Optional override for the per-call reservation in *tokens* — "
|
||||||
|
"converted to micro-USD via the model's input/output prices at "
|
||||||
|
"reservation time. Falls back to QUOTA_DEFAULT_RESERVE_TOKENS."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
input_cost_per_token: float | None = Field(
|
||||||
|
default=None,
|
||||||
|
description=(
|
||||||
|
"Optional input price in USD/token. Used by pricing_registration to "
|
||||||
|
"register custom Azure / OpenRouter aliases with LiteLLM at startup."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
output_cost_per_token: float | None = Field(
|
||||||
|
default=None,
|
||||||
|
description="Optional output price in USD/token. Pair with input_cost_per_token.",
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -163,13 +163,47 @@ def clear_healthy(config_id: int | None = None) -> None:
|
||||||
_healthy_until.pop(int(config_id), None)
|
_healthy_until.pop(int(config_id), None)
|
||||||
|
|
||||||
|
|
||||||
def _global_candidates() -> list[dict]:
|
def _cfg_supports_image_input(cfg: dict) -> bool:
    """Return True if the global cfg can accept image inputs.

    An explicit, non-``None`` ``supports_image_input`` value on the cfg is
    authoritative and is returned as a bool. When the key is missing *or*
    present with a ``None`` value (e.g. an empty YAML entry), the answer is
    derived from the provider/model via ``derive_supports_image_input`` so
    an entry whose flag was stripped or left blank is not wrongly excluded.
    This filter is meant to be permissive; the streaming task remains the
    actual safety net for image turns.

    Args:
        cfg: One global LLM config dict.

    Returns:
        Whether the config should be considered vision-capable.
    """
    explicit_flag = cfg.get("supports_image_input")
    if explicit_flag is not None:
        return bool(explicit_flag)

    # Lazy import: provider_capabilities -> llm_config -> services chain;
    # importing at module load would create an init-order cycle through
    # ``app.config``.
    from app.services.provider_capabilities import derive_supports_image_input

    cfg_litellm_params = cfg.get("litellm_params") or {}
    # Only trust ``base_model`` when litellm_params really is a mapping.
    base_model = (
        cfg_litellm_params.get("base_model")
        if isinstance(cfg_litellm_params, dict)
        else None
    )
    return derive_supports_image_input(
        provider=cfg.get("provider"),
        model_name=cfg.get("model_name"),
        base_model=base_model,
        custom_provider=cfg.get("custom_provider"),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _global_candidates(*, requires_image_input: bool = False) -> list[dict]:
|
||||||
"""Return Auto-eligible global cfgs.
|
"""Return Auto-eligible global cfgs.
|
||||||
|
|
||||||
Drops cfgs flagged ``health_gated`` (best non-null OpenRouter uptime
|
Drops cfgs flagged ``health_gated`` (best non-null OpenRouter uptime
|
||||||
below ``_HEALTH_GATE_UPTIME_PCT``) so chronically broken providers
|
below ``_HEALTH_GATE_UPTIME_PCT``) so chronically broken providers
|
||||||
can't be picked as the thread's pin. Also excludes configs currently
|
can't be picked as the thread's pin. Also excludes configs currently
|
||||||
in runtime cooldown (e.g. temporary 429 bursts).
|
in runtime cooldown (e.g. temporary 429 bursts).
|
||||||
|
|
||||||
|
When ``requires_image_input`` is True (image turn), additionally
|
||||||
|
filters out configs whose ``supports_image_input`` resolves to False
|
||||||
|
so a text-only deployment can't be pinned for an image request.
|
||||||
"""
|
"""
|
||||||
candidates = [
|
candidates = [
|
||||||
cfg
|
cfg
|
||||||
|
|
@ -177,6 +211,7 @@ def _global_candidates() -> list[dict]:
|
||||||
if _is_usable_global_config(cfg)
|
if _is_usable_global_config(cfg)
|
||||||
and not cfg.get("health_gated")
|
and not cfg.get("health_gated")
|
||||||
and not _is_runtime_cooled_down(int(cfg.get("id", 0)))
|
and not _is_runtime_cooled_down(int(cfg.get("id", 0)))
|
||||||
|
and (not requires_image_input or _cfg_supports_image_input(cfg))
|
||||||
]
|
]
|
||||||
return sorted(candidates, key=lambda c: int(c.get("id", 0)))
|
return sorted(candidates, key=lambda c: int(c.get("id", 0)))
|
||||||
|
|
||||||
|
|
@ -185,6 +220,15 @@ def _tier_of(cfg: dict) -> str:
|
||||||
return str(cfg.get("billing_tier", "free")).lower()
|
return str(cfg.get("billing_tier", "free")).lower()
|
||||||
|
|
||||||
|
|
||||||
|
def _is_preferred_premium_auto_config(cfg: dict) -> bool:
    """Tell whether ``cfg`` is the operator-preferred premium Auto model.

    A config qualifies only when all three hold: its tier is ``"premium"``,
    its provider (case-insensitive) is ``AZURE_OPENAI``, and its model name
    (case-insensitive) is ``gpt-5.4``.
    """
    if _tier_of(cfg) != "premium":
        return False
    provider = str(cfg.get("provider", "")).upper()
    if provider != "AZURE_OPENAI":
        return False
    model_name = str(cfg.get("model_name", "")).lower()
    return model_name == "gpt-5.4"
|
||||||
|
|
||||||
|
|
||||||
def _select_pin(eligible: list[dict], thread_id: int) -> tuple[dict, int]:
|
def _select_pin(eligible: list[dict], thread_id: int) -> tuple[dict, int]:
|
||||||
"""Pick a config with quality-first ranking + deterministic spread.
|
"""Pick a config with quality-first ranking + deterministic spread.
|
||||||
|
|
||||||
|
|
@ -237,11 +281,20 @@ async def resolve_or_get_pinned_llm_config_id(
|
||||||
selected_llm_config_id: int,
|
selected_llm_config_id: int,
|
||||||
force_repin_free: bool = False,
|
force_repin_free: bool = False,
|
||||||
exclude_config_ids: set[int] | None = None,
|
exclude_config_ids: set[int] | None = None,
|
||||||
|
requires_image_input: bool = False,
|
||||||
) -> AutoPinResolution:
|
) -> AutoPinResolution:
|
||||||
"""Resolve Auto (Fastest) to one concrete config id and persist the pin.
|
"""Resolve Auto (Fastest) to one concrete config id and persist the pin.
|
||||||
|
|
||||||
For non-auto selections, this function clears any existing pin and returns
|
For non-auto selections, this function clears any existing pin and returns
|
||||||
the selected id as-is.
|
the selected id as-is.
|
||||||
|
|
||||||
|
When ``requires_image_input`` is True (the current turn carries an
|
||||||
|
``image_url`` block), the candidate pool is filtered to vision-capable
|
||||||
|
cfgs and any existing pin that can't accept image input is treated as
|
||||||
|
invalid (force re-pin). If no vision-capable cfg is available the
|
||||||
|
function raises ``ValueError`` so the streaming task surfaces the same
|
||||||
|
friendly ``MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT`` error instead of
|
||||||
|
silently routing the image to a text-only deployment.
|
||||||
"""
|
"""
|
||||||
thread = (
|
thread = (
|
||||||
(
|
(
|
||||||
|
|
@ -274,14 +327,24 @@ async def resolve_or_get_pinned_llm_config_id(
|
||||||
|
|
||||||
excluded_ids = {int(cid) for cid in (exclude_config_ids or set())}
|
excluded_ids = {int(cid) for cid in (exclude_config_ids or set())}
|
||||||
candidates = [
|
candidates = [
|
||||||
c for c in _global_candidates() if int(c.get("id", 0)) not in excluded_ids
|
c
|
||||||
|
for c in _global_candidates(requires_image_input=requires_image_input)
|
||||||
|
if int(c.get("id", 0)) not in excluded_ids
|
||||||
]
|
]
|
||||||
if not candidates:
|
if not candidates:
|
||||||
|
if requires_image_input:
|
||||||
|
# Distinguish the "no vision-capable cfg" case from generic
|
||||||
|
# "no usable cfg" so the streaming task can map this to the
|
||||||
|
# MODEL_DOES_NOT_SUPPORT_IMAGE_INPUT SSE error.
|
||||||
|
raise ValueError(
|
||||||
|
"No vision-capable global LLM configs are available for Auto mode"
|
||||||
|
)
|
||||||
raise ValueError("No usable global LLM configs are available for Auto mode")
|
raise ValueError("No usable global LLM configs are available for Auto mode")
|
||||||
candidate_by_id = {int(c["id"]): c for c in candidates}
|
candidate_by_id = {int(c["id"]): c for c in candidates}
|
||||||
|
|
||||||
# Reuse an existing valid pin without re-checking current quota (no silent
|
# Reuse an existing valid pin without re-checking current quota (no silent
|
||||||
# tier switch), unless the caller explicitly requests a forced repin to free.
|
# tier switch), unless the caller explicitly requests a forced repin to free
|
||||||
|
# *or* the turn requires image input but the pin can't handle it.
|
||||||
pinned_id = thread.pinned_llm_config_id
|
pinned_id = thread.pinned_llm_config_id
|
||||||
if (
|
if (
|
||||||
not force_repin_free
|
not force_repin_free
|
||||||
|
|
@ -311,6 +374,29 @@ async def resolve_or_get_pinned_llm_config_id(
|
||||||
from_existing_pin=True,
|
from_existing_pin=True,
|
||||||
)
|
)
|
||||||
if pinned_id is not None:
|
if pinned_id is not None:
|
||||||
|
# If the pin is *only* invalid because it can't handle the image
|
||||||
|
# turn (it's still a healthy, usable config in the broader pool),
|
||||||
|
# log that explicitly so operators can correlate the re-pin with
|
||||||
|
# the user's image attachment instead of suspecting a cooldown.
|
||||||
|
if requires_image_input:
|
||||||
|
try:
|
||||||
|
pinned_global = next(
|
||||||
|
c
|
||||||
|
for c in config.GLOBAL_LLM_CONFIGS
|
||||||
|
if int(c.get("id", 0)) == int(pinned_id)
|
||||||
|
)
|
||||||
|
except StopIteration:
|
||||||
|
pinned_global = None
|
||||||
|
if pinned_global is not None and not _cfg_supports_image_input(
|
||||||
|
pinned_global
|
||||||
|
):
|
||||||
|
logger.info(
|
||||||
|
"auto_pin_repinned_for_image thread_id=%s search_space_id=%s "
|
||||||
|
"previous_config_id=%s",
|
||||||
|
thread_id,
|
||||||
|
search_space_id,
|
||||||
|
pinned_id,
|
||||||
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
"auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s",
|
"auto_pin_invalid thread_id=%s search_space_id=%s pinned_config_id=%s",
|
||||||
thread_id,
|
thread_id,
|
||||||
|
|
@ -322,11 +408,19 @@ async def resolve_or_get_pinned_llm_config_id(
|
||||||
False if force_repin_free else await _is_premium_eligible(session, user_id)
|
False if force_repin_free else await _is_premium_eligible(session, user_id)
|
||||||
)
|
)
|
||||||
if premium_eligible:
|
if premium_eligible:
|
||||||
eligible = candidates
|
premium_candidates = [c for c in candidates if _tier_of(c) == "premium"]
|
||||||
|
preferred_premium = [
|
||||||
|
c for c in premium_candidates if _is_preferred_premium_auto_config(c)
|
||||||
|
]
|
||||||
|
eligible = preferred_premium or premium_candidates
|
||||||
else:
|
else:
|
||||||
eligible = [c for c in candidates if _tier_of(c) != "premium"]
|
eligible = [c for c in candidates if _tier_of(c) != "premium"]
|
||||||
|
|
||||||
if not eligible:
|
if not eligible:
|
||||||
|
if requires_image_input:
|
||||||
|
raise ValueError(
|
||||||
|
"Auto mode could not find a vision-capable LLM config for this user and quota state"
|
||||||
|
)
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Auto mode could not find an eligible LLM config for this user and quota state"
|
"Auto mode could not find an eligible LLM config for this user and quota state"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
566
surfsense_backend/app/services/billable_calls.py
Normal file
566
surfsense_backend/app/services/billable_calls.py
Normal file
|
|
@ -0,0 +1,566 @@
|
||||||
|
"""
|
||||||
|
Per-call billable wrapper for image generation, vision LLM extraction, and
|
||||||
|
any other short-lived premium operation that must charge against the user's
|
||||||
|
shared premium credit pool.
|
||||||
|
|
||||||
|
The ``billable_call`` async context manager encapsulates the standard
|
||||||
|
"reserve → execute → finalize / release → record audit row" lifecycle in a
|
||||||
|
single primitive so callers (the image-generation REST route and the
|
||||||
|
vision-LLM wrapper used during indexing) don't have to re-implement it.
|
||||||
|
|
||||||
|
KEY DESIGN POINTS (issue A, B):
|
||||||
|
|
||||||
|
1. **Session isolation.** ``billable_call`` takes no caller transaction.
|
||||||
|
All ``TokenQuotaService.premium_*`` calls and the audit-row insert run
|
||||||
|
inside their own session context. Route callers use
|
||||||
|
``shielded_async_session()`` by default; Celery callers can provide a
|
||||||
|
worker-loop-safe session factory. This guarantees that quota
|
||||||
|
commit/rollback can never accidentally flush or roll back rows the caller
|
||||||
|
has staged in its main session (e.g. a freshly-created
|
||||||
|
``ImageGeneration`` row).
|
||||||
|
|
||||||
|
2. **ContextVar safety.** The accumulator is scoped via
|
||||||
|
:func:`scoped_turn` (which uses ``ContextVar.reset(token)``), so a
|
||||||
|
nested ``billable_call`` inside an outer chat turn cannot corrupt the
|
||||||
|
chat turn's accumulator.
|
||||||
|
|
||||||
|
3. **Free configs are still audited.** Free calls bypass the reserve /
|
||||||
|
finalize dance entirely but still record a ``TokenUsage`` audit row with
|
||||||
|
the LiteLLM-reported ``cost_micros``. This keeps the cost-attribution
|
||||||
|
pipeline complete for analytics even when nothing is debited.
|
||||||
|
|
||||||
|
4. **Quota denial raises ``QuotaInsufficientError``.** The route handler is
|
||||||
|
responsible for translating that into HTTP 402. We *do not* catch the
|
||||||
|
denial inside ``billable_call`` — letting it propagate also prevents
|
||||||
|
the image-generation route from creating an ``ImageGeneration`` row
|
||||||
|
for a request that never actually ran.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from collections.abc import AsyncIterator, Callable
|
||||||
|
from contextlib import AbstractAsyncContextManager, asynccontextmanager, suppress
|
||||||
|
from typing import Any
|
||||||
|
from uuid import UUID, uuid4
|
||||||
|
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
from app.config import config
|
||||||
|
from app.db import shielded_async_session
|
||||||
|
from app.services.token_quota_service import (
|
||||||
|
TokenQuotaService,
|
||||||
|
estimate_call_reserve_micros,
|
||||||
|
)
|
||||||
|
from app.services.token_tracking_service import (
|
||||||
|
TurnTokenAccumulator,
|
||||||
|
record_token_usage,
|
||||||
|
scoped_turn,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Module-level logger used by every helper in this file.
logger = logging.getLogger(__name__)

# Default upper bound, in seconds, for persisting one TokenUsage audit row.
AUDIT_TIMEOUT_SECONDS = 10.0

# Usage types whose audit rows are written with ``thread_id=None``.
BACKGROUND_ARTIFACT_USAGE_TYPES = frozenset(
    {"video_presentation_generation", "podcast_generation"}
)

# Zero-argument callable returning an async context manager that yields the
# ``AsyncSession`` used for quota and audit work.
BillableSessionFactory = Callable[[], AbstractAsyncContextManager[AsyncSession]]
|
||||||
|
|
||||||
|
|
||||||
|
class QuotaInsufficientError(Exception):
    """Signal that a premium reservation was denied for a billable call.

    Raised by ``billable_call`` when ``TokenQuotaService.premium_reserve``
    reports the reservation as not allowed. HTTP route handlers are expected
    to translate it into 402 Payment Required; non-HTTP callers may catch it
    and degrade gracefully instead.

    Attributes (all amounts are micro-USD):
        usage_type: Kind of operation that was denied.
        used_micros: Credit already consumed.
        limit_micros: Total credit limit.
        remaining_micros: Credit still available.
    """

    def __init__(
        self,
        *,
        usage_type: str,
        used_micros: int,
        limit_micros: int,
        remaining_micros: int,
    ) -> None:
        message = (
            f"Premium credit exhausted for {usage_type}: "
            f"used={used_micros} limit={limit_micros} remaining={remaining_micros} (micro-USD)"
        )
        super().__init__(message)
        # Keep the raw numbers available so handlers can build a response.
        self.usage_type = usage_type
        self.used_micros = used_micros
        self.limit_micros = limit_micros
        self.remaining_micros = remaining_micros
|
||||||
|
|
||||||
|
|
||||||
|
class BillingSettlementError(Exception):
    """Signal that a premium call finished but its credit settlement failed.

    The triggering exception is only rendered into the message here; callers
    that need the original object should rely on exception chaining.

    Attributes:
        usage_type: Kind of operation being settled.
        user_id: Owner of the credit pool that could not be settled.
    """

    def __init__(self, *, usage_type: str, user_id: UUID, cause: Exception) -> None:
        message = (
            f"Failed to settle premium credit for {usage_type} user={user_id}: {cause}"
        )
        super().__init__(message)
        self.usage_type = usage_type
        self.user_id = user_id
|
||||||
|
|
||||||
|
|
||||||
|
async def _rollback_safely(session: AsyncSession) -> None:
    """Roll back ``session`` if it exposes ``rollback``; never raise.

    Any ``Exception`` raised by the rollback itself is swallowed, because
    this runs on error paths where the original failure is what matters.
    """
    do_rollback = getattr(session, "rollback", None)
    if do_rollback is None:
        # Nothing to roll back on this object.
        return
    with suppress(Exception):
        await do_rollback()
|
||||||
|
|
||||||
|
|
||||||
|
async def _record_audit_best_effort(
    *,
    session_factory: BillableSessionFactory,
    usage_type: str,
    search_space_id: int,
    user_id: UUID,
    prompt_tokens: int,
    completion_tokens: int,
    total_tokens: int,
    cost_micros: int,
    model_breakdown: dict[str, Any],
    call_details: dict[str, Any] | None,
    thread_id: int | None,
    message_id: int | None,
    audit_label: str,
    timeout_seconds: float = AUDIT_TIMEOUT_SECONDS,
) -> None:
    """Write one ``TokenUsage`` audit row; log instead of raising on failure.

    The row is an audit trail, not the settlement itself, so a slow or
    failing insert/commit must not block the caller. The whole persist step
    runs under ``asyncio.wait_for``; a timeout is logged as a warning and any
    other ``Exception`` is logged with its traceback. Neither propagates.

    Args:
        session_factory: Opens the dedicated session used for the audit row.
        usage_type: Recorded on the row. Types listed in
            ``BACKGROUND_ARTIFACT_USAGE_TYPES`` are stored with
            ``thread_id=None``.
        search_space_id, user_id, thread_id, message_id: Row attribution.
        prompt_tokens, completion_tokens, total_tokens, cost_micros,
            model_breakdown, call_details: Usage payload forwarded unchanged
            to ``record_token_usage``.
        audit_label: Free-form tag that appears only in log lines.
        timeout_seconds: Upper bound for stage + commit.
    """
    if usage_type in BACKGROUND_ARTIFACT_USAGE_TYPES:
        audit_thread_id = None
    else:
        audit_thread_id = thread_id

    # Leading positional log arguments shared by every message below.
    log_ctx = (audit_label, usage_type, user_id, audit_thread_id)

    async def _persist() -> None:
        logger.info(
            "[billable_call] audit start label=%s usage_type=%s user=%s thread=%s "
            "total_tokens=%d cost_micros=%d",
            *log_ctx,
            total_tokens,
            cost_micros,
        )
        async with session_factory() as audit_session:
            try:
                await record_token_usage(
                    audit_session,
                    usage_type=usage_type,
                    search_space_id=search_space_id,
                    user_id=user_id,
                    prompt_tokens=prompt_tokens,
                    completion_tokens=completion_tokens,
                    total_tokens=total_tokens,
                    cost_micros=cost_micros,
                    model_breakdown=model_breakdown,
                    call_details=call_details,
                    thread_id=audit_thread_id,
                    message_id=message_id,
                )
                logger.info(
                    "[billable_call] audit row staged label=%s usage_type=%s user=%s thread=%s",
                    *log_ctx,
                )
                await audit_session.commit()
                logger.info(
                    "[billable_call] audit commit OK label=%s usage_type=%s user=%s thread=%s",
                    *log_ctx,
                )
            except BaseException:
                # BaseException so a wait_for cancellation also rolls back
                # before it is re-raised.
                await _rollback_safely(audit_session)
                raise

    try:
        await asyncio.wait_for(_persist(), timeout=timeout_seconds)
    except TimeoutError:
        logger.warning(
            "[billable_call] audit timed out label=%s usage_type=%s user=%s thread=%s "
            "timeout=%.1fs total_tokens=%d cost_micros=%d",
            *log_ctx,
            timeout_seconds,
            total_tokens,
            cost_micros,
        )
    except Exception:
        logger.exception(
            "[billable_call] audit failed label=%s usage_type=%s user=%s thread=%s "
            "total_tokens=%d cost_micros=%d",
            *log_ctx,
            total_tokens,
            cost_micros,
        )
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def billable_call(
    *,
    user_id: UUID,
    search_space_id: int,
    billing_tier: str,
    base_model: str,
    quota_reserve_tokens: int | None = None,
    quota_reserve_micros_override: int | None = None,
    usage_type: str,
    thread_id: int | None = None,
    message_id: int | None = None,
    call_details: dict[str, Any] | None = None,
    billable_session_factory: BillableSessionFactory | None = None,
    audit_timeout_seconds: float = AUDIT_TIMEOUT_SECONDS,
) -> AsyncIterator[TurnTokenAccumulator]:
    """Run one billable LLM/image call under reserve → execute → settle.

    For ``billing_tier == "premium"`` a reservation is taken before the
    body runs, released if the body raises, and finalized against the
    accumulated cost when it succeeds. Any other tier skips the quota steps.
    A ``TokenUsage`` audit row is attempted in both cases (for premium only
    after a successful finalize). Every quota/audit step opens its own
    session from the session factory.

    Args:
        user_id: Owner of the credit pool to debit.
        search_space_id: Recorded on the audit row.
        billing_tier: ``"premium"`` debits; anything else is audit-only.
        base_model: Passed to ``estimate_call_reserve_micros`` when no flat
            override is given.
        quota_reserve_tokens: Optional token-based override for the reserve
            estimator.
        quota_reserve_micros_override: Optional flat micro-USD reservation;
            clamped to at least 1 and used instead of the estimator.
        usage_type: Recorded on the audit row and in log lines.
        thread_id, message_id: Optional audit-row attribution.
        call_details: Optional metadata forwarded to the audit row.
        billable_session_factory: Session factory for quota/audit work;
            defaults to ``shielded_async_session``.
        audit_timeout_seconds: Upper bound for the audit write.

    Yields:
        The ``TurnTokenAccumulator`` scoped to this call.

    Raises:
        QuotaInsufficientError: The premium reservation was denied.
        BillingSettlementError: The call succeeded but finalize failed.
    """
    open_session = billable_session_factory or shielded_async_session

    async with scoped_turn() as acc:

        async def _audit(label: str, cost_micros: int) -> None:
            # Token counters are read when the audit runs, i.e. after the
            # wrapped call has populated the accumulator.
            await _record_audit_best_effort(
                session_factory=open_session,
                usage_type=usage_type,
                search_space_id=search_space_id,
                user_id=user_id,
                prompt_tokens=acc.total_prompt_tokens,
                completion_tokens=acc.total_completion_tokens,
                total_tokens=acc.grand_total,
                cost_micros=cost_micros,
                model_breakdown=acc.per_message_summary(),
                call_details=call_details,
                thread_id=thread_id,
                message_id=message_id,
                audit_label=label,
                timeout_seconds=audit_timeout_seconds,
            )

        async def _release(amount_micros: int) -> None:
            # Hand the reservation back using a fresh session.
            async with open_session() as release_session:
                await TokenQuotaService.premium_release(
                    db_session=release_session,
                    user_id=user_id,
                    reserved_micros=amount_micros,
                )

        # ---------- Free path: audit only --------------------------------
        if billing_tier != "premium":
            try:
                yield acc
            finally:
                # Audit even when the body raised, so cost is captured when
                # the provider succeeded but the caller failed afterwards.
                await _audit("free", acc.total_cost_micros)
            return

        # ---------- Premium path: reserve ---------------------------------
        if quota_reserve_micros_override is None:
            reserve_micros = estimate_call_reserve_micros(
                base_model=base_model or "",
                quota_reserve_tokens=quota_reserve_tokens,
            )
        else:
            reserve_micros = max(1, int(quota_reserve_micros_override))

        request_id = str(uuid4())

        async with open_session() as quota_session:
            reservation = await TokenQuotaService.premium_reserve(
                db_session=quota_session,
                user_id=user_id,
                request_id=request_id,
                reserve_micros=reserve_micros,
            )

        if not reservation.allowed:
            logger.info(
                "[billable_call] reserve DENIED user=%s usage_type=%s "
                "reserve=%d used=%d limit=%d remaining=%d",
                user_id,
                usage_type,
                reserve_micros,
                reservation.used,
                reservation.limit,
                reservation.remaining,
            )
            raise QuotaInsufficientError(
                usage_type=usage_type,
                used_micros=reservation.used,
                limit_micros=reservation.limit,
                remaining_micros=reservation.remaining,
            )

        logger.info(
            "[billable_call] reserve OK user=%s usage_type=%s reserve_micros=%d "
            "(remaining=%d)",
            user_id,
            usage_type,
            reserve_micros,
            reservation.remaining,
        )

        # ---------- Premium path: execute ---------------------------------
        try:
            yield acc
        except BaseException:
            # BaseException so cancellation releases the reservation too.
            try:
                await _release(reserve_micros)
            except Exception:
                logger.exception(
                    "[billable_call] premium_release failed for user=%s "
                    "reserve_micros=%d (reservation will be GC'd by quota "
                    "reconciliation if/when implemented)",
                    user_id,
                    reserve_micros,
                )
            raise

        # ---------- Premium path: finalize + audit ------------------------
        actual_micros = acc.total_cost_micros
        try:
            logger.info(
                "[billable_call] finalize start user=%s usage_type=%s actual=%d "
                "reserved=%d thread=%s",
                user_id,
                usage_type,
                actual_micros,
                reserve_micros,
                thread_id,
            )
            async with open_session() as quota_session:
                settled = await TokenQuotaService.premium_finalize(
                    db_session=quota_session,
                    user_id=user_id,
                    request_id=request_id,
                    actual_micros=actual_micros,
                    reserved_micros=reserve_micros,
                )
            logger.info(
                "[billable_call] finalize user=%s usage_type=%s actual=%d "
                "reserved=%d → used=%d/%d (remaining=%d)",
                user_id,
                usage_type,
                actual_micros,
                reserve_micros,
                settled.used,
                settled.limit,
                settled.remaining,
            )
        except Exception as finalize_exc:
            # Finalize failed: try to release so the reservation does not
            # stay held, then surface a settlement error to the caller.
            logger.exception(
                "[billable_call] premium_finalize failed for user=%s; "
                "attempting release",
                user_id,
            )
            try:
                await _release(reserve_micros)
            except Exception:
                logger.exception(
                    "[billable_call] release after finalize failure ALSO failed "
                    "for user=%s",
                    user_id,
                )
            raise BillingSettlementError(
                usage_type=usage_type,
                user_id=user_id,
                cause=finalize_exc,
            ) from finalize_exc

        await _audit("premium", actual_micros)
|
||||||
|
|
||||||
|
|
||||||
|
async def _resolve_agent_billing_for_search_space(
    session: AsyncSession,
    search_space_id: int,
    *,
    thread_id: int | None = None,
) -> tuple[UUID, str, str]:
    """Resolve ``(owner_user_id, billing_tier, base_model)`` for a search
    space's agent LLM.

    Resolution rules, in order:

    - Search space missing, or it has no ``agent_llm_id``: ``ValueError``.
    - Auto mode (``agent_llm_id == AUTO_FASTEST_ID``):
        * no ``thread_id``: return ``("free", "auto")`` for the owner;
        * otherwise ask ``resolve_or_get_pinned_llm_config_id`` for the
          concrete config id and continue with it. If that raises
          ``ValueError`` the failure is logged and ``("free", "auto")`` is
          returned.
    - Negative id (global config): tier is the config's ``billing_tier``
      lower-cased (``"free"`` when absent); ``base_model`` is
      ``litellm_params["base_model"]``, else ``model_name``, else ``""``.
    - Any other id (``NewLLMConfig`` row in this search space): always
      ``"free"``; ``base_model`` is picked the same way, or ``""`` when the
      row does not exist.

    The service imports are done inside the function, in the same order as
    they are needed, so importing this module does not pull them in.
    """
    from sqlalchemy import select

    from app.db import NewLLMConfig, SearchSpace

    space_query = select(SearchSpace).where(SearchSpace.id == search_space_id)
    search_space = (await session.execute(space_query)).scalars().first()
    if search_space is None:
        raise ValueError(f"Search space {search_space_id} not found")

    agent_llm_id = search_space.agent_llm_id
    if agent_llm_id is None:
        raise ValueError(
            f"Search space {search_space_id} has no agent_llm_id configured"
        )

    owner_user_id: UUID = search_space.user_id

    from app.services.auto_model_pin_service import (
        AUTO_FASTEST_ID,
        resolve_or_get_pinned_llm_config_id,
    )

    if agent_llm_id == AUTO_FASTEST_ID:
        auto_fallback = (owner_user_id, "free", "auto")
        if thread_id is None:
            return auto_fallback
        try:
            resolution = await resolve_or_get_pinned_llm_config_id(
                session,
                thread_id=thread_id,
                search_space_id=search_space_id,
                user_id=str(owner_user_id),
                selected_llm_config_id=AUTO_FASTEST_ID,
            )
        except ValueError:
            logger.warning(
                "[agent_billing] Auto-mode pin resolution failed for "
                "search_space=%s thread=%s; falling back to free",
                search_space_id,
                thread_id,
                exc_info=True,
            )
            return auto_fallback
        # Continue below with the concrete config the pin resolved to.
        agent_llm_id = resolution.resolved_llm_config_id

    if agent_llm_id < 0:
        from app.services.llm_service import get_global_llm_config

        global_cfg = get_global_llm_config(agent_llm_id) or {}
        tier = str(global_cfg.get("billing_tier", "free")).lower()
        global_params = global_cfg.get("litellm_params") or {}
        global_base_model = (
            global_params.get("base_model") or global_cfg.get("model_name") or ""
        )
        return owner_user_id, tier, global_base_model

    byok_query = select(NewLLMConfig).where(
        NewLLMConfig.id == agent_llm_id,
        NewLLMConfig.search_space_id == search_space_id,
    )
    byok_cfg = (await session.execute(byok_query)).scalars().first()
    if byok_cfg is None:
        return owner_user_id, "free", ""

    byok_params = byok_cfg.litellm_params or {}
    byok_base_model = byok_params.get("base_model") or byok_cfg.model_name or ""
    return owner_user_id, "free", byok_base_model
|
||||||
|
|
||||||
|
|
||||||
|
# Public surface of this module. ``DEFAULT_IMAGE_RESERVE_MICROS`` is listed
# so that the re-export defined below is also visible to star-imports.
__all__ = [
    "DEFAULT_IMAGE_RESERVE_MICROS",
    "BillingSettlementError",
    "QuotaInsufficientError",
    "_resolve_agent_billing_for_search_space",
    "billable_call",
]


# Re-export the config knob so callers don't have to import config just for
# the default image reserve.
DEFAULT_IMAGE_RESERVE_MICROS = config.QUOTA_DEFAULT_IMAGE_RESERVE_MICROS
|
||||||
|
|
@ -408,12 +408,37 @@ class ComposioService:
|
||||||
files = []
|
files = []
|
||||||
next_token = None
|
next_token = None
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
|
inner_data = data.get("data", data)
|
||||||
|
response_data = (
|
||||||
|
inner_data.get("response_data", {})
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else {}
|
||||||
|
)
|
||||||
# Try direct access first, then nested
|
# Try direct access first, then nested
|
||||||
files = data.get("files", []) or data.get("data", {}).get("files", [])
|
files = (
|
||||||
|
data.get("files", [])
|
||||||
|
or (
|
||||||
|
inner_data.get("files", [])
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
or response_data.get("files", [])
|
||||||
|
)
|
||||||
next_token = (
|
next_token = (
|
||||||
data.get("nextPageToken")
|
data.get("nextPageToken")
|
||||||
or data.get("next_page_token")
|
or data.get("next_page_token")
|
||||||
or data.get("data", {}).get("nextPageToken")
|
or (
|
||||||
|
inner_data.get("nextPageToken")
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
or (
|
||||||
|
inner_data.get("next_page_token")
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
or response_data.get("nextPageToken")
|
||||||
|
or response_data.get("next_page_token")
|
||||||
)
|
)
|
||||||
elif isinstance(data, list):
|
elif isinstance(data, list):
|
||||||
files = data
|
files = data
|
||||||
|
|
@ -819,24 +844,61 @@ class ComposioService:
|
||||||
next_token = None
|
next_token = None
|
||||||
result_size_estimate = None
|
result_size_estimate = None
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
|
inner_data = data.get("data", data)
|
||||||
|
response_data = (
|
||||||
|
inner_data.get("response_data", {})
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else {}
|
||||||
|
)
|
||||||
messages = (
|
messages = (
|
||||||
data.get("messages", [])
|
data.get("messages", [])
|
||||||
or data.get("data", {}).get("messages", [])
|
or (
|
||||||
|
inner_data.get("messages", [])
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
or response_data.get("messages", [])
|
||||||
or data.get("emails", [])
|
or data.get("emails", [])
|
||||||
|
or (
|
||||||
|
inner_data.get("emails", [])
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
or response_data.get("emails", [])
|
||||||
)
|
)
|
||||||
# Check for pagination token in various possible locations
|
# Check for pagination token in various possible locations
|
||||||
next_token = (
|
next_token = (
|
||||||
data.get("nextPageToken")
|
data.get("nextPageToken")
|
||||||
or data.get("next_page_token")
|
or data.get("next_page_token")
|
||||||
or data.get("data", {}).get("nextPageToken")
|
or (
|
||||||
or data.get("data", {}).get("next_page_token")
|
inner_data.get("nextPageToken")
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
or (
|
||||||
|
inner_data.get("next_page_token")
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
or response_data.get("nextPageToken")
|
||||||
|
or response_data.get("next_page_token")
|
||||||
)
|
)
|
||||||
# Extract resultSizeEstimate if available (Gmail API provides this)
|
# Extract resultSizeEstimate if available (Gmail API provides this)
|
||||||
result_size_estimate = (
|
result_size_estimate = (
|
||||||
data.get("resultSizeEstimate")
|
data.get("resultSizeEstimate")
|
||||||
or data.get("result_size_estimate")
|
or data.get("result_size_estimate")
|
||||||
or data.get("data", {}).get("resultSizeEstimate")
|
or (
|
||||||
or data.get("data", {}).get("result_size_estimate")
|
inner_data.get("resultSizeEstimate")
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
or (
|
||||||
|
inner_data.get("result_size_estimate")
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
or response_data.get("resultSizeEstimate")
|
||||||
|
or response_data.get("result_size_estimate")
|
||||||
)
|
)
|
||||||
elif isinstance(data, list):
|
elif isinstance(data, list):
|
||||||
messages = data
|
messages = data
|
||||||
|
|
@ -864,7 +926,7 @@ class ComposioService:
|
||||||
try:
|
try:
|
||||||
result = await self.execute_tool(
|
result = await self.execute_tool(
|
||||||
connected_account_id=connected_account_id,
|
connected_account_id=connected_account_id,
|
||||||
tool_name="GMAIL_GET_MESSAGE_BY_MESSAGE_ID",
|
tool_name="GMAIL_FETCH_MESSAGE_BY_MESSAGE_ID",
|
||||||
params={"message_id": message_id}, # snake_case
|
params={"message_id": message_id}, # snake_case
|
||||||
entity_id=entity_id,
|
entity_id=entity_id,
|
||||||
)
|
)
|
||||||
|
|
@ -872,7 +934,13 @@ class ComposioService:
|
||||||
if not result.get("success"):
|
if not result.get("success"):
|
||||||
return None, result.get("error", "Unknown error")
|
return None, result.get("error", "Unknown error")
|
||||||
|
|
||||||
return result.get("data"), None
|
data = result.get("data")
|
||||||
|
if isinstance(data, dict):
|
||||||
|
inner_data = data.get("data", data)
|
||||||
|
if isinstance(inner_data, dict):
|
||||||
|
return inner_data.get("response_data", inner_data), None
|
||||||
|
|
||||||
|
return data, None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to get Gmail message detail: {e!s}")
|
logger.error(f"Failed to get Gmail message detail: {e!s}")
|
||||||
|
|
@ -928,10 +996,27 @@ class ComposioService:
|
||||||
# Try different possible response structures
|
# Try different possible response structures
|
||||||
events = []
|
events = []
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
|
inner_data = data.get("data", data)
|
||||||
|
response_data = (
|
||||||
|
inner_data.get("response_data", {})
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else {}
|
||||||
|
)
|
||||||
events = (
|
events = (
|
||||||
data.get("items", [])
|
data.get("items", [])
|
||||||
or data.get("data", {}).get("items", [])
|
or (
|
||||||
|
inner_data.get("items", [])
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
or response_data.get("items", [])
|
||||||
or data.get("events", [])
|
or data.get("events", [])
|
||||||
|
or (
|
||||||
|
inner_data.get("events", [])
|
||||||
|
if isinstance(inner_data, dict)
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
or response_data.get("events", [])
|
||||||
)
|
)
|
||||||
elif isinstance(data, list):
|
elif isinstance(data, list):
|
||||||
events = data
|
events = data
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import os
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from threading import Lock
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
@ -2769,12 +2771,22 @@ class ConnectorService:
|
||||||
"""
|
"""
|
||||||
Get all available (enabled) connector types for a search space.
|
Get all available (enabled) connector types for a search space.
|
||||||
|
|
||||||
|
Phase 1.4: results are cached per ``search_space_id`` for
|
||||||
|
:data:`_DISCOVERY_TTL_SECONDS`. Cache key is independent of session
|
||||||
|
identity — the cached value is plain data, safe to share across
|
||||||
|
requests. Invalidate on connector add/update/delete via
|
||||||
|
:func:`invalidate_connector_discovery_cache`.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
search_space_id: The search space ID
|
search_space_id: The search space ID
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of SearchSourceConnectorType enums for enabled connectors
|
List of SearchSourceConnectorType enums for enabled connectors
|
||||||
"""
|
"""
|
||||||
|
cached = _get_cached_connectors(search_space_id)
|
||||||
|
if cached is not None:
|
||||||
|
return list(cached)
|
||||||
|
|
||||||
query = (
|
query = (
|
||||||
select(SearchSourceConnector.connector_type)
|
select(SearchSourceConnector.connector_type)
|
||||||
.filter(
|
.filter(
|
||||||
|
|
@ -2784,8 +2796,9 @@ class ConnectorService:
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await self.session.execute(query)
|
result = await self.session.execute(query)
|
||||||
connector_types = result.scalars().all()
|
connector_types = list(result.scalars().all())
|
||||||
return list(connector_types)
|
_set_cached_connectors(search_space_id, connector_types)
|
||||||
|
return connector_types
|
||||||
|
|
||||||
async def get_available_document_types(
|
async def get_available_document_types(
|
||||||
self,
|
self,
|
||||||
|
|
@ -2794,12 +2807,22 @@ class ConnectorService:
|
||||||
"""
|
"""
|
||||||
Get all document types that have at least one document in the search space.
|
Get all document types that have at least one document in the search space.
|
||||||
|
|
||||||
|
Phase 1.4: cached per ``search_space_id`` for
|
||||||
|
:data:`_DISCOVERY_TTL_SECONDS`. Invalidate via
|
||||||
|
:func:`invalidate_connector_discovery_cache` when a connector
|
||||||
|
finishes indexing new documents (or document types are otherwise
|
||||||
|
added/removed).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
search_space_id: The search space ID
|
search_space_id: The search space ID
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of document type strings that have documents indexed
|
List of document type strings that have documents indexed
|
||||||
"""
|
"""
|
||||||
|
cached = _get_cached_doc_types(search_space_id)
|
||||||
|
if cached is not None:
|
||||||
|
return list(cached)
|
||||||
|
|
||||||
from sqlalchemy import distinct
|
from sqlalchemy import distinct
|
||||||
|
|
||||||
from app.db import Document
|
from app.db import Document
|
||||||
|
|
@ -2809,5 +2832,164 @@ class ConnectorService:
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await self.session.execute(query)
|
result = await self.session.execute(query)
|
||||||
doc_types = result.scalars().all()
|
doc_types = [str(dt) for dt in result.scalars().all()]
|
||||||
return [str(dt) for dt in doc_types]
|
_set_cached_doc_types(search_space_id, doc_types)
|
||||||
|
return doc_types
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Connector / document-type discovery TTL cache (Phase 1.4)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
#
|
||||||
|
# Both ``get_available_connectors`` and ``get_available_document_types`` are
|
||||||
|
# called on EVERY chat turn from ``create_surfsense_deep_agent``. Each query
|
||||||
|
# hits Postgres and contributes to per-turn agent build latency. Their
|
||||||
|
# results change infrequently — only when the user adds/edits/removes a
|
||||||
|
# connector, or when an indexer commits a new document type. A short TTL
|
||||||
|
# cache (default 30s, env-tunable) collapses repeated calls within the TTL
|
||||||
|
# window into one DB roundtrip per process, with bounded staleness.
|
||||||
|
#
|
||||||
|
# Invalidation: connector mutation routes (create / update / delete) call
|
||||||
|
# ``invalidate_connector_discovery_cache(search_space_id)`` to clear the
|
||||||
|
# entry for the affected space. Multi-replica deployments still pay one
|
||||||
|
# DB roundtrip per replica per TTL window, which is fine — staleness is
|
||||||
|
# bounded and the alternative (cross-replica fanout) is not worth the
|
||||||
|
# coupling here.
|
||||||
|
|
||||||
|
_DISCOVERY_TTL_SECONDS: float = float(
|
||||||
|
os.getenv("SURFSENSE_CONNECTOR_DISCOVERY_TTL_SECONDS", "30")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Per-search-space caches. Keyed by ``search_space_id``; value is
# ``(expires_at_monotonic, payload)``, where the expiry is a
# ``time.monotonic()`` deadline. Plain dicts protected by a lock —
# read-mostly workload, sub-microsecond contention.
# Connector types enabled for each search space.
_connectors_cache: dict[int, tuple[float, list[SearchSourceConnectorType]]] = {}
# Document-type strings present in each search space.
_doc_types_cache: dict[int, tuple[float, list[str]]] = {}
# A single ``threading.Lock`` guards both dicts above.
_cache_lock = Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cached_connectors(
    search_space_id: int,
) -> list[SearchSourceConnectorType] | None:
    """Return a copy of the cached connector types for ``search_space_id``.

    Args:
        search_space_id: Search space whose cached entry is wanted.

    Returns:
        A fresh list with the cached connector types, or ``None`` when
        caching is disabled (TTL ``<= 0``), nothing is cached for the
        space, or the entry has expired. An expired entry is evicted
        before returning.
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return None
    with _cache_lock:
        entry = _connectors_cache.get(search_space_id)
        if entry is None:
            return None
        expires_at, payload = entry
        if time.monotonic() >= expires_at:
            _connectors_cache.pop(search_space_id, None)
            return None
        # Hand out a copy: the stored list is shared by every request in the
        # TTL window, so a caller mutating its result must not be able to
        # corrupt the cache (the setter copies on the way in for the same
        # reason).
        return list(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def _set_cached_connectors(
    search_space_id: int, payload: list[SearchSourceConnectorType]
) -> None:
    """Store a snapshot of ``payload`` as the connector entry for a space.

    Does nothing when caching is disabled (TTL ``<= 0``). The entry expires
    ``_DISCOVERY_TTL_SECONDS`` after this call, measured on the monotonic
    clock.
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return
    deadline = time.monotonic() + _DISCOVERY_TTL_SECONDS
    with _cache_lock:
        # Snapshot the list so later mutation by the caller cannot leak in.
        snapshot = list(payload)
        _connectors_cache[search_space_id] = (deadline, snapshot)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cached_doc_types(search_space_id: int) -> list[str] | None:
    """Return a copy of the cached document types for ``search_space_id``.

    Args:
        search_space_id: Search space whose cached entry is wanted.

    Returns:
        A fresh list with the cached document-type strings, or ``None``
        when caching is disabled (TTL ``<= 0``), nothing is cached for the
        space, or the entry has expired. An expired entry is evicted
        before returning.
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return None
    with _cache_lock:
        entry = _doc_types_cache.get(search_space_id)
        if entry is None:
            return None
        expires_at, payload = entry
        if time.monotonic() >= expires_at:
            _doc_types_cache.pop(search_space_id, None)
            return None
        # Hand out a copy: the stored list is shared by every request in the
        # TTL window, so a caller mutating its result must not be able to
        # corrupt the cache (the setter copies on the way in for the same
        # reason).
        return list(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def _set_cached_doc_types(search_space_id: int, payload: list[str]) -> None:
    """Store a snapshot of ``payload`` as the document-type entry for a space.

    Does nothing when caching is disabled (TTL ``<= 0``). The entry expires
    ``_DISCOVERY_TTL_SECONDS`` after this call, measured on the monotonic
    clock.
    """
    if _DISCOVERY_TTL_SECONDS <= 0:
        return
    deadline = time.monotonic() + _DISCOVERY_TTL_SECONDS
    with _cache_lock:
        # Snapshot the list so later mutation by the caller cannot leak in.
        snapshot = list(payload)
        _doc_types_cache[search_space_id] = (deadline, snapshot)
|
||||||
|
|
||||||
|
|
||||||
|
def invalidate_connector_discovery_cache(search_space_id: int | None = None) -> None:
    """Forget cached discovery results for one search space, or for all.

    Both the connector-type cache and the document-type cache are cleared
    under a single acquisition of the cache lock.

    Args:
        search_space_id: Space whose entries should be dropped. ``None``
            (the default) empties both caches entirely — useful in tests
            and on bulk imports.
    """
    with _cache_lock:
        for cache in (_connectors_cache, _doc_types_cache):
            if search_space_id is None:
                cache.clear()
            else:
                cache.pop(search_space_id, None)
|
||||||
|
|
||||||
|
|
||||||
|
def _invalidate_connectors_only(search_space_id: int | None = None) -> None:
    """Drop the connector-type entry for one space, or every entry if ``None``.

    The document-type cache is left untouched.
    """
    with _cache_lock:
        if search_space_id is not None:
            _connectors_cache.pop(search_space_id, None)
            return
        _connectors_cache.clear()
|
||||||
|
|
||||||
|
|
||||||
|
def _invalidate_doc_types_only(search_space_id: int | None = None) -> None:
    """Drop the document-type entry for one space, or every entry if ``None``.

    The connector-type cache is left untouched.
    """
    with _cache_lock:
        if search_space_id is not None:
            _doc_types_cache.pop(search_space_id, None)
            return
        _doc_types_cache.clear()
|
||||||
|
|
||||||
|
|
||||||
|
def _register_invalidation_listeners() -> None:
    """Attach SQLAlchemy mapper events that keep the discovery caches fresh.

    ``after_insert`` / ``after_update`` / ``after_delete`` listeners are
    registered on ``SearchSourceConnector`` and ``Document``. When one
    fires, the target row's ``search_space_id`` selects which cached entry
    to drop: connector rows clear the connector-type cache, document rows
    clear the document-type cache. Rows without a ``search_space_id`` are
    ignored.

    This removes the need for explicit ``invalidate_*`` calls in every
    connector route. Writes that bypass the ORM unit of work (e.g.
    ``session.execute(insert(...))`` without a mapped object) do not fire
    these events and still need explicit invalidation.
    """
    from sqlalchemy import event

    # Deferred import: pulling ``app.db`` in at module top would create a
    # circular import, because this module is itself imported indirectly
    # from the ``app.db`` ecosystem via several CRUD modules.
    from app.db import Document, SearchSourceConnector

    def _connector_changed(_mapper, _connection, target) -> None:
        space_id = getattr(target, "search_space_id", None)
        if space_id is None:
            return
        _invalidate_connectors_only(int(space_id))

    def _document_changed(_mapper, _connection, target) -> None:
        space_id = getattr(target, "search_space_id", None)
        if space_id is None:
            return
        _invalidate_doc_types_only(int(space_id))

    watched = (
        (SearchSourceConnector, _connector_changed),
        (Document, _document_changed),
    )
    for identifier in ("after_insert", "after_update", "after_delete"):
        for model, handler in watched:
            event.listen(model, identifier, handler)
|
||||||
|
|
||||||
|
|
||||||
|
# Wire up the ORM invalidation listeners once, when this module is imported.
# A failure here is logged (with traceback) and swallowed on purpose: the
# caches then fall back to TTL expiry plus any explicit invalidation calls,
# which is preferable to making the whole module unimportable.
try:
    _register_invalidation_listeners()
except Exception:  # pragma: no cover - defensive; never block module import
    # Imported locally under an alias so this fallback path does not depend
    # on (or shadow) any module-level ``logging`` name.
    import logging as _logging

    _logging.getLogger(__name__).exception(
        "Failed to register connector discovery cache invalidation listeners; "
        "stale cache risk: explicit invalidate_connector_discovery_cache calls "
        "may be required."
    )
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ from app.db import (
|
||||||
SearchSourceConnector,
|
SearchSourceConnector,
|
||||||
SearchSourceConnectorType,
|
SearchSourceConnectorType,
|
||||||
)
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -78,14 +78,49 @@ class GmailToolMetadataService:
|
||||||
def __init__(self, db_session: AsyncSession):
|
def __init__(self, db_session: AsyncSession):
|
||||||
self._db_session = db_session
|
self._db_session = db_session
|
||||||
|
|
||||||
async def _build_credentials(self, connector: SearchSourceConnector) -> Credentials:
|
def _is_composio_connector(self, connector: SearchSourceConnector) -> bool:
|
||||||
if (
|
return (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GMAIL_CONNECTOR
|
||||||
):
|
)
|
||||||
|
|
||||||
|
def _get_composio_connected_account_id(
|
||||||
|
self, connector: SearchSourceConnector
|
||||||
|
) -> str:
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
return build_composio_credentials(cca_id)
|
raise ValueError("Composio connected_account_id not found")
|
||||||
|
return cca_id
|
||||||
|
|
||||||
|
def _unwrap_composio_data(self, data: Any) -> Any:
|
||||||
|
if isinstance(data, dict):
|
||||||
|
inner = data.get("data", data)
|
||||||
|
if isinstance(inner, dict):
|
||||||
|
return inner.get("response_data", inner)
|
||||||
|
return inner
|
||||||
|
return data
|
||||||
|
|
||||||
|
async def _execute_composio_gmail_tool(
    self,
    connector: SearchSourceConnector,
    tool_name: str,
    params: dict[str, Any],
) -> tuple[Any, str | None]:
    """Run one Composio Gmail tool for ``connector``.

    Args:
        connector: Composio-backed Gmail connector. Its config supplies the
            connected account id and its ``user_id`` forms the entity id.
        tool_name: Composio tool slug to execute (e.g. ``GMAIL_GET_PROFILE``).
        params: Tool arguments, passed through unchanged.

    Returns:
        ``(data, None)`` on success, with the Composio envelope unwrapped,
        or ``(None, error)`` on failure. ``error`` is always truthy on
        failure so callers can rely on ``if error:`` / ``bool(error)``.

    Raises:
        ValueError: If the connector has no Composio connected account id.
    """
    result = await ComposioService().execute_tool(
        connected_account_id=self._get_composio_connected_account_id(connector),
        tool_name=tool_name,
        params=params,
        entity_id=f"surfsense_{connector.user_id}",
    )
    if not result.get("success"):
        # ``or`` instead of a ``.get`` default: the key can be present but
        # ``None``/empty, and a falsy error would make this failure look
        # like a successful call that returned no data.
        return None, result.get("error") or "Unknown Composio Gmail error"
    return self._unwrap_composio_data(result.get("data")), None
|
||||||
|
|
||||||
|
async def _build_credentials(self, connector: SearchSourceConnector) -> Credentials:
|
||||||
|
if self._is_composio_connector(connector):
|
||||||
|
raise ValueError(
|
||||||
|
"Composio Gmail connectors must use Composio tool execution"
|
||||||
|
)
|
||||||
|
|
||||||
config_data = dict(connector.config)
|
config_data = dict(connector.config)
|
||||||
|
|
||||||
|
|
@ -139,6 +174,12 @@ class GmailToolMetadataService:
|
||||||
if not connector:
|
if not connector:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
if self._is_composio_connector(connector):
|
||||||
|
_profile, error = await self._execute_composio_gmail_tool(
|
||||||
|
connector, "GMAIL_GET_PROFILE", {"user_id": "me"}
|
||||||
|
)
|
||||||
|
return bool(error)
|
||||||
|
|
||||||
creds = await self._build_credentials(connector)
|
creds = await self._build_credentials(connector)
|
||||||
service = build("gmail", "v1", credentials=creds)
|
service = build("gmail", "v1", credentials=creds)
|
||||||
await asyncio.get_event_loop().run_in_executor(
|
await asyncio.get_event_loop().run_in_executor(
|
||||||
|
|
@ -221,6 +262,13 @@ class GmailToolMetadataService:
|
||||||
)
|
)
|
||||||
connector = result.scalar_one_or_none()
|
connector = result.scalar_one_or_none()
|
||||||
if connector:
|
if connector:
|
||||||
|
if self._is_composio_connector(connector):
|
||||||
|
profile, error = await self._execute_composio_gmail_tool(
|
||||||
|
connector, "GMAIL_GET_PROFILE", {"user_id": "me"}
|
||||||
|
)
|
||||||
|
if error:
|
||||||
|
raise RuntimeError(error)
|
||||||
|
else:
|
||||||
creds = await self._build_credentials(connector)
|
creds = await self._build_credentials(connector)
|
||||||
service = build("gmail", "v1", credentials=creds)
|
service = build("gmail", "v1", credentials=creds)
|
||||||
profile = await asyncio.get_event_loop().run_in_executor(
|
profile = await asyncio.get_event_loop().run_in_executor(
|
||||||
|
|
@ -298,6 +346,23 @@ class GmailToolMetadataService:
|
||||||
Returns ``None`` on any failure so callers can degrade gracefully.
|
Returns ``None`` on any failure so callers can degrade gracefully.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
if self._is_composio_connector(connector):
|
||||||
|
if not draft_id:
|
||||||
|
draft_id = await self._find_composio_draft_id(connector, message_id)
|
||||||
|
if not draft_id:
|
||||||
|
return None
|
||||||
|
|
||||||
|
draft, error = await self._execute_composio_gmail_tool(
|
||||||
|
connector,
|
||||||
|
"GMAIL_GET_DRAFT",
|
||||||
|
{"user_id": "me", "draft_id": draft_id, "format": "full"},
|
||||||
|
)
|
||||||
|
if error or not isinstance(draft, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
payload = draft.get("message", {}).get("payload", {})
|
||||||
|
return self._extract_body_from_payload(payload)
|
||||||
|
|
||||||
creds = await self._build_credentials(connector)
|
creds = await self._build_credentials(connector)
|
||||||
service = build("gmail", "v1", credentials=creds)
|
service = build("gmail", "v1", credentials=creds)
|
||||||
|
|
||||||
|
|
@ -326,6 +391,33 @@ class GmailToolMetadataService:
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
async def _find_composio_draft_id(
    self, connector: SearchSourceConnector, message_id: str
) -> str | None:
    """Resolve a draft id from its message id by paging ``GMAIL_LIST_DRAFTS``.

    Args:
        connector: Composio-backed Gmail connector to query.
        message_id: Id of the message wrapped by the wanted draft.

    Returns:
        The id of the first draft whose ``message.id`` equals
        ``message_id``, or ``None`` when a page fails, the listing is
        exhausted, or pagination stops making progress.
    """
    page_token = ""
    # Tokens already requested; a repeat means the listing is cycling and
    # would otherwise keep this loop running forever.
    seen_tokens: set[str] = set()
    while True:
        params: dict[str, Any] = {
            "user_id": "me",
            "max_results": 100,
            "verbose": False,
        }
        if page_token:
            params["page_token"] = page_token

        data, error = await self._execute_composio_gmail_tool(
            connector, "GMAIL_LIST_DRAFTS", params
        )
        if error or not isinstance(data, dict):
            return None

        # ``or []`` covers an explicit ``"drafts": null``; malformed entries
        # are skipped so one bad record cannot abort the whole scan.
        for draft in data.get("drafts") or []:
            if not isinstance(draft, dict):
                continue
            message = draft.get("message")
            if isinstance(message, dict) and message.get("id") == message_id:
                return draft.get("id")

        page_token = data.get("nextPageToken") or data.get("next_page_token") or ""
        if not page_token or page_token in seen_tokens:
            return None
        seen_tokens.add(page_token)
|
||||||
|
|
||||||
async def _find_draft_id(self, service: Any, message_id: str) -> str | None:
|
async def _find_draft_id(self, service: Any, message_id: str) -> str | None:
|
||||||
"""Resolve a draft ID from its message ID by scanning drafts.list."""
|
"""Resolve a draft ID from its message ID by scanning drafts.list."""
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ from app.db import (
|
||||||
SearchSourceConnector,
|
SearchSourceConnector,
|
||||||
SearchSourceConnectorType,
|
SearchSourceConnectorType,
|
||||||
)
|
)
|
||||||
|
from app.services.composio_service import ComposioService
|
||||||
from app.utils.document_converters import (
|
from app.utils.document_converters import (
|
||||||
create_document_chunks,
|
create_document_chunks,
|
||||||
embed_text,
|
embed_text,
|
||||||
|
|
@ -21,7 +22,6 @@ from app.utils.document_converters import (
|
||||||
generate_document_summary,
|
generate_document_summary,
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -203,15 +203,38 @@ class GoogleCalendarKBSyncService:
|
||||||
logger.warning("Document %s not found in KB", document_id)
|
logger.warning("Document %s not found in KB", document_id)
|
||||||
return {"status": "not_indexed"}
|
return {"status": "not_indexed"}
|
||||||
|
|
||||||
|
calendar_id = (document.document_metadata or {}).get(
|
||||||
|
"calendar_id"
|
||||||
|
) or "primary"
|
||||||
|
connector = await self._get_connector(connector_id)
|
||||||
|
if (
|
||||||
|
connector.connector_type
|
||||||
|
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
||||||
|
):
|
||||||
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
|
if not cca_id:
|
||||||
|
raise ValueError("Composio connected_account_id not found")
|
||||||
|
composio_result = await ComposioService().execute_tool(
|
||||||
|
connected_account_id=cca_id,
|
||||||
|
tool_name="GOOGLECALENDAR_EVENTS_GET",
|
||||||
|
params={"calendar_id": calendar_id, "event_id": event_id},
|
||||||
|
entity_id=f"surfsense_{user_id}",
|
||||||
|
)
|
||||||
|
if not composio_result.get("success"):
|
||||||
|
raise RuntimeError(
|
||||||
|
composio_result.get("error", "Unknown Composio Calendar error")
|
||||||
|
)
|
||||||
|
live_event = composio_result.get("data", {})
|
||||||
|
if isinstance(live_event, dict):
|
||||||
|
live_event = live_event.get("data", live_event)
|
||||||
|
if isinstance(live_event, dict):
|
||||||
|
live_event = live_event.get("response_data", live_event)
|
||||||
|
else:
|
||||||
creds = await self._build_credentials_for_connector(connector_id)
|
creds = await self._build_credentials_for_connector(connector_id)
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
service = await loop.run_in_executor(
|
service = await loop.run_in_executor(
|
||||||
None, lambda: build("calendar", "v3", credentials=creds)
|
None, lambda: build("calendar", "v3", credentials=creds)
|
||||||
)
|
)
|
||||||
|
|
||||||
calendar_id = (document.document_metadata or {}).get(
|
|
||||||
"calendar_id"
|
|
||||||
) or "primary"
|
|
||||||
live_event = await loop.run_in_executor(
|
live_event = await loop.run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: (
|
lambda: (
|
||||||
|
|
@ -322,7 +345,7 @@ class GoogleCalendarKBSyncService:
|
||||||
await self.db_session.rollback()
|
await self.db_session.rollback()
|
||||||
return {"status": "error", "message": str(e)}
|
return {"status": "error", "message": str(e)}
|
||||||
|
|
||||||
async def _build_credentials_for_connector(self, connector_id: int) -> Credentials:
|
async def _get_connector(self, connector_id: int) -> SearchSourceConnector:
|
||||||
result = await self.db_session.execute(
|
result = await self.db_session.execute(
|
||||||
select(SearchSourceConnector).where(
|
select(SearchSourceConnector).where(
|
||||||
SearchSourceConnector.id == connector_id
|
SearchSourceConnector.id == connector_id
|
||||||
|
|
@ -331,15 +354,17 @@ class GoogleCalendarKBSyncService:
|
||||||
connector = result.scalar_one_or_none()
|
connector = result.scalar_one_or_none()
|
||||||
if not connector:
|
if not connector:
|
||||||
raise ValueError(f"Connector {connector_id} not found")
|
raise ValueError(f"Connector {connector_id} not found")
|
||||||
|
return connector
|
||||||
|
|
||||||
|
async def _build_credentials_for_connector(self, connector_id: int) -> Credentials:
|
||||||
|
connector = await self._get_connector(connector_id)
|
||||||
if (
|
if (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
||||||
):
|
):
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
raise ValueError(
|
||||||
if cca_id:
|
"Composio Calendar connectors must use Composio tool execution"
|
||||||
return build_composio_credentials(cca_id)
|
)
|
||||||
raise ValueError("Composio connected_account_id not found")
|
|
||||||
|
|
||||||
config_data = dict(connector.config)
|
config_data = dict(connector.config)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ from app.db import (
|
||||||
SearchSourceConnector,
|
SearchSourceConnector,
|
||||||
SearchSourceConnectorType,
|
SearchSourceConnectorType,
|
||||||
)
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -94,15 +94,49 @@ class GoogleCalendarToolMetadataService:
|
||||||
def __init__(self, db_session: AsyncSession):
|
def __init__(self, db_session: AsyncSession):
|
||||||
self._db_session = db_session
|
self._db_session = db_session
|
||||||
|
|
||||||
async def _build_credentials(self, connector: SearchSourceConnector) -> Credentials:
|
def _is_composio_connector(self, connector: SearchSourceConnector) -> bool:
|
||||||
if (
|
return (
|
||||||
connector.connector_type
|
connector.connector_type
|
||||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
== SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR
|
||||||
):
|
)
|
||||||
|
|
||||||
|
def _get_composio_connected_account_id(
|
||||||
|
self, connector: SearchSourceConnector
|
||||||
|
) -> str:
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
if cca_id:
|
if not cca_id:
|
||||||
return build_composio_credentials(cca_id)
|
|
||||||
raise ValueError("Composio connected_account_id not found")
|
raise ValueError("Composio connected_account_id not found")
|
||||||
|
return cca_id
|
||||||
|
|
||||||
|
async def _execute_composio_calendar_tool(
    self,
    connector: SearchSourceConnector,
    tool_name: str,
    params: dict,
) -> tuple[dict | list | None, str | None]:
    """Run one Composio Google Calendar tool for ``connector``.

    Args:
        connector: Composio-backed calendar connector. Its config supplies
            the connected account id and its ``user_id`` forms the entity id.
        tool_name: Composio tool slug to execute
            (e.g. ``GOOGLECALENDAR_LIST_CALENDARS``).
        params: Tool arguments, passed through unchanged.

    Returns:
        ``(data, None)`` on success, or ``(None, error)`` on failure.
        On success up to two envelope layers are removed: an outer
        ``"data"`` key, then ``"response_data"`` inside it. ``error`` is
        always truthy on failure so callers can rely on ``if error:`` /
        ``bool(error)``.

    Raises:
        ValueError: If the connector has no Composio connected account id.
    """
    service = ComposioService()
    result = await service.execute_tool(
        connected_account_id=self._get_composio_connected_account_id(connector),
        tool_name=tool_name,
        params=params,
        entity_id=f"surfsense_{connector.user_id}",
    )
    if not result.get("success"):
        # ``or`` instead of a ``.get`` default: the key can be present but
        # ``None``/empty, and a falsy error would make this failure look
        # like a successful call that returned no data.
        return None, result.get("error") or "Unknown Composio Calendar error"

    data = result.get("data")
    if isinstance(data, dict):
        inner = data.get("data", data)
        if isinstance(inner, dict):
            return inner.get("response_data", inner), None
        return inner, None
    return data, None
|
||||||
|
|
||||||
|
async def _build_credentials(self, connector: SearchSourceConnector) -> Credentials:
|
||||||
|
if self._is_composio_connector(connector):
|
||||||
|
raise ValueError(
|
||||||
|
"Composio Calendar connectors must use Composio tool execution"
|
||||||
|
)
|
||||||
|
|
||||||
config_data = dict(connector.config)
|
config_data = dict(connector.config)
|
||||||
|
|
||||||
|
|
@ -156,6 +190,14 @@ class GoogleCalendarToolMetadataService:
|
||||||
if not connector:
|
if not connector:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
if self._is_composio_connector(connector):
|
||||||
|
_data, error = await self._execute_composio_calendar_tool(
|
||||||
|
connector,
|
||||||
|
"GOOGLECALENDAR_GET_CALENDAR",
|
||||||
|
{"calendar_id": "primary"},
|
||||||
|
)
|
||||||
|
return bool(error)
|
||||||
|
|
||||||
creds = await self._build_credentials(connector)
|
creds = await self._build_credentials(connector)
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
await loop.run_in_executor(
|
await loop.run_in_executor(
|
||||||
|
|
@ -255,6 +297,23 @@ class GoogleCalendarToolMetadataService:
|
||||||
timezone_str = ""
|
timezone_str = ""
|
||||||
if connector:
|
if connector:
|
||||||
try:
|
try:
|
||||||
|
if self._is_composio_connector(connector):
|
||||||
|
cal_list, cal_error = await self._execute_composio_calendar_tool(
|
||||||
|
connector, "GOOGLECALENDAR_LIST_CALENDARS", {}
|
||||||
|
)
|
||||||
|
if cal_error:
|
||||||
|
raise RuntimeError(cal_error)
|
||||||
|
(
|
||||||
|
settings,
|
||||||
|
settings_error,
|
||||||
|
) = await self._execute_composio_calendar_tool(
|
||||||
|
connector,
|
||||||
|
"GOOGLECALENDAR_SETTINGS_GET",
|
||||||
|
{"setting": "timezone"},
|
||||||
|
)
|
||||||
|
if not settings_error and isinstance(settings, dict):
|
||||||
|
timezone_str = settings.get("value", "")
|
||||||
|
else:
|
||||||
creds = await self._build_credentials(connector)
|
creds = await self._build_credentials(connector)
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
service = await loop.run_in_executor(
|
service = await loop.run_in_executor(
|
||||||
|
|
@ -264,7 +323,22 @@ class GoogleCalendarToolMetadataService:
|
||||||
cal_list = await loop.run_in_executor(
|
cal_list = await loop.run_in_executor(
|
||||||
None, lambda: service.calendarList().list().execute()
|
None, lambda: service.calendarList().list().execute()
|
||||||
)
|
)
|
||||||
for cal in cal_list.get("items", []):
|
|
||||||
|
tz_setting = await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
lambda: service.settings().get(setting="timezone").execute(),
|
||||||
|
)
|
||||||
|
timezone_str = tz_setting.get("value", "")
|
||||||
|
|
||||||
|
calendar_items = []
|
||||||
|
if isinstance(cal_list, dict):
|
||||||
|
calendar_items = (
|
||||||
|
cal_list.get("items") or cal_list.get("calendars") or []
|
||||||
|
)
|
||||||
|
elif isinstance(cal_list, list):
|
||||||
|
calendar_items = cal_list
|
||||||
|
|
||||||
|
for cal in calendar_items:
|
||||||
calendars.append(
|
calendars.append(
|
||||||
{
|
{
|
||||||
"id": cal.get("id", ""),
|
"id": cal.get("id", ""),
|
||||||
|
|
@ -272,12 +346,6 @@ class GoogleCalendarToolMetadataService:
|
||||||
"primary": cal.get("primary", False),
|
"primary": cal.get("primary", False),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
tz_setting = await loop.run_in_executor(
|
|
||||||
None,
|
|
||||||
lambda: service.settings().get(setting="timezone").execute(),
|
|
||||||
)
|
|
||||||
timezone_str = tz_setting.get("value", "")
|
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Failed to fetch calendars/timezone for connector %s",
|
"Failed to fetch calendars/timezone for connector %s",
|
||||||
|
|
@ -321,12 +389,21 @@ class GoogleCalendarToolMetadataService:
|
||||||
|
|
||||||
event_dict = event.to_dict()
|
event_dict = event.to_dict()
|
||||||
try:
|
try:
|
||||||
|
calendar_id = event.calendar_id or "primary"
|
||||||
|
if self._is_composio_connector(connector):
|
||||||
|
live_event, error = await self._execute_composio_calendar_tool(
|
||||||
|
connector,
|
||||||
|
"GOOGLECALENDAR_EVENTS_GET",
|
||||||
|
{"calendar_id": calendar_id, "event_id": event.event_id},
|
||||||
|
)
|
||||||
|
if error:
|
||||||
|
raise RuntimeError(error)
|
||||||
|
else:
|
||||||
creds = await self._build_credentials(connector)
|
creds = await self._build_credentials(connector)
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
service = await loop.run_in_executor(
|
service = await loop.run_in_executor(
|
||||||
None, lambda: build("calendar", "v3", credentials=creds)
|
None, lambda: build("calendar", "v3", credentials=creds)
|
||||||
)
|
)
|
||||||
calendar_id = event.calendar_id or "primary"
|
|
||||||
live_event = await loop.run_in_executor(
|
live_event = await loop.run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: (
|
lambda: (
|
||||||
|
|
@ -376,14 +453,32 @@ class GoogleCalendarToolMetadataService:
|
||||||
) -> dict:
|
) -> dict:
|
||||||
resolved = await self._resolve_event(search_space_id, user_id, event_ref)
|
resolved = await self._resolve_event(search_space_id, user_id, event_ref)
|
||||||
if not resolved:
|
if not resolved:
|
||||||
|
live_resolved = await self._resolve_live_event(
|
||||||
|
search_space_id, user_id, event_ref
|
||||||
|
)
|
||||||
|
if not live_resolved:
|
||||||
return {
|
return {
|
||||||
"error": (
|
"error": (
|
||||||
f"Event '{event_ref}' not found in your indexed Google Calendar events. "
|
f"Event '{event_ref}' not found in your indexed or live Google Calendar events. "
|
||||||
"This could mean: (1) the event doesn't exist, (2) it hasn't been indexed yet, "
|
"This could mean: (1) the event doesn't exist, "
|
||||||
"or (3) the event name is different."
|
"(2) the event name is different, or "
|
||||||
|
"(3) the connected calendar account cannot access it."
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
connector, live_event = live_resolved
|
||||||
|
account = GoogleCalendarAccount.from_connector(connector)
|
||||||
|
acc_dict = account.to_dict()
|
||||||
|
auth_expired = await self._check_account_health(connector.id)
|
||||||
|
acc_dict["auth_expired"] = auth_expired
|
||||||
|
if auth_expired:
|
||||||
|
await self._persist_auth_expired(connector.id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"account": acc_dict,
|
||||||
|
"event": self._event_dict_from_live_event(live_event),
|
||||||
|
}
|
||||||
|
|
||||||
document, connector = resolved
|
document, connector = resolved
|
||||||
account = GoogleCalendarAccount.from_connector(connector)
|
account = GoogleCalendarAccount.from_connector(connector)
|
||||||
event = GoogleCalendarEvent.from_document(document)
|
event = GoogleCalendarEvent.from_document(document)
|
||||||
|
|
@ -429,3 +524,110 @@ class GoogleCalendarToolMetadataService:
|
||||||
if row:
|
if row:
|
||||||
return row[0], row[1]
|
return row[0], row[1]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
async def _resolve_live_event(
    self, search_space_id: int, user_id: str, event_ref: str
) -> tuple[SearchSourceConnector, dict] | None:
    """Find an event by querying the user's calendar connectors live.

    Connectors of any type in ``CALENDAR_CONNECTOR_TYPES`` belonging to
    the user and search space are tried in descending ``last_indexed_at``
    order. The first connector that returns at least one event wins.

    Args:
        search_space_id: Search space the connector must belong to.
        user_id: Owner of the connector.
        event_ref: Free-text reference (typically the event title).

    Returns:
        ``(connector, event)`` where ``event`` is the result whose summary
        equals ``event_ref`` case-insensitively, or else the first result.
        ``None`` when no connector yields any event.
    """
    result = await self._db_session.execute(
        select(SearchSourceConnector)
        .filter(
            and_(
                SearchSourceConnector.search_space_id == search_space_id,
                SearchSourceConnector.user_id == user_id,
                SearchSourceConnector.connector_type.in_(CALENDAR_CONNECTOR_TYPES),
            )
        )
        .order_by(SearchSourceConnector.last_indexed_at.desc())
    )
    connectors = result.scalars().all()

    # Loop-invariant: normalise the reference once, not per connector.
    normalized_ref = event_ref.strip().lower()

    for connector in connectors:
        try:
            events = await self._search_live_events(connector, event_ref)
        except Exception:
            # Best effort: one broken connector must not hide the others.
            logger.warning(
                "Failed to search live calendar events for connector %s",
                connector.id,
                exc_info=True,
            )
            continue

        # Keep only mapping-shaped results so the ``.get`` calls below (and
        # in the caller) cannot fail on a malformed item.
        candidates = [event for event in events or [] if isinstance(event, dict)]
        if not candidates:
            continue

        exact_match = next(
            (
                event
                for event in candidates
                # ``or ""`` covers a summary that is present but null.
                if str(event.get("summary") or "").strip().lower() == normalized_ref
            ),
            None,
        )
        return connector, exact_match or candidates[0]

    return None
|
||||||
|
|
||||||
|
async def _search_live_events(
|
||||||
|
self, connector: SearchSourceConnector, event_ref: str
|
||||||
|
) -> list[dict]:
|
||||||
|
if self._is_composio_connector(connector):
|
||||||
|
data, error = await self._execute_composio_calendar_tool(
|
||||||
|
connector,
|
||||||
|
"GOOGLECALENDAR_EVENTS_LIST",
|
||||||
|
{
|
||||||
|
"calendar_id": "primary",
|
||||||
|
"q": event_ref,
|
||||||
|
"max_results": 10,
|
||||||
|
"single_events": True,
|
||||||
|
"order_by": "startTime",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if error:
|
||||||
|
raise RuntimeError(error)
|
||||||
|
if isinstance(data, dict):
|
||||||
|
return data.get("items") or data.get("events") or []
|
||||||
|
return data if isinstance(data, list) else []
|
||||||
|
|
||||||
|
creds = await self._build_credentials(connector)
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
service = await loop.run_in_executor(
|
||||||
|
None, lambda: build("calendar", "v3", credentials=creds)
|
||||||
|
)
|
||||||
|
response = await loop.run_in_executor(
|
||||||
|
None,
|
||||||
|
lambda: (
|
||||||
|
service.events()
|
||||||
|
.list(
|
||||||
|
calendarId="primary",
|
||||||
|
q=event_ref,
|
||||||
|
maxResults=10,
|
||||||
|
singleEvents=True,
|
||||||
|
orderBy="startTime",
|
||||||
|
)
|
||||||
|
.execute()
|
||||||
|
),
|
||||||
|
)
|
||||||
|
return response.get("items", [])
|
||||||
|
|
||||||
|
def _event_dict_from_live_event(self, event: dict) -> dict:
|
||||||
|
start_data = event.get("start", {})
|
||||||
|
end_data = event.get("end", {})
|
||||||
|
return {
|
||||||
|
"event_id": event.get("id", ""),
|
||||||
|
"summary": event.get("summary", "No Title"),
|
||||||
|
"start": start_data.get("dateTime", start_data.get("date", "")),
|
||||||
|
"end": end_data.get("dateTime", end_data.get("date", "")),
|
||||||
|
"description": event.get("description", ""),
|
||||||
|
"location": event.get("location", ""),
|
||||||
|
"attendees": [
|
||||||
|
{
|
||||||
|
"email": attendee.get("email", ""),
|
||||||
|
"responseStatus": attendee.get("responseStatus", ""),
|
||||||
|
}
|
||||||
|
for attendee in event.get("attendees", [])
|
||||||
|
],
|
||||||
|
"calendar_id": event.get("calendarId", "primary"),
|
||||||
|
"document_id": None,
|
||||||
|
"indexed_at": None,
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from app.db import (
|
||||||
SearchSourceConnector,
|
SearchSourceConnector,
|
||||||
SearchSourceConnectorType,
|
SearchSourceConnectorType,
|
||||||
)
|
)
|
||||||
from app.utils.google_credentials import build_composio_credentials
|
from app.services.composio_service import ComposioService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -67,6 +67,42 @@ class GoogleDriveToolMetadataService:
|
||||||
def __init__(self, db_session: AsyncSession):
|
def __init__(self, db_session: AsyncSession):
|
||||||
self._db_session = db_session
|
self._db_session = db_session
|
||||||
|
|
||||||
|
def _is_composio_connector(self, connector: SearchSourceConnector) -> bool:
    """Report whether ``connector`` is the Composio Google Drive connector type."""
    composio_drive_type = SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
    return connector.connector_type == composio_drive_type
|
||||||
|
|
||||||
|
def _get_composio_connected_account_id(
|
||||||
|
self, connector: SearchSourceConnector
|
||||||
|
) -> str:
|
||||||
|
cca_id = connector.config.get("composio_connected_account_id")
|
||||||
|
if not cca_id:
|
||||||
|
raise ValueError("Composio connected_account_id not found")
|
||||||
|
return cca_id
|
||||||
|
|
||||||
|
async def _execute_composio_drive_tool(
    self,
    connector: SearchSourceConnector,
    tool_name: str,
    params: dict,
) -> tuple[dict | list | None, str | None]:
    """Execute a Composio Google Drive tool and unwrap its payload.

    Args:
        connector: Connector whose config holds the Composio connected
            account id and whose ``user_id`` forms the Composio entity id.
        tool_name: Composio tool slug to run.
        params: Arguments forwarded to the tool unchanged.

    Returns:
        A ``(data, error)`` pair. On success ``error`` is ``None`` and
        ``data`` is the result payload with the ``data`` and
        ``response_data`` wrapper layers peeled off when present. On
        failure ``data`` is ``None`` and ``error`` is a non-empty message.

    Raises:
        ValueError: If the connector has no Composio connected account id.
    """
    result = await ComposioService().execute_tool(
        connected_account_id=self._get_composio_connected_account_id(connector),
        tool_name=tool_name,
        params=params,
        entity_id=f"surfsense_{connector.user_id}",
    )
    if not result.get("success"):
        # Callers branch on the truthiness of ``error``; a failed call
        # whose "error" key is present but null/empty must still produce a
        # message, otherwise the failure is treated as success.
        return None, result.get("error") or "Unknown Composio Drive error"
    data = result.get("data")
    if isinstance(data, dict):
        # The payload may be wrapped in a nested "data" key and then in
        # "response_data"; fall back to the outer level when absent.
        inner = data.get("data", data)
        if isinstance(inner, dict):
            return inner.get("response_data", inner), None
        return inner, None
    return data, None
|
||||||
|
|
||||||
async def get_creation_context(self, search_space_id: int, user_id: str) -> dict:
|
async def get_creation_context(self, search_space_id: int, user_id: str) -> dict:
|
||||||
accounts = await self._get_google_drive_accounts(search_space_id, user_id)
|
accounts = await self._get_google_drive_accounts(search_space_id, user_id)
|
||||||
|
|
||||||
|
|
@ -200,19 +236,21 @@ class GoogleDriveToolMetadataService:
|
||||||
if not connector:
|
if not connector:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
pre_built_creds = None
|
if self._is_composio_connector(connector):
|
||||||
if (
|
_data, error = await self._execute_composio_drive_tool(
|
||||||
connector.connector_type
|
connector,
|
||||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
|
"GOOGLEDRIVE_LIST_FILES",
|
||||||
):
|
{
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
"q": "trashed = false",
|
||||||
if cca_id:
|
"page_size": 1,
|
||||||
pre_built_creds = build_composio_credentials(cca_id)
|
"fields": "files(id)",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return bool(error)
|
||||||
|
|
||||||
client = GoogleDriveClient(
|
client = GoogleDriveClient(
|
||||||
session=self._db_session,
|
session=self._db_session,
|
||||||
connector_id=connector_id,
|
connector_id=connector_id,
|
||||||
credentials=pre_built_creds,
|
|
||||||
)
|
)
|
||||||
await client.list_files(
|
await client.list_files(
|
||||||
query="trashed = false", page_size=1, fields="files(id)"
|
query="trashed = false", page_size=1, fields="files(id)"
|
||||||
|
|
@ -274,19 +312,39 @@ class GoogleDriveToolMetadataService:
|
||||||
parent_folders[connector_id] = []
|
parent_folders[connector_id] = []
|
||||||
continue
|
continue
|
||||||
|
|
||||||
pre_built_creds = None
|
if self._is_composio_connector(connector):
|
||||||
if (
|
data, error = await self._execute_composio_drive_tool(
|
||||||
connector.connector_type
|
connector,
|
||||||
== SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
|
"GOOGLEDRIVE_LIST_FILES",
|
||||||
):
|
{
|
||||||
cca_id = connector.config.get("composio_connected_account_id")
|
"q": "mimeType = 'application/vnd.google-apps.folder' and trashed = false and 'root' in parents",
|
||||||
if cca_id:
|
"fields": "files(id,name)",
|
||||||
pre_built_creds = build_composio_credentials(cca_id)
|
"page_size": 50,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
if error:
|
||||||
|
logger.warning(
|
||||||
|
"Failed to list folders for connector %s: %s",
|
||||||
|
connector_id,
|
||||||
|
error,
|
||||||
|
)
|
||||||
|
parent_folders[connector_id] = []
|
||||||
|
continue
|
||||||
|
folders = []
|
||||||
|
if isinstance(data, dict):
|
||||||
|
folders = data.get("files", [])
|
||||||
|
elif isinstance(data, list):
|
||||||
|
folders = data
|
||||||
|
parent_folders[connector_id] = [
|
||||||
|
{"folder_id": f["id"], "name": f["name"]}
|
||||||
|
for f in folders
|
||||||
|
if f.get("id") and f.get("name")
|
||||||
|
]
|
||||||
|
continue
|
||||||
|
|
||||||
client = GoogleDriveClient(
|
client = GoogleDriveClient(
|
||||||
session=self._db_session,
|
session=self._db_session,
|
||||||
connector_id=connector_id,
|
connector_id=connector_id,
|
||||||
credentials=pre_built_creds,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
folders, _, error = await client.list_files(
|
folders, _, error = await client.list_files(
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,8 @@ from typing import Any
|
||||||
from litellm import Router
|
from litellm import Router
|
||||||
from litellm.utils import ImageResponse
|
from litellm.utils import ImageResponse
|
||||||
|
|
||||||
|
from app.services.provider_api_base import resolve_api_base
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Special ID for Auto mode - uses router for load balancing
|
# Special ID for Auto mode - uses router for load balancing
|
||||||
|
|
@ -152,10 +154,10 @@ class ImageGenRouterService:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Build model string
|
# Build model string
|
||||||
if config.get("custom_provider"):
|
|
||||||
model_string = f"{config['custom_provider']}/{config['model_name']}"
|
|
||||||
else:
|
|
||||||
provider = config.get("provider", "").upper()
|
provider = config.get("provider", "").upper()
|
||||||
|
if config.get("custom_provider"):
|
||||||
|
provider_prefix = config["custom_provider"]
|
||||||
|
else:
|
||||||
provider_prefix = IMAGE_GEN_PROVIDER_MAP.get(provider, provider.lower())
|
provider_prefix = IMAGE_GEN_PROVIDER_MAP.get(provider, provider.lower())
|
||||||
model_string = f"{provider_prefix}/{config['model_name']}"
|
model_string = f"{provider_prefix}/{config['model_name']}"
|
||||||
|
|
||||||
|
|
@ -165,9 +167,16 @@ class ImageGenRouterService:
|
||||||
"api_key": config.get("api_key"),
|
"api_key": config.get("api_key"),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add optional api_base
|
# Resolve ``api_base`` so deployments don't silently inherit
|
||||||
if config.get("api_base"):
|
# ``AZURE_OPENAI_ENDPOINT`` / ``OPENAI_API_BASE`` and 404 against
|
||||||
litellm_params["api_base"] = config["api_base"]
|
# the wrong provider (see ``provider_api_base`` docstring).
|
||||||
|
api_base = resolve_api_base(
|
||||||
|
provider=provider,
|
||||||
|
provider_prefix=provider_prefix,
|
||||||
|
config_api_base=config.get("api_base"),
|
||||||
|
)
|
||||||
|
if api_base:
|
||||||
|
litellm_params["api_base"] = api_base
|
||||||
|
|
||||||
# Add api_version (required for Azure)
|
# Add api_version (required for Azure)
|
||||||
if config.get("api_version"):
|
if config.get("api_version"):
|
||||||
|
|
|
||||||
|
|
@ -134,42 +134,14 @@ PROVIDER_MAP = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Default ``api_base`` per LiteLLM provider prefix. Used as a safety net when
|
# ``PROVIDER_DEFAULT_API_BASE`` and ``PROVIDER_KEY_DEFAULT_API_BASE`` were
|
||||||
# a global LLM config does *not* specify ``api_base``: without this, LiteLLM
|
# hoisted to ``app.services.provider_api_base`` so vision and image-gen
|
||||||
# happily picks up provider-agnostic env vars (e.g. ``AZURE_API_BASE``,
|
# call sites can share the exact same defense (OpenRouter / Groq / etc.
|
||||||
# ``OPENAI_API_BASE``) and routes, say, an ``openrouter/anthropic/claude-3-haiku``
|
# 404-ing against an inherited Azure endpoint). Re-exported here for
|
||||||
# request to an Azure endpoint, which then 404s with ``Resource not found``.
|
# backward compatibility with any external import.
|
||||||
# Only providers with a well-known, stable public base URL are listed here —
|
from app.services.provider_api_base import ( # noqa: E402
|
||||||
# self-hosted / BYO-endpoint providers (ollama, custom, bedrock, vertex_ai,
|
resolve_api_base,
|
||||||
# huggingface, databricks, cloudflare, replicate) are intentionally omitted
|
)
|
||||||
# so their existing config-driven behaviour is preserved.
|
|
||||||
PROVIDER_DEFAULT_API_BASE = {
|
|
||||||
"openrouter": "https://openrouter.ai/api/v1",
|
|
||||||
"groq": "https://api.groq.com/openai/v1",
|
|
||||||
"mistral": "https://api.mistral.ai/v1",
|
|
||||||
"perplexity": "https://api.perplexity.ai",
|
|
||||||
"xai": "https://api.x.ai/v1",
|
|
||||||
"cerebras": "https://api.cerebras.ai/v1",
|
|
||||||
"deepinfra": "https://api.deepinfra.com/v1/openai",
|
|
||||||
"fireworks_ai": "https://api.fireworks.ai/inference/v1",
|
|
||||||
"together_ai": "https://api.together.xyz/v1",
|
|
||||||
"anyscale": "https://api.endpoints.anyscale.com/v1",
|
|
||||||
"cometapi": "https://api.cometapi.com/v1",
|
|
||||||
"sambanova": "https://api.sambanova.ai/v1",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# Canonical provider → base URL when a config uses a generic ``openai``-style
|
|
||||||
# prefix but the ``provider`` field tells us which API it really is
|
|
||||||
# (e.g. DeepSeek/Alibaba/Moonshot/Zhipu/MiniMax all use ``openai`` compat but
|
|
||||||
# each has its own base URL).
|
|
||||||
PROVIDER_KEY_DEFAULT_API_BASE = {
|
|
||||||
"DEEPSEEK": "https://api.deepseek.com/v1",
|
|
||||||
"ALIBABA_QWEN": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
|
|
||||||
"MOONSHOT": "https://api.moonshot.ai/v1",
|
|
||||||
"ZHIPU": "https://open.bigmodel.cn/api/paas/v4",
|
|
||||||
"MINIMAX": "https://api.minimax.io/v1",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class LLMRouterService:
|
class LLMRouterService:
|
||||||
|
|
@ -466,14 +438,14 @@ class LLMRouterService:
|
||||||
# Resolve ``api_base``. Config value wins; otherwise apply a
|
# Resolve ``api_base``. Config value wins; otherwise apply a
|
||||||
# provider-aware default so the deployment does not silently
|
# provider-aware default so the deployment does not silently
|
||||||
# inherit unrelated env vars (e.g. ``AZURE_API_BASE``) and route
|
# inherit unrelated env vars (e.g. ``AZURE_API_BASE``) and route
|
||||||
# requests to the wrong endpoint. See ``PROVIDER_DEFAULT_API_BASE``
|
# requests to the wrong endpoint. See ``provider_api_base``
|
||||||
# docstring for the motivating bug (OpenRouter models 404-ing
|
# docstring for the motivating bug (OpenRouter models 404-ing
|
||||||
# against an Azure endpoint).
|
# against an Azure endpoint).
|
||||||
api_base = config.get("api_base")
|
api_base = resolve_api_base(
|
||||||
if not api_base:
|
provider=provider,
|
||||||
api_base = PROVIDER_KEY_DEFAULT_API_BASE.get(provider)
|
provider_prefix=provider_prefix,
|
||||||
if not api_base:
|
config_api_base=config.get("api_base"),
|
||||||
api_base = PROVIDER_DEFAULT_API_BASE.get(provider_prefix)
|
)
|
||||||
if api_base:
|
if api_base:
|
||||||
litellm_params["api_base"] = api_base
|
litellm_params["api_base"] = api_base
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ from app.services.llm_router_service import (
|
||||||
get_auto_mode_llm,
|
get_auto_mode_llm,
|
||||||
is_auto_mode,
|
is_auto_mode,
|
||||||
)
|
)
|
||||||
|
from app.services.provider_api_base import resolve_api_base
|
||||||
from app.services.token_tracking_service import token_tracker
|
from app.services.token_tracking_service import token_tracker
|
||||||
|
|
||||||
# Configure litellm to automatically drop unsupported parameters
|
# Configure litellm to automatically drop unsupported parameters
|
||||||
|
|
@ -496,8 +497,14 @@ async def get_vision_llm(
|
||||||
- Auto mode (ID 0): VisionLLMRouterService
|
- Auto mode (ID 0): VisionLLMRouterService
|
||||||
- Global (negative ID): YAML configs
|
- Global (negative ID): YAML configs
|
||||||
- DB (positive ID): VisionLLMConfig table
|
- DB (positive ID): VisionLLMConfig table
|
||||||
|
|
||||||
|
Premium global configs are wrapped in :class:`QuotaCheckedVisionLLM`
|
||||||
|
so each ``ainvoke`` debits the search-space owner's premium credit
|
||||||
|
pool. User-owned BYOK configs and free global configs are returned
|
||||||
|
unwrapped — they don't consume premium credit (issue M).
|
||||||
"""
|
"""
|
||||||
from app.db import VisionLLMConfig
|
from app.db import VisionLLMConfig
|
||||||
|
from app.services.quota_checked_vision_llm import QuotaCheckedVisionLLM
|
||||||
from app.services.vision_llm_router_service import (
|
from app.services.vision_llm_router_service import (
|
||||||
VISION_PROVIDER_MAP,
|
VISION_PROVIDER_MAP,
|
||||||
VisionLLMRouterService,
|
VisionLLMRouterService,
|
||||||
|
|
@ -519,6 +526,8 @@ async def get_vision_llm(
|
||||||
logger.error(f"No vision LLM configured for search space {search_space_id}")
|
logger.error(f"No vision LLM configured for search space {search_space_id}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
owner_user_id = search_space.user_id
|
||||||
|
|
||||||
if is_vision_auto_mode(config_id):
|
if is_vision_auto_mode(config_id):
|
||||||
if not VisionLLMRouterService.is_initialized():
|
if not VisionLLMRouterService.is_initialized():
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|
@ -526,6 +535,13 @@ async def get_vision_llm(
|
||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
|
# Auto mode is currently treated as free at the wrapper
|
||||||
|
# level — the underlying router can dispatch to either
|
||||||
|
# premium or free YAML configs but routing decisions are
|
||||||
|
# opaque. If/when we want to bill Auto-routed vision
|
||||||
|
# calls we'd need to thread the resolved deployment's
|
||||||
|
# billing_tier back from the router. For now we keep
|
||||||
|
# parity with chat Auto, which also doesn't pre-classify.
|
||||||
return ChatLiteLLMRouter(
|
return ChatLiteLLMRouter(
|
||||||
router=VisionLLMRouterService.get_router(),
|
router=VisionLLMRouterService.get_router(),
|
||||||
streaming=True,
|
streaming=True,
|
||||||
|
|
@ -541,29 +557,46 @@ async def get_vision_llm(
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if global_cfg.get("custom_provider"):
|
if global_cfg.get("custom_provider"):
|
||||||
model_string = (
|
provider_prefix = global_cfg["custom_provider"]
|
||||||
f"{global_cfg['custom_provider']}/{global_cfg['model_name']}"
|
model_string = f"{provider_prefix}/{global_cfg['model_name']}"
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
prefix = VISION_PROVIDER_MAP.get(
|
provider_prefix = VISION_PROVIDER_MAP.get(
|
||||||
global_cfg["provider"].upper(),
|
global_cfg["provider"].upper(),
|
||||||
global_cfg["provider"].lower(),
|
global_cfg["provider"].lower(),
|
||||||
)
|
)
|
||||||
model_string = f"{prefix}/{global_cfg['model_name']}"
|
model_string = f"{provider_prefix}/{global_cfg['model_name']}"
|
||||||
|
|
||||||
litellm_kwargs = {
|
litellm_kwargs = {
|
||||||
"model": model_string,
|
"model": model_string,
|
||||||
"api_key": global_cfg["api_key"],
|
"api_key": global_cfg["api_key"],
|
||||||
}
|
}
|
||||||
if global_cfg.get("api_base"):
|
api_base = resolve_api_base(
|
||||||
litellm_kwargs["api_base"] = global_cfg["api_base"]
|
provider=global_cfg.get("provider"),
|
||||||
|
provider_prefix=provider_prefix,
|
||||||
|
config_api_base=global_cfg.get("api_base"),
|
||||||
|
)
|
||||||
|
if api_base:
|
||||||
|
litellm_kwargs["api_base"] = api_base
|
||||||
if global_cfg.get("litellm_params"):
|
if global_cfg.get("litellm_params"):
|
||||||
litellm_kwargs.update(global_cfg["litellm_params"])
|
litellm_kwargs.update(global_cfg["litellm_params"])
|
||||||
|
|
||||||
from app.agents.new_chat.llm_config import SanitizedChatLiteLLM
|
from app.agents.new_chat.llm_config import SanitizedChatLiteLLM
|
||||||
|
|
||||||
return SanitizedChatLiteLLM(**litellm_kwargs)
|
inner_llm = SanitizedChatLiteLLM(**litellm_kwargs)
|
||||||
|
|
||||||
|
billing_tier = str(global_cfg.get("billing_tier", "free")).lower()
|
||||||
|
if billing_tier == "premium":
|
||||||
|
return QuotaCheckedVisionLLM(
|
||||||
|
inner_llm,
|
||||||
|
user_id=owner_user_id,
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
billing_tier=billing_tier,
|
||||||
|
base_model=model_string,
|
||||||
|
quota_reserve_tokens=global_cfg.get("quota_reserve_tokens"),
|
||||||
|
)
|
||||||
|
return inner_llm
|
||||||
|
|
||||||
|
# User-owned (positive ID) BYOK configs — always free.
|
||||||
result = await session.execute(
|
result = await session.execute(
|
||||||
select(VisionLLMConfig).where(
|
select(VisionLLMConfig).where(
|
||||||
VisionLLMConfig.id == config_id,
|
VisionLLMConfig.id == config_id,
|
||||||
|
|
@ -578,20 +611,26 @@ async def get_vision_llm(
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if vision_cfg.custom_provider:
|
if vision_cfg.custom_provider:
|
||||||
model_string = f"{vision_cfg.custom_provider}/{vision_cfg.model_name}"
|
provider_prefix = vision_cfg.custom_provider
|
||||||
|
model_string = f"{provider_prefix}/{vision_cfg.model_name}"
|
||||||
else:
|
else:
|
||||||
prefix = VISION_PROVIDER_MAP.get(
|
provider_prefix = VISION_PROVIDER_MAP.get(
|
||||||
vision_cfg.provider.value.upper(),
|
vision_cfg.provider.value.upper(),
|
||||||
vision_cfg.provider.value.lower(),
|
vision_cfg.provider.value.lower(),
|
||||||
)
|
)
|
||||||
model_string = f"{prefix}/{vision_cfg.model_name}"
|
model_string = f"{provider_prefix}/{vision_cfg.model_name}"
|
||||||
|
|
||||||
litellm_kwargs = {
|
litellm_kwargs = {
|
||||||
"model": model_string,
|
"model": model_string,
|
||||||
"api_key": vision_cfg.api_key,
|
"api_key": vision_cfg.api_key,
|
||||||
}
|
}
|
||||||
if vision_cfg.api_base:
|
api_base = resolve_api_base(
|
||||||
litellm_kwargs["api_base"] = vision_cfg.api_base
|
provider=vision_cfg.provider.value,
|
||||||
|
provider_prefix=provider_prefix,
|
||||||
|
config_api_base=vision_cfg.api_base,
|
||||||
|
)
|
||||||
|
if api_base:
|
||||||
|
litellm_kwargs["api_base"] = api_base
|
||||||
if vision_cfg.litellm_params:
|
if vision_cfg.litellm_params:
|
||||||
litellm_kwargs.update(vision_cfg.litellm_params)
|
litellm_kwargs.update(vision_cfg.litellm_params)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -93,6 +93,53 @@ def _is_text_output_model(model: dict) -> bool:
|
||||||
return output_mods == ["text"]
|
return output_mods == ["text"]
|
||||||
|
|
||||||
|
|
||||||
|
def _is_image_output_model(model: dict) -> bool:
|
||||||
|
"""Return True if the model can produce image output.
|
||||||
|
|
||||||
|
OpenRouter's ``architecture.output_modalities`` is a list (e.g.
|
||||||
|
``["image"]`` for pure image generators, ``["text", "image"]`` for
|
||||||
|
multi-modal generators that also emit captions). We accept any model
|
||||||
|
that can output images; the call site decides whether to use the
|
||||||
|
image-generation API or chat completion.
|
||||||
|
"""
|
||||||
|
output_mods = model.get("architecture", {}).get("output_modalities", []) or []
|
||||||
|
return "image" in output_mods
|
||||||
|
|
||||||
|
|
||||||
|
def _is_vision_input_model(model: dict) -> bool:
|
||||||
|
"""Return True if the model can ingest an image AND emit text.
|
||||||
|
|
||||||
|
OpenRouter's ``architecture.input_modalities`` lists what the model
|
||||||
|
accepts; ``output_modalities`` lists what it produces. A vision LLM
|
||||||
|
is a model that takes images in and produces text out — i.e. it can
|
||||||
|
answer questions about a screenshot or extract content from an
|
||||||
|
image. Pure image-to-image models (e.g. style transfer) and
|
||||||
|
text-only models are excluded.
|
||||||
|
"""
|
||||||
|
arch = model.get("architecture", {}) or {}
|
||||||
|
input_mods = arch.get("input_modalities", []) or []
|
||||||
|
output_mods = arch.get("output_modalities", []) or []
|
||||||
|
return "image" in input_mods and "text" in output_mods
|
||||||
|
|
||||||
|
|
||||||
|
def _supports_image_input(model: dict) -> bool:
|
||||||
|
"""Return True if the model accepts ``image`` in its input modalities.
|
||||||
|
|
||||||
|
Differs from :func:`_is_vision_input_model` in that it does NOT
|
||||||
|
require text output — chat-tab models always emit text already (the
|
||||||
|
chat catalog filters by ``_is_text_output_model``), so the only
|
||||||
|
extra capability we need to track per chat config is whether the
|
||||||
|
model can ingest user-attached images. The chat selector and the
|
||||||
|
streaming task both key off this flag to prevent hitting an
|
||||||
|
OpenRouter 404 ``"No endpoints found that support image input"``
|
||||||
|
when the user uploads an image and selects a text-only model
|
||||||
|
(DeepSeek V3, Llama 3.x base, etc.).
|
||||||
|
"""
|
||||||
|
arch = model.get("architecture", {}) or {}
|
||||||
|
input_mods = arch.get("input_modalities", []) or []
|
||||||
|
return "image" in input_mods
|
||||||
|
|
||||||
|
|
||||||
def _supports_tool_calling(model: dict) -> bool:
|
def _supports_tool_calling(model: dict) -> bool:
|
||||||
"""Return True if the model supports function/tool calling."""
|
"""Return True if the model supports function/tool calling."""
|
||||||
supported = model.get("supported_parameters") or []
|
supported = model.get("supported_parameters") or []
|
||||||
|
|
@ -175,6 +222,32 @@ async def _fetch_models_async() -> list[dict] | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_raw_pricing(raw_models: list[dict]) -> dict[str, dict[str, str]]:
|
||||||
|
"""Return a ``{model_id: {"prompt": str, "completion": str}}`` map.
|
||||||
|
|
||||||
|
Pricing values are kept as the raw OpenRouter strings (e.g.
|
||||||
|
``"0.000003"``); ``pricing_registration`` converts them to floats
|
||||||
|
when registering with LiteLLM. Models with missing or malformed
|
||||||
|
pricing are simply omitted — operator-side risk if any of those are
|
||||||
|
premium.
|
||||||
|
"""
|
||||||
|
pricing: dict[str, dict[str, str]] = {}
|
||||||
|
for model in raw_models:
|
||||||
|
model_id = str(model.get("id") or "").strip()
|
||||||
|
if not model_id:
|
||||||
|
continue
|
||||||
|
p = model.get("pricing") or {}
|
||||||
|
prompt = p.get("prompt")
|
||||||
|
completion = p.get("completion")
|
||||||
|
if prompt is None and completion is None:
|
||||||
|
continue
|
||||||
|
pricing[model_id] = {
|
||||||
|
"prompt": str(prompt) if prompt is not None else "",
|
||||||
|
"completion": str(completion) if completion is not None else "",
|
||||||
|
}
|
||||||
|
return pricing
|
||||||
|
|
||||||
|
|
||||||
def _generate_configs(
|
def _generate_configs(
|
||||||
raw_models: list[dict],
|
raw_models: list[dict],
|
||||||
settings: dict[str, Any],
|
settings: dict[str, Any],
|
||||||
|
|
@ -266,6 +339,13 @@ def _generate_configs(
|
||||||
# account-wide quota, so per-deployment routing can't spread load
|
# account-wide quota, so per-deployment routing can't spread load
|
||||||
# there — it just drains the shared bucket faster.
|
# there — it just drains the shared bucket faster.
|
||||||
"router_pool_eligible": tier == "premium",
|
"router_pool_eligible": tier == "premium",
|
||||||
|
# Capability flag derived from ``architecture.input_modalities``.
|
||||||
|
# Read by the new-chat selector to dim image-incompatible models
|
||||||
|
# when the user has pending image attachments, and by
|
||||||
|
# ``stream_new_chat`` as a fail-fast safety net before the
|
||||||
|
# OpenRouter request would otherwise 404 with
|
||||||
|
# ``"No endpoints found that support image input"``.
|
||||||
|
"supports_image_input": _supports_image_input(model),
|
||||||
_OPENROUTER_DYNAMIC_MARKER: True,
|
_OPENROUTER_DYNAMIC_MARKER: True,
|
||||||
# Auto (Fastest) ranking metadata. ``quality_score`` is initialised
|
# Auto (Fastest) ranking metadata. ``quality_score`` is initialised
|
||||||
# to the static score and gets re-blended with health on the next
|
# to the static score and gets re-blended with health on the next
|
||||||
|
|
@ -282,6 +362,171 @@ def _generate_configs(
|
||||||
return configs
|
return configs
|
||||||
|
|
||||||
|
|
||||||
|
# ID-offset bands used to keep dynamic OpenRouter configs in their own
|
||||||
|
# namespace per surface. Image / vision get separate bands so a single
|
||||||
|
# Postgres-INTEGER cfg ID is unambiguous about which selector it belongs to.
|
||||||
|
_OPENROUTER_IMAGE_ID_OFFSET_DEFAULT = -20000
|
||||||
|
_OPENROUTER_VISION_ID_OFFSET_DEFAULT = -30000
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_image_gen_configs(
    raw_models: list[dict], settings: dict[str, Any]
) -> list[dict]:
    """Build global image-generation config dicts from the OpenRouter catalogue.

    A model is emitted when all of the following hold:
    - ``architecture.output_modalities`` contains ``"image"``
    - its provider passes ``_is_compatible_provider``
    - its id passes ``_is_allowed_model``
    - its id contains a ``/``

    The chat-only filters (tool calling, context window) are deliberately
    not applied, since they are irrelevant to image generation.
    ``billing_tier`` comes from ``_openrouter_tier`` and selects between
    the ``free_rpm`` and ``rpm`` settings.

    No per-token pricing is attached to these configs; image-gen cost is
    expected to be reported by OpenRouter on the response itself.

    Args:
        raw_models: Raw model entries from the OpenRouter catalogue.
        settings: Integration settings; reads ``image_id_offset``,
            ``api_key``, ``rpm``, ``free_rpm`` and ``litellm_params``.

    Returns:
        One config dict per eligible model, in catalogue order.
    """
    offset = int(
        settings.get("image_id_offset") or _OPENROUTER_IMAGE_ID_OFFSET_DEFAULT
    )
    key = settings.get("api_key", "")
    paid_rpm = settings.get("rpm", 200)
    free_rpm = settings.get("free_rpm", 20)
    shared_params = settings.get("litellm_params") or {}

    def _eligible(candidate: dict) -> bool:
        """Apply the image-gen filters in the same order as the checks above."""
        if not _is_image_output_model(candidate):
            return False
        if not _is_compatible_provider(candidate):
            return False
        if not _is_allowed_model(candidate):
            return False
        return "/" in candidate.get("id", "")

    # Filter the whole catalogue first, then allocate ids.
    eligible = [candidate for candidate in raw_models if _eligible(candidate)]

    used_ids: set[int] = set()
    result: list[dict] = []
    for entry in eligible:
        slug = entry["id"]
        display = entry.get("name", slug)
        tier = _openrouter_tier(entry)
        result.append(
            {
                "id": _stable_config_id(slug, offset, used_ids),
                "name": display,
                "description": f"{display} via OpenRouter (image generation)",
                "provider": "OPENROUTER",
                "model_name": slug,
                "api_key": key,
                # Pinned to OpenRouter's public base URL so a call site that
                # forgets to resolve the base cannot inherit an unrelated
                # endpoint (e.g. ``AZURE_OPENAI_ENDPOINT``) and 404.
                "api_base": "https://openrouter.ai/api/v1",
                "api_version": None,
                "rpm": free_rpm if tier == "free" else paid_rpm,
                # Copy so per-config mutation never leaks into the settings.
                "litellm_params": dict(shared_params),
                "billing_tier": tier,
                _OPENROUTER_DYNAMIC_MARKER: True,
            }
        )
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_vision_llm_configs(
    raw_models: list[dict], settings: dict[str, Any]
) -> list[dict]:
    """Build global vision-LLM config dicts from the OpenRouter catalogue.

    A model is emitted when all of the following hold:
    - ``architecture.input_modalities`` contains ``"image"``
    - ``architecture.output_modalities`` contains ``"text"``
    - its provider passes ``_is_compatible_provider``
    - its id passes ``_is_allowed_model``
    - its id contains a ``/``

    The chat-only filters (tool calling, context window) are deliberately
    not applied: a small-context model without tool calling is still
    usable for "describe this image" prompts.

    Each config carries ``input_cost_per_token`` / ``output_cost_per_token``
    parsed from the model's ``pricing`` block. A price that is missing,
    unparseable or zero is stored as ``None``.

    Args:
        raw_models: Raw model entries from the OpenRouter catalogue.
        settings: Integration settings; reads ``vision_id_offset``,
            ``api_key``, ``rpm``, ``tpm``, ``free_rpm``, ``free_tpm``,
            ``quota_reserve_tokens`` and ``litellm_params``.

    Returns:
        One config dict per eligible model, in catalogue order.
    """
    offset = int(
        settings.get("vision_id_offset") or _OPENROUTER_VISION_ID_OFFSET_DEFAULT
    )
    key = settings.get("api_key", "")
    paid_rpm = settings.get("rpm", 200)
    paid_tpm = settings.get("tpm", 1_000_000)
    free_rpm = settings.get("free_rpm", 20)
    free_tpm = settings.get("free_tpm", 100_000)
    reserve = settings.get("quota_reserve_tokens", 4000)
    shared_params = settings.get("litellm_params") or {}

    def _eligible(candidate: dict) -> bool:
        """Apply the vision filters in the same order as the checks above."""
        if not _is_vision_input_model(candidate):
            return False
        if not _is_compatible_provider(candidate):
            return False
        if not _is_allowed_model(candidate):
            return False
        return "/" in candidate.get("id", "")

    def _per_token(raw: Any) -> float:
        """Parse one raw price; falsy or unparseable values become 0.0."""
        try:
            return float(raw or 0)
        except (TypeError, ValueError):
            return 0.0

    # Filter the whole catalogue first, then allocate ids.
    eligible = [candidate for candidate in raw_models if _eligible(candidate)]

    used_ids: set[int] = set()
    result: list[dict] = []
    for entry in eligible:
        slug = entry["id"]
        display = entry.get("name", slug)
        tier = _openrouter_tier(entry)
        prices = entry.get("pricing") or {}
        is_free = tier == "free"

        # Per-token prices ride along on the config so they can later be
        # registered with LiteLLM and used for cost estimation.
        prompt_cost = _per_token(prices.get("prompt"))
        completion_cost = _per_token(prices.get("completion"))

        result.append(
            {
                "id": _stable_config_id(slug, offset, used_ids),
                "name": display,
                "description": f"{display} via OpenRouter (vision)",
                "provider": "OPENROUTER",
                "model_name": slug,
                "api_key": key,
                # Pinned to OpenRouter's public base URL so a call site that
                # forgets to resolve the base cannot inherit an unrelated
                # endpoint (e.g. ``AZURE_OPENAI_ENDPOINT``).
                "api_base": "https://openrouter.ai/api/v1",
                "api_version": None,
                "rpm": free_rpm if is_free else paid_rpm,
                "tpm": free_tpm if is_free else paid_tpm,
                # Copy so per-config mutation never leaks into the settings.
                "litellm_params": dict(shared_params),
                "billing_tier": tier,
                "quota_reserve_tokens": reserve,
                # A zero price collapses to None ("no pricing known").
                "input_cost_per_token": prompt_cost or None,
                "output_cost_per_token": completion_cost or None,
                _OPENROUTER_DYNAMIC_MARKER: True,
            }
        )
    return result
|
||||||
|
|
||||||
|
|
||||||
class OpenRouterIntegrationService:
|
class OpenRouterIntegrationService:
|
||||||
"""Singleton that manages the dynamic OpenRouter model catalogue."""
|
"""Singleton that manages the dynamic OpenRouter model catalogue."""
|
||||||
|
|
||||||
|
|
@ -300,6 +545,19 @@ class OpenRouterIntegrationService:
|
||||||
# Shape: {model_name: {"gated": bool, "score": float | None}}
|
# Shape: {model_name: {"gated": bool, "score": float | None}}
|
||||||
self._health_cache: dict[str, dict[str, Any]] = {}
|
self._health_cache: dict[str, dict[str, Any]] = {}
|
||||||
self._enrich_task: asyncio.Task | None = None
|
self._enrich_task: asyncio.Task | None = None
|
||||||
|
# Raw OpenRouter pricing per model_id, captured at the same time
|
||||||
|
# we generate configs. Consumed by ``pricing_registration`` to
|
||||||
|
# teach LiteLLM the per-token cost of every dynamic deployment so
|
||||||
|
# the success-callback can populate ``response_cost`` correctly.
|
||||||
|
self._raw_pricing: dict[str, dict[str, str]] = {}
|
||||||
|
# Cached raw catalogue from the most recent fetch. Image / vision
|
||||||
|
# emitters reuse this to avoid a second network call per surface.
|
||||||
|
self._raw_models: list[dict] = []
|
||||||
|
# Image / vision config caches (only populated when the matching
|
||||||
|
# opt-in flag is true on initialize). Refreshed in lockstep with
|
||||||
|
# the chat catalogue.
|
||||||
|
self._image_configs: list[dict] = []
|
||||||
|
self._vision_configs: list[dict] = []
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_instance(cls) -> "OpenRouterIntegrationService":
|
def get_instance(cls) -> "OpenRouterIntegrationService":
|
||||||
|
|
@ -329,8 +587,32 @@ class OpenRouterIntegrationService:
|
||||||
self._initialized = True
|
self._initialized = True
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
self._raw_models = raw_models
|
||||||
self._configs = _generate_configs(raw_models, settings)
|
self._configs = _generate_configs(raw_models, settings)
|
||||||
self._configs_by_id = {c["id"]: c for c in self._configs}
|
self._configs_by_id = {c["id"]: c for c in self._configs}
|
||||||
|
self._raw_pricing = _extract_raw_pricing(raw_models)
|
||||||
|
|
||||||
|
# Populate image / vision caches when their opt-in flag is set.
|
||||||
|
# Empty otherwise so the accessors return [] without re-running
|
||||||
|
# filters every refresh.
|
||||||
|
if settings.get("image_generation_enabled"):
|
||||||
|
self._image_configs = _generate_image_gen_configs(raw_models, settings)
|
||||||
|
logger.info(
|
||||||
|
"OpenRouter integration: image-gen emission ON (%d models)",
|
||||||
|
len(self._image_configs),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self._image_configs = []
|
||||||
|
|
||||||
|
if settings.get("vision_enabled"):
|
||||||
|
self._vision_configs = _generate_vision_llm_configs(raw_models, settings)
|
||||||
|
logger.info(
|
||||||
|
"OpenRouter integration: vision LLM emission ON (%d models)",
|
||||||
|
len(self._vision_configs),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self._vision_configs = []
|
||||||
|
|
||||||
self._initialized = True
|
self._initialized = True
|
||||||
|
|
||||||
tier_counts = self._tier_counts(self._configs)
|
tier_counts = self._tier_counts(self._configs)
|
||||||
|
|
@ -369,6 +651,8 @@ class OpenRouterIntegrationService:
|
||||||
|
|
||||||
new_configs = _generate_configs(raw_models, self._settings)
|
new_configs = _generate_configs(raw_models, self._settings)
|
||||||
new_by_id = {c["id"]: c for c in new_configs}
|
new_by_id = {c["id"]: c for c in new_configs}
|
||||||
|
self._raw_pricing = _extract_raw_pricing(raw_models)
|
||||||
|
self._raw_models = raw_models
|
||||||
|
|
||||||
from app.config import config as app_config
|
from app.config import config as app_config
|
||||||
|
|
||||||
|
|
@ -382,6 +666,29 @@ class OpenRouterIntegrationService:
|
||||||
self._configs = new_configs
|
self._configs = new_configs
|
||||||
self._configs_by_id = new_by_id
|
self._configs_by_id = new_by_id
|
||||||
|
|
||||||
|
# Image / vision lists are atomic-swapped the same way: filter out
|
||||||
|
# the previous dynamic entries from the live config list and append
|
||||||
|
# the freshly generated ones. No-ops when the opt-in flag is off.
|
||||||
|
if self._settings.get("image_generation_enabled"):
|
||||||
|
new_image = _generate_image_gen_configs(raw_models, self._settings)
|
||||||
|
static_image = [
|
||||||
|
c
|
||||||
|
for c in app_config.GLOBAL_IMAGE_GEN_CONFIGS
|
||||||
|
if not c.get(_OPENROUTER_DYNAMIC_MARKER)
|
||||||
|
]
|
||||||
|
app_config.GLOBAL_IMAGE_GEN_CONFIGS = static_image + new_image
|
||||||
|
self._image_configs = new_image
|
||||||
|
|
||||||
|
if self._settings.get("vision_enabled"):
|
||||||
|
new_vision = _generate_vision_llm_configs(raw_models, self._settings)
|
||||||
|
static_vision = [
|
||||||
|
c
|
||||||
|
for c in app_config.GLOBAL_VISION_LLM_CONFIGS
|
||||||
|
if not c.get(_OPENROUTER_DYNAMIC_MARKER)
|
||||||
|
]
|
||||||
|
app_config.GLOBAL_VISION_LLM_CONFIGS = static_vision + new_vision
|
||||||
|
self._vision_configs = new_vision
|
||||||
|
|
||||||
# Catalogue churn invalidates per-config "recently healthy" credit
|
# Catalogue churn invalidates per-config "recently healthy" credit
|
||||||
# earned by the previous turn's preflight. Drop the whole table so
|
# earned by the previous turn's preflight. Drop the whole table so
|
||||||
# the next turn re-probes against the freshly loaded configs.
|
# the next turn re-probes against the freshly loaded configs.
|
||||||
|
|
@ -407,6 +714,21 @@ class OpenRouterIntegrationService:
|
||||||
# so a hand-picked dead OR model is gated like a dynamic one.
|
# so a hand-picked dead OR model is gated like a dynamic one.
|
||||||
await self._enrich_health_safely(static_configs + new_configs, log_summary=True)
|
await self._enrich_health_safely(static_configs + new_configs, log_summary=True)
|
||||||
|
|
||||||
|
# Re-register LiteLLM pricing for the freshly fetched catalogue
|
||||||
|
# so newly added OR models bill correctly on their first call.
|
||||||
|
# Runs before the router rebuild because the router may issue
|
||||||
|
# cost-table lookups during deployment registration.
|
||||||
|
try:
|
||||||
|
from app.services.pricing_registration import (
|
||||||
|
register_pricing_from_global_configs,
|
||||||
|
)
|
||||||
|
|
||||||
|
register_pricing_from_global_configs()
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning(
|
||||||
|
"OpenRouter refresh: pricing re-registration skipped (%s)", exc
|
||||||
|
)
|
||||||
|
|
||||||
# Rebuild the LiteLLM router so freshly fetched configs flow through
|
# Rebuild the LiteLLM router so freshly fetched configs flow through
|
||||||
# (dynamic OR premium entries now opt into the pool, free ones stay
|
# (dynamic OR premium entries now opt into the pool, free ones stay
|
||||||
# out; a refresh also needs to pick up any static-config edits and
|
# out; a refresh also needs to pick up any static-config edits and
|
||||||
|
|
@ -635,3 +957,34 @@ class OpenRouterIntegrationService:
|
||||||
|
|
||||||
def get_config_by_id(self, config_id: int) -> dict | None:
|
def get_config_by_id(self, config_id: int) -> dict | None:
|
||||||
return self._configs_by_id.get(config_id)
|
return self._configs_by_id.get(config_id)
|
||||||
|
|
||||||
|
def get_image_generation_configs(self) -> list[dict]:
    """Return a shallow copy of the cached OpenRouter image-gen configs.

    The cache is empty when the ``image_generation_enabled`` setting is
    off. Entries carry a per-model ``billing_tier`` and are shaped to be
    appended to ``Config.GLOBAL_IMAGE_GEN_CONFIGS`` as-is. Only the list
    is copied; the config dicts themselves are shared with the cache.
    """
    snapshot = list(self._image_configs)
    return snapshot
|
||||||
|
|
||||||
|
def get_vision_llm_configs(self) -> list[dict]:
    """Return a shallow copy of the cached OpenRouter vision-LLM configs.

    The cache is empty when the ``vision_enabled`` setting is off. Each
    entry exposes ``input_cost_per_token`` / ``output_cost_per_token`` so
    pricing registration can teach LiteLLM what these models cost. Only
    the list is copied; the config dicts are shared with the cache.
    """
    snapshot = list(self._vision_configs)
    return snapshot
|
||||||
|
|
||||||
|
def get_raw_pricing(self) -> dict[str, dict[str, str]]:
    """Return a shallow copy of the cached raw OpenRouter pricing map.

    Shape: ``{model_id: {"prompt": str, "completion": str}}``. Values are
    the strings OpenRouter publishes (USD per token) and are intentionally
    left unparsed, so the caller decides how to treat malformed or unset
    entries. Only the outer dict is copied.
    """
    snapshot = dict(self._raw_pricing)
    return snapshot
|
||||||
|
|
|
||||||
274
surfsense_backend/app/services/pricing_registration.py
Normal file
274
surfsense_backend/app/services/pricing_registration.py
Normal file
|
|
@ -0,0 +1,274 @@
|
||||||
|
"""
|
||||||
|
Pricing registration with LiteLLM.
|
||||||
|
|
||||||
|
Many models reach our LiteLLM callback without LiteLLM knowing their
|
||||||
|
per-token cost — namely:
|
||||||
|
|
||||||
|
* The ~300 dynamic OpenRouter deployments (their pricing only lives on
|
||||||
|
OpenRouter's ``/api/v1/models`` payload, never in LiteLLM's published
|
||||||
|
pricing table).
|
||||||
|
* Static YAML deployments whose ``base_model`` name is operator-defined
|
||||||
|
(e.g. custom Azure deployment names like ``gpt-5.4``) and therefore
|
||||||
|
not in LiteLLM's table either.
|
||||||
|
|
||||||
|
Without registration, ``kwargs["response_cost"]`` is 0 for those calls
|
||||||
|
and the user gets billed nothing — a fail-safe but wrong answer for a
|
||||||
|
cost-based credit system. This module runs at startup and on every catalogue refresh, after the
|
||||||
|
OpenRouter integration has fetched its catalogue, and registers each
|
||||||
|
known model's pricing with ``litellm.register_model()`` under multiple
|
||||||
|
plausible alias keys (LiteLLM's cost lookup may use any of them
|
||||||
|
depending on whether the call went through the Router, ChatLiteLLM,
|
||||||
|
or a direct ``acompletion``).
|
||||||
|
|
||||||
|
Operators who run a custom Azure deployment whose ``base_model`` name
|
||||||
|
isn't in LiteLLM's table can declare per-token pricing inline in
|
||||||
|
``global_llm_config.yaml`` via ``input_cost_per_token`` and
|
||||||
|
``output_cost_per_token`` (USD per token, e.g. ``0.000002``). Without
|
||||||
|
that declaration the model's calls debit 0 — never overbilled.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_float(value: Any) -> float:
|
||||||
|
"""Return ``float(value)`` if it parses to a positive number, else 0.0."""
|
||||||
|
if value is None:
|
||||||
|
return 0.0
|
||||||
|
try:
|
||||||
|
f = float(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return 0.0
|
||||||
|
return f if f > 0 else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _alias_set_for_openrouter(model_id: str) -> list[str]:
|
||||||
|
"""Return the alias keys to register an OpenRouter model under.
|
||||||
|
|
||||||
|
LiteLLM's cost-callback lookup key varies by call path:
|
||||||
|
- Router with ``model="openrouter/X"`` → kwargs["model"] is
|
||||||
|
typically ``openrouter/X``.
|
||||||
|
- LiteLLM's own provider routing may strip the prefix and pass the
|
||||||
|
bare ``X`` to the cost-table lookup.
|
||||||
|
Registering under both keeps the lookup hermetic regardless of
|
||||||
|
which path the call took.
|
||||||
|
"""
|
||||||
|
aliases = [f"openrouter/{model_id}", model_id]
|
||||||
|
return list(dict.fromkeys(a for a in aliases if a))
|
||||||
|
|
||||||
|
|
||||||
|
def _alias_set_for_yaml(provider: str, model_name: str, base_model: str) -> list[str]:
|
||||||
|
"""Return the alias keys to register a static YAML deployment under.
|
||||||
|
|
||||||
|
Same reasoning as the OpenRouter set: cover the bare ``base_model``,
|
||||||
|
the ``<provider>/<model>`` form LiteLLM Router constructs, and the
|
||||||
|
bare ``model_name`` because callbacks sometimes see whichever was
|
||||||
|
configured first.
|
||||||
|
"""
|
||||||
|
provider_lower = (provider or "").lower()
|
||||||
|
aliases: list[str] = []
|
||||||
|
if base_model:
|
||||||
|
aliases.append(base_model)
|
||||||
|
if provider_lower and base_model:
|
||||||
|
aliases.append(f"{provider_lower}/{base_model}")
|
||||||
|
if model_name and model_name != base_model:
|
||||||
|
aliases.append(model_name)
|
||||||
|
if provider_lower and model_name and model_name != base_model:
|
||||||
|
aliases.append(f"{provider_lower}/{model_name}")
|
||||||
|
# Azure deployments often surface as "azure/<name>"; normalise the
|
||||||
|
# ``azure_openai`` provider slug to the LiteLLM-canonical ``azure``.
|
||||||
|
if provider_lower == "azure_openai":
|
||||||
|
if base_model:
|
||||||
|
aliases.append(f"azure/{base_model}")
|
||||||
|
if model_name and model_name != base_model:
|
||||||
|
aliases.append(f"azure/{model_name}")
|
||||||
|
return list(dict.fromkeys(a for a in aliases if a))
|
||||||
|
|
||||||
|
|
||||||
|
def _register(
    aliases: list[str],
    *,
    input_cost: float,
    output_cost: float,
    provider: str,
    mode: str = "chat",
) -> int:
    """Register one pricing entry with LiteLLM under every given alias.

    All aliases go to ``litellm.register_model`` in a single call, so the
    outcome is all-or-nothing.

    Args:
        aliases: Model keys to register the pricing under.
        input_cost: USD per input token.
        output_cost: USD per output token.
        provider: Value stored as ``litellm_provider``.
        mode: LiteLLM model mode; defaults to ``"chat"``.

    Returns:
        The number of distinct aliases registered, or 0 when ``aliases``
        is empty or ``register_model`` raised (the failure is logged as a
        warning, not propagated).
    """
    # One fresh dict per alias so entries never share mutable state.
    payload: dict[str, dict[str, Any]] = {
        alias: {
            "input_cost_per_token": input_cost,
            "output_cost_per_token": output_cost,
            "litellm_provider": provider,
            "mode": mode,
        }
        for alias in aliases
    }
    if not payload:
        return 0

    try:
        litellm.register_model(payload)
    except Exception as exc:
        # Best-effort: a registration failure must not break the caller.
        logger.warning(
            "[PricingRegistration] register_model failed for aliases=%s: %s",
            aliases,
            exc,
        )
        return 0
    return len(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def _register_chat_shape_configs(
    configs: list[dict],
    *,
    or_pricing: dict[str, dict[str, str]],
    label: str,
) -> tuple[int, int, int, list[str]]:
    """Register per-token pricing for a list of "chat-shape" configs.

    Chat and vision-LLM configs both use ``input_cost_per_token`` /
    ``output_cost_per_token`` and are registered with the default
    ``mode="chat"``.

    Price resolution per config:
    - ``OPENROUTER``: the ``or_pricing`` entry for ``model_name`` when one
      exists, otherwise the prices carried inline on the config.
    - any other provider: the prices declared on the config, falling back
      to the ones nested under ``litellm_params``.

    A config whose input and output cost both resolve to 0 is skipped.

    Args:
        configs: Config dicts to register.
        or_pricing: Raw OpenRouter pricing keyed by model id.
        label: Tag used in the summary log line (e.g. ``"chat"``).

    Returns:
        ``(registered_models, registered_aliases, skipped, sample_keys)``.
    """
    models_done = 0
    aliases_done = 0
    skipped = 0
    samples: list[str] = []

    for cfg in configs:
        provider = str(cfg.get("provider") or "").upper()
        model_name = str(cfg.get("model_name") or "").strip()
        params = cfg.get("litellm_params") or {}
        base_model = str(params.get("base_model") or model_name).strip()
        via_openrouter = provider == "OPENROUTER"

        # Step 1: pick the raw price pair for this config.
        if via_openrouter:
            cached = or_pricing.get(model_name)
            if cached:
                raw_in = cached.get("prompt")
                raw_out = cached.get("completion")
            else:
                # No cached entry for this model id: use the inline prices
                # (vision configs carry them on the config itself).
                raw_in = cfg.get("input_cost_per_token")
                raw_out = cfg.get("output_cost_per_token")
        else:
            raw_in = cfg.get("input_cost_per_token") or params.get(
                "input_cost_per_token"
            )
            raw_out = cfg.get("output_cost_per_token") or params.get(
                "output_cost_per_token"
            )

        input_cost = _safe_float(raw_in)
        output_cost = _safe_float(raw_out)
        if input_cost == 0.0 and output_cost == 0.0:
            skipped += 1
            continue

        # Step 2: choose alias keys and the LiteLLM provider slug.
        if via_openrouter:
            aliases = _alias_set_for_openrouter(model_name)
            provider_slug = "openrouter"
        else:
            aliases = _alias_set_for_yaml(provider, model_name, base_model)
            provider_slug = (
                "azure" if provider == "AZURE_OPENAI" else provider.lower()
            )

        # Step 3: register and keep the tallies.
        count = _register(
            aliases,
            input_cost=input_cost,
            output_cost=output_cost,
            provider=provider_slug,
        )
        if count > 0:
            models_done += 1
            aliases_done += count
            if len(samples) < 6:
                samples.extend(aliases[:2])

    logger.info(
        "[PricingRegistration:%s] registered pricing for %d models (%d aliases); "
        "%d configs had no pricing data; sample registered keys=%s",
        label,
        models_done,
        aliases_done,
        skipped,
        samples,
    )
    return models_done, aliases_done, skipped, samples
|
||||||
|
|
||||||
|
|
||||||
|
def register_pricing_from_global_configs() -> None:
    """Register pricing for every known LLM deployment with LiteLLM.

    Walks ``config.GLOBAL_LLM_CONFIGS`` and
    ``config.GLOBAL_VISION_LLM_CONFIGS`` so vision calls can resolve cost
    the same way chat calls do:

    1. ``OPENROUTER`` configs use the raw pricing cached by
       ``OpenRouterIntegrationService`` (when it is initialized), falling
       back to prices carried inline on the config.
    2. All other configs use operator-declared ``input_cost_per_token`` /
       ``output_cost_per_token`` from the config block, top-level or
       nested under ``litellm_params``.

    Image-generation configs are intentionally not walked here: their
    cost is per-image rather than per-token, so they do not fit the
    chat-shape registration.

    Configs whose input and output cost both resolve to zero are skipped
    rather than registered with zeros, so pricing LiteLLM may already
    know natively is not overwritten.
    """
    from app.config import config as app_config

    chat_configs: list[dict] = list(getattr(app_config, "GLOBAL_LLM_CONFIGS", []) or [])
    vision_configs: list[dict] = list(
        getattr(app_config, "GLOBAL_VISION_LLM_CONFIGS", []) or []
    )
    if not (chat_configs or vision_configs):
        logger.info("[PricingRegistration] no global configs to register")
        return

    # Best-effort: when the OpenRouter service is unavailable or not yet
    # initialized, registration proceeds with an empty pricing map.
    or_pricing: dict[str, dict[str, str]] = {}
    try:
        from app.services.openrouter_integration_service import (
            OpenRouterIntegrationService,
        )

        if OpenRouterIntegrationService.is_initialized():
            or_pricing = OpenRouterIntegrationService.get_instance().get_raw_pricing()
    except Exception as exc:
        logger.debug(
            "[PricingRegistration] OpenRouter pricing not available yet: %s", exc
        )

    for label, group in (("chat", chat_configs), ("vision", vision_configs)):
        if group:
            _register_chat_shape_configs(group, or_pricing=or_pricing, label=label)
|
||||||
106
surfsense_backend/app/services/provider_api_base.py
Normal file
106
surfsense_backend/app/services/provider_api_base.py
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
"""Provider-aware ``api_base`` resolution shared by chat / image-gen / vision.
|
||||||
|
|
||||||
|
LiteLLM falls back to the module-global ``litellm.api_base`` when an
|
||||||
|
individual call doesn't pass one, which silently inherits provider-agnostic
|
||||||
|
env vars like ``AZURE_OPENAI_ENDPOINT`` / ``OPENAI_API_BASE``. Without an
|
||||||
|
explicit ``api_base``, an ``openrouter/<model>`` request can end up at an
|
||||||
|
Azure endpoint and 404 with ``Resource not found`` (real reproducer:
|
||||||
|
[litellm/llms/openrouter/image_generation/transformation.py:242-263] appends
|
||||||
|
``/chat/completions`` to whatever inherited base it gets, regardless of
|
||||||
|
provider).
|
||||||
|
|
||||||
|
The chat router has had this defense for a while
|
||||||
|
(``llm_router_service.py:466-478``). This module hoists the maps + cascade
|
||||||
|
into a tiny standalone helper so vision and image-gen can share the same
|
||||||
|
source of truth without an inter-service circular import.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
PROVIDER_DEFAULT_API_BASE: dict[str, str] = {
|
||||||
|
"openrouter": "https://openrouter.ai/api/v1",
|
||||||
|
"groq": "https://api.groq.com/openai/v1",
|
||||||
|
"mistral": "https://api.mistral.ai/v1",
|
||||||
|
"perplexity": "https://api.perplexity.ai",
|
||||||
|
"xai": "https://api.x.ai/v1",
|
||||||
|
"cerebras": "https://api.cerebras.ai/v1",
|
||||||
|
"deepinfra": "https://api.deepinfra.com/v1/openai",
|
||||||
|
"fireworks_ai": "https://api.fireworks.ai/inference/v1",
|
||||||
|
"together_ai": "https://api.together.xyz/v1",
|
||||||
|
"anyscale": "https://api.endpoints.anyscale.com/v1",
|
||||||
|
"cometapi": "https://api.cometapi.com/v1",
|
||||||
|
"sambanova": "https://api.sambanova.ai/v1",
|
||||||
|
}
|
||||||
|
"""Default ``api_base`` per LiteLLM provider prefix (lowercase).
|
||||||
|
|
||||||
|
Only providers with a well-known, stable public base URL are listed —
|
||||||
|
self-hosted / BYO-endpoint providers (ollama, custom, bedrock, vertex_ai,
|
||||||
|
huggingface, databricks, cloudflare, replicate) are intentionally omitted
|
||||||
|
so their existing config-driven behaviour is preserved."""
|
||||||
|
|
||||||
|
|
||||||
|
PROVIDER_KEY_DEFAULT_API_BASE: dict[str, str] = {
|
||||||
|
"DEEPSEEK": "https://api.deepseek.com/v1",
|
||||||
|
"ALIBABA_QWEN": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
|
||||||
|
"MOONSHOT": "https://api.moonshot.ai/v1",
|
||||||
|
"ZHIPU": "https://open.bigmodel.cn/api/paas/v4",
|
||||||
|
"MINIMAX": "https://api.minimax.io/v1",
|
||||||
|
}
|
||||||
|
"""Canonical provider key (uppercase) → base URL.
|
||||||
|
|
||||||
|
Used when the LiteLLM provider prefix is the generic ``openai`` shim but the
|
||||||
|
config's ``provider`` field tells us which API it actually is (DeepSeek,
|
||||||
|
Alibaba, Moonshot, Zhipu, MiniMax all use the ``openai`` prefix but each
|
||||||
|
has its own base URL)."""
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_api_base(
|
||||||
|
*,
|
||||||
|
provider: str | None,
|
||||||
|
provider_prefix: str | None,
|
||||||
|
config_api_base: str | None,
|
||||||
|
) -> str | None:
|
||||||
|
"""Resolve a non-Azure-leaking ``api_base`` for a deployment.
|
||||||
|
|
||||||
|
Cascade (first non-empty wins):
|
||||||
|
1. The config's own ``api_base`` (whitespace-only treated as missing).
|
||||||
|
2. ``PROVIDER_KEY_DEFAULT_API_BASE[provider.upper()]``.
|
||||||
|
3. ``PROVIDER_DEFAULT_API_BASE[provider_prefix.lower()]``.
|
||||||
|
4. ``None`` — caller should NOT set ``api_base`` and let the LiteLLM
|
||||||
|
provider integration apply its own default (e.g. AzureOpenAI's
|
||||||
|
deployment-derived URL, custom provider's per-deployment URL).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
provider: The config's ``provider`` field (e.g. ``"OPENROUTER"``,
|
||||||
|
``"DEEPSEEK"``). Case-insensitive.
|
||||||
|
provider_prefix: The LiteLLM model-string prefix the same call
|
||||||
|
site builds for the model id (e.g. ``"openrouter"``,
|
||||||
|
``"groq"``). Case-insensitive.
|
||||||
|
config_api_base: ``api_base`` from the global YAML / DB row /
|
||||||
|
OpenRouter dynamic config. Empty / whitespace-only means
|
||||||
|
"missing" — the resolver still applies the cascade.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A URL string, or ``None`` if no default applies for this provider.
|
||||||
|
"""
|
||||||
|
if config_api_base and config_api_base.strip():
|
||||||
|
return config_api_base
|
||||||
|
|
||||||
|
if provider:
|
||||||
|
key_default = PROVIDER_KEY_DEFAULT_API_BASE.get(provider.upper())
|
||||||
|
if key_default:
|
||||||
|
return key_default
|
||||||
|
|
||||||
|
if provider_prefix:
|
||||||
|
prefix_default = PROVIDER_DEFAULT_API_BASE.get(provider_prefix.lower())
|
||||||
|
if prefix_default:
|
||||||
|
return prefix_default
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"PROVIDER_DEFAULT_API_BASE",
|
||||||
|
"PROVIDER_KEY_DEFAULT_API_BASE",
|
||||||
|
"resolve_api_base",
|
||||||
|
]
|
||||||
280
surfsense_backend/app/services/provider_capabilities.py
Normal file
280
surfsense_backend/app/services/provider_capabilities.py
Normal file
|
|
@ -0,0 +1,280 @@
|
||||||
|
"""Capability resolution shared by chat / image / vision call sites.
|
||||||
|
|
||||||
|
Why this exists
|
||||||
|
---------------
|
||||||
|
The chat catalog (YAML + dynamic OpenRouter + BYOK DB rows + Auto) needs a
|
||||||
|
single, authoritative answer to one question: *can this chat config accept
|
||||||
|
``image_url`` content blocks?* Without it, the new-chat selector can't badge
|
||||||
|
incompatible models and the streaming task can't fail fast with a friendly
|
||||||
|
error before sending an image to a text-only provider.
|
||||||
|
|
||||||
|
Two functions, two intents:
|
||||||
|
|
||||||
|
- :func:`derive_supports_image_input` — best-effort *True* for catalog and
|
||||||
|
UI surfacing. Default-allow: an unknown / unmapped model is treated as
|
||||||
|
capable so we never lock the user out of a freshly added or
|
||||||
|
third-party-hosted vision model.
|
||||||
|
|
||||||
|
- :func:`is_known_text_only_chat_model` — strict opt-out for the streaming
|
||||||
|
task's safety net. Returns True only when LiteLLM's model map *explicitly*
|
||||||
|
sets ``supports_vision=False`` (or its bare-name variant does). Anything
|
||||||
|
else — missing key, lookup exception, ``supports_vision=True`` — returns
|
||||||
|
False so the request flows through to the provider.
|
||||||
|
|
||||||
|
Implementation rule: only public LiteLLM symbols
|
||||||
|
------------------------------------------------
|
||||||
|
``litellm.supports_vision`` and ``litellm.get_model_info`` are part of the
|
||||||
|
typed module surface (see ``litellm.__init__`` lazy stubs) and are stable
|
||||||
|
across releases. The private ``_is_explicitly_disabled_factory`` and
|
||||||
|
``_get_model_info_helper`` are intentionally avoided so a LiteLLM upgrade
|
||||||
|
can't silently break us.
|
||||||
|
|
||||||
|
Why the previous round's strict YAML opt-in flag failed
|
||||||
|
-------------------------------------------------------
|
||||||
|
``supports_image_input: false`` was the YAML loader's setdefault. Operators
|
||||||
|
maintaining ``global_llm_config.yaml`` never set it, so every Azure / OpenAI
|
||||||
|
YAML chat model — including vision-capable GPT-5.x and GPT-4o — resolved to
|
||||||
|
False and the streaming gate rejected every image turn. Sourcing capability
|
||||||
|
from LiteLLM's authoritative model map (which already says
|
||||||
|
``azure/gpt-5.4 -> supports_vision=true``) removes that operator toil.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Provider-name → LiteLLM model-prefix map.
|
||||||
|
#
|
||||||
|
# Owned here because ``app.services.provider_capabilities`` is the
|
||||||
|
# only edge that's safe to call from ``app.config``'s YAML loader at
|
||||||
|
# class-body init time. ``app.agents.new_chat.llm_config`` re-exports
|
||||||
|
# this constant under the historical ``PROVIDER_MAP`` name; placing the
|
||||||
|
# map there directly would re-introduce the
|
||||||
|
# ``app.config -> ... -> app.agents.new_chat.tools.generate_image ->
|
||||||
|
# app.config`` cycle that prompted the move.
|
||||||
|
_PROVIDER_PREFIX_MAP: dict[str, str] = {
|
||||||
|
"OPENAI": "openai",
|
||||||
|
"ANTHROPIC": "anthropic",
|
||||||
|
"GROQ": "groq",
|
||||||
|
"COHERE": "cohere",
|
||||||
|
"GOOGLE": "gemini",
|
||||||
|
"OLLAMA": "ollama_chat",
|
||||||
|
"MISTRAL": "mistral",
|
||||||
|
"AZURE_OPENAI": "azure",
|
||||||
|
"OPENROUTER": "openrouter",
|
||||||
|
"XAI": "xai",
|
||||||
|
"BEDROCK": "bedrock",
|
||||||
|
"VERTEX_AI": "vertex_ai",
|
||||||
|
"TOGETHER_AI": "together_ai",
|
||||||
|
"FIREWORKS_AI": "fireworks_ai",
|
||||||
|
"DEEPSEEK": "openai",
|
||||||
|
"ALIBABA_QWEN": "openai",
|
||||||
|
"MOONSHOT": "openai",
|
||||||
|
"ZHIPU": "openai",
|
||||||
|
"GITHUB_MODELS": "github",
|
||||||
|
"REPLICATE": "replicate",
|
||||||
|
"PERPLEXITY": "perplexity",
|
||||||
|
"ANYSCALE": "anyscale",
|
||||||
|
"DEEPINFRA": "deepinfra",
|
||||||
|
"CEREBRAS": "cerebras",
|
||||||
|
"SAMBANOVA": "sambanova",
|
||||||
|
"AI21": "ai21",
|
||||||
|
"CLOUDFLARE": "cloudflare",
|
||||||
|
"DATABRICKS": "databricks",
|
||||||
|
"COMETAPI": "cometapi",
|
||||||
|
"HUGGINGFACE": "huggingface",
|
||||||
|
"MINIMAX": "openai",
|
||||||
|
"CUSTOM": "custom",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _candidate_model_strings(
|
||||||
|
*,
|
||||||
|
provider: str | None,
|
||||||
|
model_name: str | None,
|
||||||
|
base_model: str | None,
|
||||||
|
custom_provider: str | None,
|
||||||
|
) -> list[tuple[str, str | None]]:
|
||||||
|
"""Return ``[(model_string, custom_llm_provider), ...]`` lookup candidates.
|
||||||
|
|
||||||
|
LiteLLM's capability lookup is keyed by ``model`` + (optional)
|
||||||
|
``custom_llm_provider``. Different config sources give us different
|
||||||
|
levels of detail, so we try the most-specific keys first and fall back
|
||||||
|
to bare model names so unannotated entries (e.g. an Azure deployment
|
||||||
|
pointing at ``gpt-5.4`` via ``litellm_params.base_model``) still hit the
|
||||||
|
map. Order matters — the first lookup that returns a definitive answer
|
||||||
|
wins for both helpers.
|
||||||
|
Args:
provider: Config ``provider`` field. Upper-cased for the
``_PROVIDER_PREFIX_MAP`` lookup; an unmapped provider falls back to
its own lower-cased form as the prefix.
model_name: Model identifier from the config; used as the primary
model when ``base_model`` is empty.
base_model: Preferred identifier when set (``base_model or model_name``).
custom_provider: When truthy, replaces the prefix derived from
``provider``.
Returns:
Candidate ``(model, custom_llm_provider)`` pairs in insertion order,
with empty models skipped and duplicate pairs dropped.
"""
|
||||||
|
candidates: list[tuple[str, str | None]] = []
|
||||||
|
seen: set[tuple[str, str | None]] = set()
|
||||||
|
|
||||||
|
def _add(model: str | None, llm_provider: str | None) -> None:
|
||||||
|
if not model:
|
||||||
|
return
|
||||||
|
key = (model, llm_provider)
|
||||||
|
if key in seen:
|
||||||
|
return
|
||||||
|
seen.add(key)
|
||||||
|
candidates.append(key)
|
||||||
|
|
||||||
|
provider_prefix: str | None = None
|
||||||
|
if provider:
|
||||||
|
provider_prefix = _PROVIDER_PREFIX_MAP.get(provider.upper(), provider.lower())
|
||||||
|
if custom_provider:
|
||||||
|
# ``custom_provider`` overrides everything for CUSTOM/proxy setups.
|
||||||
|
provider_prefix = custom_provider
|
||||||
|
|
||||||
|
primary_model = base_model or model_name
|
||||||
|
bare_model = model_name
|
||||||
|
|
||||||
|
# Most-specific first: provider-prefixed identifier with explicit
|
||||||
|
# custom_llm_provider so LiteLLM won't have to guess the provider via
|
||||||
|
# ``get_llm_provider``.
|
||||||
|
if primary_model and provider_prefix:
|
||||||
|
# e.g. "azure/gpt-5.4" + custom_llm_provider="azure"
|
||||||
|
if "/" in primary_model:
|
||||||
|
_add(primary_model, provider_prefix)
|
||||||
|
else:
|
||||||
|
_add(f"{provider_prefix}/{primary_model}", provider_prefix)
|
||||||
|
|
||||||
|
# Bare base_model (or model_name) with provider hint — handles entries
|
||||||
|
# the upstream map keys without a provider prefix (most ``gpt-*`` and
|
||||||
|
# ``claude-*`` entries do this).
|
||||||
|
if primary_model:
|
||||||
|
_add(primary_model, provider_prefix)
|
||||||
|
|
||||||
|
# Fallback to model_name when base_model differs (e.g. an Azure
|
||||||
|
# deployment whose model_name is the deployment id but base_model is the
|
||||||
|
# canonical OpenAI sku).
|
||||||
|
if bare_model and bare_model != primary_model:
|
||||||
|
if provider_prefix and "/" not in bare_model:
|
||||||
|
_add(f"{provider_prefix}/{bare_model}", provider_prefix)
|
||||||
|
_add(bare_model, provider_prefix)
|
||||||
|
_add(bare_model, None)
|
||||||
|
|
||||||
|
return candidates
|
||||||
|
|
||||||
|
|
||||||
|
def derive_supports_image_input(
|
||||||
|
*,
|
||||||
|
provider: str | None = None,
|
||||||
|
model_name: str | None = None,
|
||||||
|
base_model: str | None = None,
|
||||||
|
custom_provider: str | None = None,
|
||||||
|
openrouter_input_modalities: Iterable[str] | None = None,
|
||||||
|
) -> bool:
|
||||||
|
"""Best-effort capability flag for the new-chat selector and catalog.
|
||||||
|
|
||||||
|
Resolution order (first definitive answer wins):
|
||||||
|
|
||||||
|
1. ``openrouter_input_modalities`` (when provided as a non-empty
|
||||||
|
iterable). OpenRouter exposes ``architecture.input_modalities`` per
|
||||||
|
model and that's the authoritative source for OR dynamic configs.
|
||||||
|
2. ``litellm.supports_vision`` against each candidate identifier from
|
||||||
|
:func:`_candidate_model_strings`. Returns True as soon as any
|
||||||
|
candidate confirms vision support.
|
||||||
|
3. Default ``True`` — the conservative-allow stance. An unknown /
|
||||||
|
newly-added / third-party-hosted model is *not* pre-judged. The
|
||||||
|
streaming safety net (:func:`is_known_text_only_chat_model`) is the
|
||||||
|
only place a False ever blocks; everywhere else, a False here would
|
||||||
|
just hide a usable model from the user.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if the model can plausibly accept image input, False only when
|
||||||
|
OpenRouter explicitly says it can't.
|
||||||
|
"""
|
||||||
|
if openrouter_input_modalities is not None:
|
||||||
|
modalities = list(openrouter_input_modalities)
|
||||||
|
if modalities:
|
||||||
|
return "image" in modalities
|
||||||
|
# Empty list explicitly published by OR — treat as "no image".
|
||||||
|
return False
|
||||||
|
|
||||||
|
for model_string, custom_llm_provider in _candidate_model_strings(
|
||||||
|
provider=provider,
|
||||||
|
model_name=model_name,
|
||||||
|
base_model=base_model,
|
||||||
|
custom_provider=custom_provider,
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
if litellm.supports_vision(
|
||||||
|
model=model_string, custom_llm_provider=custom_llm_provider
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug(
|
||||||
|
"litellm.supports_vision raised for model=%s provider=%s: %s",
|
||||||
|
model_string,
|
||||||
|
custom_llm_provider,
|
||||||
|
exc,
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Default-allow. ``is_known_text_only_chat_model`` is the strict gate.
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def is_known_text_only_chat_model(
    *,
    provider: str | None = None,
    model_name: str | None = None,
    base_model: str | None = None,
    custom_provider: str | None = None,
) -> bool:
    """Strict opt-out probe used by the streaming task's safety net.

    Answers True only if LiteLLM's model info carries an explicit
    ``supports_vision=False`` for one of the candidate identifiers. A
    missing key, ``None``, ``True``, or a failed lookup never blocks: in
    all of those cases the function answers False and the request is
    allowed to reach the provider. This is the inverse default of
    :func:`derive_supports_image_input`, which exists for the selector's
    "show everything plausibly capable" intent.
    """

    def _vision_flag(model_info):
        # ``ModelInfo`` is a dict at runtime; tolerate anything without
        # ``.get`` by reporting "unknown".
        try:
            return model_info.get("supports_vision")  # type: ignore[union-attr]
        except AttributeError:
            return None

    lookup_keys = _candidate_model_strings(
        provider=provider,
        model_name=model_name,
        base_model=base_model,
        custom_provider=custom_provider,
    )

    for candidate, llm_provider in lookup_keys:
        try:
            model_info = litellm.get_model_info(
                model=candidate, custom_llm_provider=llm_provider
            )
        except Exception as exc:
            logger.debug(
                "litellm.get_model_info raised for model=%s provider=%s: %s",
                candidate,
                llm_provider,
                exc,
            )
            continue

        # Identity check on purpose: only a literal ``False`` is a verdict.
        if _vision_flag(model_info) is False:
            return True

    return False
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"derive_supports_image_input",
|
||||||
|
"is_known_text_only_chat_model",
|
||||||
|
]
|
||||||
105
surfsense_backend/app/services/quota_checked_vision_llm.py
Normal file
105
surfsense_backend/app/services/quota_checked_vision_llm.py
Normal file
|
|
@ -0,0 +1,105 @@
|
||||||
|
"""
|
||||||
|
Vision LLM proxy that enforces premium credit quota on every ``ainvoke``.
|
||||||
|
|
||||||
|
Used by :func:`app.services.llm_service.get_vision_llm` so callers in the
|
||||||
|
indexing pipeline (file processors, connector indexers, etl pipeline) can
|
||||||
|
keep invoking the LLM exactly the way they do today — ``await llm.ainvoke(...)``
|
||||||
|
— without threading ``user_id`` through every parser. The wrapper looks like
|
||||||
|
a chat model from the outside; on the inside it routes each call through
|
||||||
|
``billable_call`` so the user's premium credit pool is reserved → finalized
|
||||||
|
or released, and a ``TokenUsage`` audit row is written.
|
||||||
|
|
||||||
|
Free configs are returned unwrapped from ``get_vision_llm`` (they do not
|
||||||
|
need quota enforcement) so this class only ever wraps premium configs.
|
||||||
|
|
||||||
|
Why a wrapper instead of plumbing ``user_id`` through every caller:
|
||||||
|
|
||||||
|
* The indexer ecosystem has 8+ entry points (Google Drive, OneDrive,
|
||||||
|
Dropbox, local-folder, file-processor, ETL pipeline) each calling
|
||||||
|
``parse_with_vision_llm(...)``. Adding a ``user_id`` argument to each is
|
||||||
|
invasive, error-prone, and easy for a future indexer to forget.
|
||||||
|
* Per the design (issue M), we always debit the *search-space owner*, not
|
||||||
|
the triggering user, so ``user_id`` is fully derivable from the search
|
||||||
|
space the caller is already operating on. The wrapper captures it once
|
||||||
|
at construction time.
|
||||||
|
* ``langchain_litellm.ChatLiteLLM`` has no public hook for "before each
|
||||||
|
call run this coroutine"; subclassing isn't safe across versions because
|
||||||
|
it derives from ``BaseChatModel`` which expects specific Pydantic shapes.
|
||||||
|
Composition via attribute proxying (``__getattr__``) is robust to
|
||||||
|
upstream changes — every method other than ``ainvoke`` falls through to
|
||||||
|
the inner LLM unchanged.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
from app.services.billable_calls import QuotaInsufficientError, billable_call
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class QuotaCheckedVisionLLM:
|
||||||
|
"""Composition wrapper around a langchain chat model that enforces
|
||||||
|
premium credit quota on every ``ainvoke``.
|
||||||
|
|
||||||
|
Anything other than ``ainvoke`` is forwarded to the inner model so
|
||||||
|
``invoke`` (sync), ``astream``, ``with_structured_output``, etc. all
|
||||||
|
still work — they simply bypass quota enforcement, which is fine
|
||||||
|
because the indexing pipeline only ever calls ``ainvoke`` today.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
inner_llm: Any,
|
||||||
|
*,
|
||||||
|
user_id: UUID,
|
||||||
|
search_space_id: int,
|
||||||
|
billing_tier: str,
|
||||||
|
base_model: str,
|
||||||
|
quota_reserve_tokens: int | None,
|
||||||
|
usage_type: str = "vision_extraction",
|
||||||
|
) -> None:
|
||||||
|
self._inner = inner_llm
|
||||||
|
self._user_id = user_id
|
||||||
|
self._search_space_id = search_space_id
|
||||||
|
self._billing_tier = billing_tier
|
||||||
|
self._base_model = base_model
|
||||||
|
self._quota_reserve_tokens = quota_reserve_tokens
|
||||||
|
self._usage_type = usage_type
|
||||||
|
|
||||||
|
async def ainvoke(self, input: Any, *args: Any, **kwargs: Any) -> Any:
|
||||||
|
"""Proxied async invoke that runs the underlying call inside
|
||||||
|
``billable_call``.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
QuotaInsufficientError: when the user has exhausted their
|
||||||
|
premium credit pool. Caller (``etl_pipeline_service._extract_image``)
|
||||||
|
catches this and falls back to the document parser.
|
||||||
|
"""
|
||||||
|
async with billable_call(
|
||||||
|
user_id=self._user_id,
|
||||||
|
search_space_id=self._search_space_id,
|
||||||
|
billing_tier=self._billing_tier,
|
||||||
|
base_model=self._base_model,
|
||||||
|
quota_reserve_tokens=self._quota_reserve_tokens,
|
||||||
|
usage_type=self._usage_type,
|
||||||
|
call_details={"model": self._base_model},
|
||||||
|
):
|
||||||
|
return await self._inner.ainvoke(input, *args, **kwargs)
|
||||||
|
|
||||||
|
def __getattr__(self, name: str) -> Any:
|
||||||
|
"""Forward everything else (``invoke``, ``astream``, ``bind``,
|
||||||
|
``with_structured_output``, …) to the inner model.
|
||||||
|
|
||||||
|
``__getattr__`` is only consulted when the attribute is *not*
|
||||||
|
already found on the proxy, which is exactly the contract we
|
||||||
|
want — methods we override stay on the proxy, the rest fall
|
||||||
|
through.
|
||||||
|
"""
|
||||||
|
return getattr(self._inner, name)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["QuotaCheckedVisionLLM", "QuotaInsufficientError"]
|
||||||
|
|
@ -22,6 +22,71 @@ from app.config import config
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Per-call reservation estimator (USD micro-units)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Minimum reserve in micros so a user with $0.0001 left can still make a tiny
|
||||||
|
# request, and so models without registered pricing reserve at least
|
||||||
|
# something while the call runs (debited 0 at finalize anyway when their
|
||||||
|
# cost can't be resolved).
|
||||||
|
_QUOTA_MIN_RESERVE_MICROS = 100
|
||||||
|
|
||||||
|
|
||||||
|
def estimate_call_reserve_micros(
    *,
    base_model: str,
    quota_reserve_tokens: int | None,
) -> int:
    """Estimate how many micro-USD to hold back for a single premium call.

    The figure is a worst-case bound derived from LiteLLM's per-token
    prices:

        reserve_usd ≈ reserve_tokens x (input_cost + output_cost)

    so expensive models reserve proportionally more than cheap ones. The
    result is clamped to
    ``[_QUOTA_MIN_RESERVE_MICROS, config.QUOTA_MAX_RESERVE_MICROS]`` so a
    mispriced model cannot lock the whole balance on one call.

    When no price can be resolved (lookup raises, or both per-token costs
    are zero / absent) the floor ``_QUOTA_MIN_RESERVE_MICROS`` is returned.

    Args:
        base_model: Model id handed to ``litellm.get_model_info``; an empty
            value skips the lookup.
        quota_reserve_tokens: Token budget for the estimate; a missing or
            non-positive value falls back to
            ``config.QUOTA_MAX_RESERVE_PER_CALL``.

    Returns:
        The reservation in micro-USD.
    """
    token_budget = quota_reserve_tokens
    if not token_budget or token_budget <= 0:
        token_budget = config.QUOTA_MAX_RESERVE_PER_CALL

    try:
        from litellm import get_model_info

        pricing = get_model_info(base_model) if base_model else {}
        prompt_rate = float(pricing.get("input_cost_per_token") or 0.0)
        completion_rate = float(pricing.get("output_cost_per_token") or 0.0)
    except Exception as exc:
        logger.debug(
            "[quota_reserve] cost lookup failed for base_model=%s: %s",
            base_model,
            exc,
        )
        # Discard any half-resolved price so the floor below applies.
        prompt_rate = completion_rate = 0.0

    # No usable pricing at all: reserve only the floor.
    if not (prompt_rate or completion_rate):
        return _QUOTA_MIN_RESERVE_MICROS

    micros = round(token_budget * (prompt_rate + completion_rate) * 1_000_000)

    # Floor first, then ceiling, so the ceiling wins if the two ever cross.
    return min(
        max(micros, _QUOTA_MIN_RESERVE_MICROS),
        config.QUOTA_MAX_RESERVE_MICROS,
    )
|
||||||
|
|
||||||
|
|
||||||
class QuotaScope(StrEnum):
|
class QuotaScope(StrEnum):
|
||||||
ANONYMOUS = "anonymous"
|
ANONYMOUS = "anonymous"
|
||||||
PREMIUM = "premium"
|
PREMIUM = "premium"
|
||||||
|
|
@ -444,8 +509,16 @@ class TokenQuotaService:
|
||||||
db_session: AsyncSession,
|
db_session: AsyncSession,
|
||||||
user_id: Any,
|
user_id: Any,
|
||||||
request_id: str,
|
request_id: str,
|
||||||
reserve_tokens: int,
|
reserve_micros: int,
|
||||||
) -> QuotaResult:
|
) -> QuotaResult:
|
||||||
|
"""Reserve ``reserve_micros`` (USD micro-units) from the user's
|
||||||
|
premium credit balance.
|
||||||
|
|
||||||
|
``QuotaResult.used``/``limit``/``reserved``/``remaining`` are
|
||||||
|
all in micro-USD on this code path; callers (chat stream,
|
||||||
|
token-status route, FE display) convert to dollars by dividing
|
||||||
|
by 1_000_000.
|
||||||
|
"""
|
||||||
from app.db import User
|
from app.db import User
|
||||||
|
|
||||||
user = (
|
user = (
|
||||||
|
|
@ -465,11 +538,11 @@ class TokenQuotaService:
|
||||||
limit=0,
|
limit=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
limit = user.premium_tokens_limit
|
limit = user.premium_credit_micros_limit
|
||||||
used = user.premium_tokens_used
|
used = user.premium_credit_micros_used
|
||||||
reserved = user.premium_tokens_reserved
|
reserved = user.premium_credit_micros_reserved
|
||||||
|
|
||||||
effective = used + reserved + reserve_tokens
|
effective = used + reserved + reserve_micros
|
||||||
if effective > limit:
|
if effective > limit:
|
||||||
remaining = max(0, limit - used - reserved)
|
remaining = max(0, limit - used - reserved)
|
||||||
await db_session.rollback()
|
await db_session.rollback()
|
||||||
|
|
@ -482,10 +555,10 @@ class TokenQuotaService:
|
||||||
remaining=remaining,
|
remaining=remaining,
|
||||||
)
|
)
|
||||||
|
|
||||||
user.premium_tokens_reserved = reserved + reserve_tokens
|
user.premium_credit_micros_reserved = reserved + reserve_micros
|
||||||
await db_session.commit()
|
await db_session.commit()
|
||||||
|
|
||||||
new_reserved = reserved + reserve_tokens
|
new_reserved = reserved + reserve_micros
|
||||||
remaining = max(0, limit - used - new_reserved)
|
remaining = max(0, limit - used - new_reserved)
|
||||||
warning_threshold = int(limit * 0.8)
|
warning_threshold = int(limit * 0.8)
|
||||||
|
|
||||||
|
|
@ -510,9 +583,12 @@ class TokenQuotaService:
|
||||||
db_session: AsyncSession,
|
db_session: AsyncSession,
|
||||||
user_id: Any,
|
user_id: Any,
|
||||||
request_id: str,
|
request_id: str,
|
||||||
actual_tokens: int,
|
actual_micros: int,
|
||||||
reserved_tokens: int,
|
reserved_micros: int,
|
||||||
) -> QuotaResult:
|
) -> QuotaResult:
|
||||||
|
"""Settle the reservation: release ``reserved_micros`` and debit
|
||||||
|
``actual_micros`` (the LiteLLM-reported provider cost in micro-USD).
|
||||||
|
"""
|
||||||
from app.db import User
|
from app.db import User
|
||||||
|
|
||||||
user = (
|
user = (
|
||||||
|
|
@ -529,16 +605,18 @@ class TokenQuotaService:
|
||||||
allowed=False, status=QuotaStatus.BLOCKED, used=0, limit=0
|
allowed=False, status=QuotaStatus.BLOCKED, used=0, limit=0
|
||||||
)
|
)
|
||||||
|
|
||||||
user.premium_tokens_reserved = max(
|
user.premium_credit_micros_reserved = max(
|
||||||
0, user.premium_tokens_reserved - reserved_tokens
|
0, user.premium_credit_micros_reserved - reserved_micros
|
||||||
|
)
|
||||||
|
user.premium_credit_micros_used = (
|
||||||
|
user.premium_credit_micros_used + actual_micros
|
||||||
)
|
)
|
||||||
user.premium_tokens_used = user.premium_tokens_used + actual_tokens
|
|
||||||
|
|
||||||
await db_session.commit()
|
await db_session.commit()
|
||||||
|
|
||||||
limit = user.premium_tokens_limit
|
limit = user.premium_credit_micros_limit
|
||||||
used = user.premium_tokens_used
|
used = user.premium_credit_micros_used
|
||||||
reserved = user.premium_tokens_reserved
|
reserved = user.premium_credit_micros_reserved
|
||||||
remaining = max(0, limit - used - reserved)
|
remaining = max(0, limit - used - reserved)
|
||||||
|
|
||||||
warning_threshold = int(limit * 0.8)
|
warning_threshold = int(limit * 0.8)
|
||||||
|
|
@ -562,8 +640,13 @@ class TokenQuotaService:
|
||||||
async def premium_release(
|
async def premium_release(
|
||||||
db_session: AsyncSession,
|
db_session: AsyncSession,
|
||||||
user_id: Any,
|
user_id: Any,
|
||||||
reserved_tokens: int,
|
reserved_micros: int,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
"""Release ``reserved_micros`` previously held by ``premium_reserve``.
|
||||||
|
|
||||||
|
Used when a request fails before finalize (so the reservation
|
||||||
|
doesn't leak credit).
|
||||||
|
"""
|
||||||
from app.db import User
|
from app.db import User
|
||||||
|
|
||||||
user = (
|
user = (
|
||||||
|
|
@ -576,8 +659,8 @@ class TokenQuotaService:
|
||||||
.scalar_one_or_none()
|
.scalar_one_or_none()
|
||||||
)
|
)
|
||||||
if user is not None:
|
if user is not None:
|
||||||
user.premium_tokens_reserved = max(
|
user.premium_credit_micros_reserved = max(
|
||||||
0, user.premium_tokens_reserved - reserved_tokens
|
0, user.premium_credit_micros_reserved - reserved_micros
|
||||||
)
|
)
|
||||||
await db_session.commit()
|
await db_session.commit()
|
||||||
|
|
||||||
|
|
@ -598,9 +681,9 @@ class TokenQuotaService:
|
||||||
allowed=False, status=QuotaStatus.BLOCKED, used=0, limit=0
|
allowed=False, status=QuotaStatus.BLOCKED, used=0, limit=0
|
||||||
)
|
)
|
||||||
|
|
||||||
limit = user.premium_tokens_limit
|
limit = user.premium_credit_micros_limit
|
||||||
used = user.premium_tokens_used
|
used = user.premium_credit_micros_used
|
||||||
reserved = user.premium_tokens_reserved
|
reserved = user.premium_credit_micros_reserved
|
||||||
remaining = max(0, limit - used - reserved)
|
remaining = max(0, limit - used - reserved)
|
||||||
|
|
||||||
warning_threshold = int(limit * 0.8)
|
warning_threshold = int(limit * 0.8)
|
||||||
|
|
|
||||||
|
|
@ -16,11 +16,14 @@ from __future__ import annotations
|
||||||
|
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import logging
|
import logging
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
from contextvars import ContextVar
|
from contextvars import ContextVar
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
|
import litellm
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
||||||
|
|
@ -35,6 +38,8 @@ class TokenCallRecord:
|
||||||
prompt_tokens: int
|
prompt_tokens: int
|
||||||
completion_tokens: int
|
completion_tokens: int
|
||||||
total_tokens: int
|
total_tokens: int
|
||||||
|
cost_micros: int = 0
|
||||||
|
call_kind: str = "chat"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -49,6 +54,8 @@ class TurnTokenAccumulator:
|
||||||
prompt_tokens: int,
|
prompt_tokens: int,
|
||||||
completion_tokens: int,
|
completion_tokens: int,
|
||||||
total_tokens: int,
|
total_tokens: int,
|
||||||
|
cost_micros: int = 0,
|
||||||
|
call_kind: str = "chat",
|
||||||
) -> None:
|
) -> None:
|
||||||
self.calls.append(
|
self.calls.append(
|
||||||
TokenCallRecord(
|
TokenCallRecord(
|
||||||
|
|
@ -56,20 +63,28 @@ class TurnTokenAccumulator:
|
||||||
prompt_tokens=prompt_tokens,
|
prompt_tokens=prompt_tokens,
|
||||||
completion_tokens=completion_tokens,
|
completion_tokens=completion_tokens,
|
||||||
total_tokens=total_tokens,
|
total_tokens=total_tokens,
|
||||||
|
cost_micros=cost_micros,
|
||||||
|
call_kind=call_kind,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def per_message_summary(self) -> dict[str, dict[str, int]]:
|
def per_message_summary(self) -> dict[str, dict[str, int]]:
|
||||||
"""Return token counts grouped by model name."""
|
"""Return token counts (and cost) grouped by model name."""
|
||||||
by_model: dict[str, dict[str, int]] = {}
|
by_model: dict[str, dict[str, int]] = {}
|
||||||
for c in self.calls:
|
for c in self.calls:
|
||||||
entry = by_model.setdefault(
|
entry = by_model.setdefault(
|
||||||
c.model,
|
c.model,
|
||||||
{"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
|
{
|
||||||
|
"prompt_tokens": 0,
|
||||||
|
"completion_tokens": 0,
|
||||||
|
"total_tokens": 0,
|
||||||
|
"cost_micros": 0,
|
||||||
|
},
|
||||||
)
|
)
|
||||||
entry["prompt_tokens"] += c.prompt_tokens
|
entry["prompt_tokens"] += c.prompt_tokens
|
||||||
entry["completion_tokens"] += c.completion_tokens
|
entry["completion_tokens"] += c.completion_tokens
|
||||||
entry["total_tokens"] += c.total_tokens
|
entry["total_tokens"] += c.total_tokens
|
||||||
|
entry["cost_micros"] += c.cost_micros
|
||||||
return by_model
|
return by_model
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
@ -84,6 +99,21 @@ class TurnTokenAccumulator:
|
||||||
def total_completion_tokens(self) -> int:
|
def total_completion_tokens(self) -> int:
|
||||||
return sum(c.completion_tokens for c in self.calls)
|
return sum(c.completion_tokens for c in self.calls)
|
||||||
|
|
||||||
|
@property
def total_cost_micros(self) -> int:
    """Return the turn's total cost as the sum of every call's ``cost_micros``.

    Each recorded call carries its own ``cost_micros`` value (micro-USD);
    this property simply adds them up, yielding ``0`` when no calls have
    been recorded yet.
    """
    running_total = 0
    for record in self.calls:
        running_total += record.cost_micros
    return running_total
|
||||||
|
|
||||||
def serialized_calls(self) -> list[dict[str, Any]]:
|
def serialized_calls(self) -> list[dict[str, Any]]:
|
||||||
return [dataclasses.asdict(c) for c in self.calls]
|
return [dataclasses.asdict(c) for c in self.calls]
|
||||||
|
|
||||||
|
|
@ -94,7 +124,14 @@ _turn_accumulator: ContextVar[TurnTokenAccumulator | None] = ContextVar(
|
||||||
|
|
||||||
|
|
||||||
def start_turn() -> TurnTokenAccumulator:
|
def start_turn() -> TurnTokenAccumulator:
|
||||||
"""Create a fresh accumulator for the current async context and return it."""
|
"""Create a fresh accumulator for the current async context and return it.
|
||||||
|
|
||||||
|
NOTE: Used by ``stream_new_chat`` for the long-lived chat turn. For
|
||||||
|
short-lived per-call billable wrappers (image generation REST endpoint,
|
||||||
|
vision LLM during indexing) prefer :func:`scoped_turn`, which uses a
|
||||||
|
ContextVar reset token to restore the *previous* accumulator on exit and
|
||||||
|
avoids leaking call records across reservations (issue B).
|
||||||
|
"""
|
||||||
acc = TurnTokenAccumulator()
|
acc = TurnTokenAccumulator()
|
||||||
_turn_accumulator.set(acc)
|
_turn_accumulator.set(acc)
|
||||||
logger.info("[TokenTracking] start_turn: new accumulator created (id=%s)", id(acc))
|
logger.info("[TokenTracking] start_turn: new accumulator created (id=%s)", id(acc))
|
||||||
|
|
@ -105,6 +142,140 @@ def get_current_accumulator() -> TurnTokenAccumulator | None:
|
||||||
return _turn_accumulator.get()
|
return _turn_accumulator.get()
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def scoped_turn() -> AsyncIterator[TurnTokenAccumulator]:
    """Scope a brand-new ``TurnTokenAccumulator`` to an ``async with`` block.

    On entry a fresh accumulator is stored in the ``_turn_accumulator``
    ContextVar and yielded to the caller. On exit (normal or exceptional)
    the ContextVar is reset with the token returned by ``set``, which
    restores whatever accumulator was active before the block was entered.

    Usage::

        async with scoped_turn() as acc:
            await llm.ainvoke(...)
            # read acc.total_cost_micros / acc.calls here
        # the previously active accumulator (if any) is back in place
    """
    accumulator = TurnTokenAccumulator()
    # Keep the reset token: it is what lets us restore the prior value
    # instead of merely overwriting it.
    reset_token = _turn_accumulator.set(accumulator)
    logger.debug(
        "[TokenTracking] scoped_turn: enter (acc id=%s, prev token=%s)",
        id(accumulator),
        reset_token,
    )
    try:
        yield accumulator
    finally:
        # Restore first, then log, so the log line reflects the final state
        # of the accumulator that was scoped to this block.
        _turn_accumulator.reset(reset_token)
        logger.debug(
            "[TokenTracking] scoped_turn: exit (acc id=%s captured %d call(s), %d micros total)",
            id(accumulator),
            len(accumulator.calls),
            accumulator.total_cost_micros,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_reported_cost(raw: Any) -> float:
    """Return ``raw`` as a finite, strictly positive float, else ``0.0``.

    Shared by every cost source in :func:`_extract_cost_usd` so that a
    missing, malformed, zero, negative, NaN or infinite value is uniformly
    treated as "this source did not yield a usable cost".
    """
    if raw is None:
        return 0.0
    try:
        value = float(raw)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: ``float()`` of an int too large for a double.
        return 0.0
    # The chained comparison is False for NaN and +inf as well as for
    # non-positive numbers. An infinite cost must not escape: converting it
    # to integer micros with ``round()`` raises OverflowError.
    return value if 0.0 < value < float("inf") else 0.0


def _extract_cost_usd(
    kwargs: dict[str, Any],
    response_obj: Any,
    model: str,
    prompt_tokens: int,
    completion_tokens: int,
    is_image: bool = False,
) -> float:
    """Best-effort USD cost extraction for a single LLM/image call.

    Tries up to four sources in priority order and returns the first one
    that yields a finite, strictly positive number. Returns ``0.0`` when
    none does, so the call contributes no cost.

    Sources:
      1. ``kwargs["response_cost"]``.
      2. ``response_obj._hidden_params["response_cost"]`` (only when
         ``_hidden_params`` is a dict).
      3. ``litellm.completion_cost(completion_response=response_obj)``.
      4. ``litellm.cost_per_token(model, prompt_tokens, completion_tokens)``,
         attempted only for non-image calls with a model name and at least
         one non-zero token count.

    Args:
        kwargs: Callback keyword arguments; only ``"response_cost"`` is read.
        response_obj: Provider response; probed for ``_hidden_params`` and
            passed to ``litellm.completion_cost``.
        model: Model identifier used for the ``cost_per_token`` fallback and
            in log messages.
        prompt_tokens: Prompt/input token count for the fallback.
        completion_tokens: Completion/output token count for the fallback.
        is_image: When true, a ``completion_cost`` failure is logged at
            warning level and source 4 is never attempted.

    Returns:
        The resolved cost in USD, or ``0.0`` if no source produced one.
    """
    # Source 1: cost supplied directly in the callback kwargs.
    value = _coerce_reported_cost(kwargs.get("response_cost"))
    if value > 0:
        return value

    # Source 2: the same field exposed on the response object itself.
    hidden = getattr(response_obj, "_hidden_params", None) or {}
    if isinstance(hidden, dict):
        value = _coerce_reported_cost(hidden.get("response_cost"))
        if value > 0:
            return value

    # Source 3: recompute from the response. ``float()`` stays inside the
    # ``try`` so a non-numeric return value is reported like any other
    # completion_cost failure.
    try:
        value = _coerce_reported_cost(
            float(litellm.completion_cost(completion_response=response_obj))
        )
        if value > 0:
            return value
    except Exception as exc:
        if is_image:
            # Image-gen path: bail out with a warning rather than falling
            # through to cost_per_token, which is not attempted for images.
            logger.warning(
                "[TokenTracking] completion_cost failed for image model=%s "
                "(provider may have omitted usage.cost). Debiting 0. "
                "Cause: %s",
                model,
                exc,
            )
            return 0.0
        logger.debug(
            "[TokenTracking] completion_cost failed for model=%s: %s", model, exc
        )

    if is_image:
        # Source 4 is token-based and is skipped for image responses.
        return 0.0

    # Source 4: manual per-token pricing lookup.
    if model and (prompt_tokens > 0 or completion_tokens > 0):
        try:
            prompt_cost, completion_cost = litellm.cost_per_token(
                model=model,
                prompt_tokens=prompt_tokens,
                completion_tokens=completion_tokens,
            )
            value = _coerce_reported_cost(float(prompt_cost) + float(completion_cost))
            if value > 0:
                return value
        except Exception as exc:
            logger.debug(
                "[TokenTracking] cost_per_token failed for model=%s: %s", model, exc
            )

    return 0.0
|
||||||
|
|
||||||
|
|
||||||
class TokenTrackingCallback(CustomLogger):
|
class TokenTrackingCallback(CustomLogger):
|
||||||
"""LiteLLM callback that captures token usage into the turn accumulator."""
|
"""LiteLLM callback that captures token usage into the turn accumulator."""
|
||||||
|
|
||||||
|
|
@ -122,6 +293,13 @@ class TokenTrackingCallback(CustomLogger):
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Detect image generation responses — they have a different usage
|
||||||
|
# shape (ImageUsage with input_tokens/output_tokens) and require a
|
||||||
|
# different cost-extraction path. We probe by class name to avoid a
|
||||||
|
# hard import dependency on litellm internals.
|
||||||
|
response_cls = type(response_obj).__name__
|
||||||
|
is_image = response_cls == "ImageResponse"
|
||||||
|
|
||||||
usage = getattr(response_obj, "usage", None)
|
usage = getattr(response_obj, "usage", None)
|
||||||
if not usage:
|
if not usage:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
|
|
@ -129,24 +307,66 @@ class TokenTrackingCallback(CustomLogger):
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if is_image:
|
||||||
|
# ``ImageUsage`` exposes ``input_tokens`` / ``output_tokens``
|
||||||
|
# (not prompt_tokens/completion_tokens). Several providers
|
||||||
|
# populate only one or neither (e.g. OpenRouter's gpt-image-1
|
||||||
|
# passes through `input_tokens` from the prompt but no
|
||||||
|
# completion); fall through gracefully to 0.
|
||||||
|
prompt_tokens = getattr(usage, "input_tokens", 0) or 0
|
||||||
|
completion_tokens = getattr(usage, "output_tokens", 0) or 0
|
||||||
|
total_tokens = (
|
||||||
|
getattr(usage, "total_tokens", 0) or prompt_tokens + completion_tokens
|
||||||
|
)
|
||||||
|
call_kind = "image_generation"
|
||||||
|
else:
|
||||||
prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
|
prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0
|
||||||
completion_tokens = getattr(usage, "completion_tokens", 0) or 0
|
completion_tokens = getattr(usage, "completion_tokens", 0) or 0
|
||||||
total_tokens = getattr(usage, "total_tokens", 0) or 0
|
total_tokens = getattr(usage, "total_tokens", 0) or 0
|
||||||
|
call_kind = "chat"
|
||||||
|
|
||||||
model = kwargs.get("model", "unknown")
|
model = kwargs.get("model", "unknown")
|
||||||
|
|
||||||
|
cost_usd = _extract_cost_usd(
|
||||||
|
kwargs=kwargs,
|
||||||
|
response_obj=response_obj,
|
||||||
|
model=model,
|
||||||
|
prompt_tokens=prompt_tokens,
|
||||||
|
completion_tokens=completion_tokens,
|
||||||
|
is_image=is_image,
|
||||||
|
)
|
||||||
|
cost_micros = round(cost_usd * 1_000_000) if cost_usd > 0 else 0
|
||||||
|
|
||||||
|
if cost_micros == 0 and (prompt_tokens > 0 or completion_tokens > 0):
|
||||||
|
logger.warning(
|
||||||
|
"[TokenTracking] No cost resolved for model=%s prompt=%d completion=%d "
|
||||||
|
"kind=%s — debiting 0. Register pricing via pricing_registration or YAML "
|
||||||
|
"input_cost_per_token/output_cost_per_token (or rely on response_cost "
|
||||||
|
"for image generation).",
|
||||||
|
model,
|
||||||
|
prompt_tokens,
|
||||||
|
completion_tokens,
|
||||||
|
call_kind,
|
||||||
|
)
|
||||||
|
|
||||||
acc.add(
|
acc.add(
|
||||||
model=model,
|
model=model,
|
||||||
prompt_tokens=prompt_tokens,
|
prompt_tokens=prompt_tokens,
|
||||||
completion_tokens=completion_tokens,
|
completion_tokens=completion_tokens,
|
||||||
total_tokens=total_tokens,
|
total_tokens=total_tokens,
|
||||||
|
cost_micros=cost_micros,
|
||||||
|
call_kind=call_kind,
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
"[TokenTracking] Captured: model=%s prompt=%d completion=%d total=%d (accumulator now has %d calls)",
|
"[TokenTracking] Captured: model=%s kind=%s prompt=%d completion=%d total=%d "
|
||||||
|
"cost=$%.6f (%d micros) (accumulator now has %d calls)",
|
||||||
model,
|
model,
|
||||||
|
call_kind,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
|
cost_usd,
|
||||||
|
cost_micros,
|
||||||
len(acc.calls),
|
len(acc.calls),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -168,6 +388,7 @@ async def record_token_usage(
|
||||||
prompt_tokens: int = 0,
|
prompt_tokens: int = 0,
|
||||||
completion_tokens: int = 0,
|
completion_tokens: int = 0,
|
||||||
total_tokens: int = 0,
|
total_tokens: int = 0,
|
||||||
|
cost_micros: int = 0,
|
||||||
model_breakdown: dict[str, Any] | None = None,
|
model_breakdown: dict[str, Any] | None = None,
|
||||||
call_details: dict[str, Any] | None = None,
|
call_details: dict[str, Any] | None = None,
|
||||||
thread_id: int | None = None,
|
thread_id: int | None = None,
|
||||||
|
|
@ -185,6 +406,7 @@ async def record_token_usage(
|
||||||
prompt_tokens=prompt_tokens,
|
prompt_tokens=prompt_tokens,
|
||||||
completion_tokens=completion_tokens,
|
completion_tokens=completion_tokens,
|
||||||
total_tokens=total_tokens,
|
total_tokens=total_tokens,
|
||||||
|
cost_micros=cost_micros,
|
||||||
model_breakdown=model_breakdown,
|
model_breakdown=model_breakdown,
|
||||||
call_details=call_details,
|
call_details=call_details,
|
||||||
thread_id=thread_id,
|
thread_id=thread_id,
|
||||||
|
|
@ -194,11 +416,12 @@ async def record_token_usage(
|
||||||
)
|
)
|
||||||
session.add(record)
|
session.add(record)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"[TokenTracking] recorded %s usage: prompt=%d completion=%d total=%d",
|
"[TokenTracking] recorded %s usage: prompt=%d completion=%d total=%d cost_micros=%d",
|
||||||
usage_type,
|
usage_type,
|
||||||
prompt_tokens,
|
prompt_tokens,
|
||||||
completion_tokens,
|
completion_tokens,
|
||||||
total_tokens,
|
total_tokens,
|
||||||
|
cost_micros,
|
||||||
)
|
)
|
||||||
return record
|
return record
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ from typing import Any
|
||||||
|
|
||||||
from litellm import Router
|
from litellm import Router
|
||||||
|
|
||||||
|
from app.services.provider_api_base import resolve_api_base
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
VISION_AUTO_MODE_ID = 0
|
VISION_AUTO_MODE_ID = 0
|
||||||
|
|
@ -108,10 +110,11 @@ class VisionLLMRouterService:
|
||||||
if not config.get("model_name") or not config.get("api_key"):
|
if not config.get("model_name") or not config.get("api_key"):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if config.get("custom_provider"):
|
|
||||||
model_string = f"{config['custom_provider']}/{config['model_name']}"
|
|
||||||
else:
|
|
||||||
provider = config.get("provider", "").upper()
|
provider = config.get("provider", "").upper()
|
||||||
|
if config.get("custom_provider"):
|
||||||
|
provider_prefix = config["custom_provider"]
|
||||||
|
model_string = f"{provider_prefix}/{config['model_name']}"
|
||||||
|
else:
|
||||||
provider_prefix = VISION_PROVIDER_MAP.get(provider, provider.lower())
|
provider_prefix = VISION_PROVIDER_MAP.get(provider, provider.lower())
|
||||||
model_string = f"{provider_prefix}/{config['model_name']}"
|
model_string = f"{provider_prefix}/{config['model_name']}"
|
||||||
|
|
||||||
|
|
@ -120,8 +123,13 @@ class VisionLLMRouterService:
|
||||||
"api_key": config.get("api_key"),
|
"api_key": config.get("api_key"),
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.get("api_base"):
|
api_base = resolve_api_base(
|
||||||
litellm_params["api_base"] = config["api_base"]
|
provider=provider,
|
||||||
|
provider_prefix=provider_prefix,
|
||||||
|
config_api_base=config.get("api_base"),
|
||||||
|
)
|
||||||
|
if api_base:
|
||||||
|
litellm_params["api_base"] = api_base
|
||||||
|
|
||||||
if config.get("api_version"):
|
if config.get("api_version"):
|
||||||
litellm_params["api_version"] = config["api_version"]
|
litellm_params["api_version"] = config["api_version"]
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,25 @@
|
||||||
"""Celery tasks package."""
|
"""Celery tasks package.
|
||||||
|
|
||||||
|
Also hosts the small helpers every async celery task should use to
|
||||||
|
spin up its event loop. See :func:`run_async_celery_task` for the
|
||||||
|
canonical pattern.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import contextlib
|
||||||
|
import logging
|
||||||
|
from collections.abc import Awaitable, Callable
|
||||||
|
from typing import TypeVar
|
||||||
|
|
||||||
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||||
from sqlalchemy.pool import NullPool
|
from sqlalchemy.pool import NullPool
|
||||||
|
|
||||||
from app.config import config
|
from app.config import config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
_celery_engine = None
|
_celery_engine = None
|
||||||
_celery_session_maker = None
|
_celery_session_maker = None
|
||||||
|
|
||||||
|
|
@ -26,3 +41,86 @@ def get_celery_session_maker() -> async_sessionmaker:
|
||||||
_celery_engine, expire_on_commit=False
|
_celery_engine, expire_on_commit=False
|
||||||
)
|
)
|
||||||
return _celery_session_maker
|
return _celery_session_maker
|
||||||
|
|
||||||
|
|
||||||
|
def _dispose_shared_db_engine(loop: asyncio.AbstractEventLoop) -> None:
    """Synchronously dispose the shared ``app.db.engine`` on ``loop``.

    The engine is imported lazily from ``app.db`` and its ``dispose()``
    coroutine is driven to completion with ``loop.run_until_complete``.
    Disposing drops the pooled connections so that the next checkout has to
    open a new one; the motivation is that every celery task runs on its
    own event loop and pooled connections opened on an earlier loop must
    not be reused on a later one.

    This is strictly best-effort: any exception raised by the import or by
    the dispose call is logged as a warning (with traceback) and swallowed,
    which makes the helper safe to call from a ``finally`` block.

    Args:
        loop: The event loop on which to run ``engine.dispose()``.
    """
    try:
        from app.db import engine

        disposal = engine.dispose()
        loop.run_until_complete(disposal)
    except Exception:
        logger.warning("Shared DB engine dispose() failed", exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
def run_async_celery_task[T](coro_factory: Callable[[], Awaitable[T]]) -> T:
|
||||||
|
"""Run an async coroutine inside a fresh event loop with proper
|
||||||
|
DB-engine cleanup.
|
||||||
|
|
||||||
|
This is the canonical entry point for every async celery task.
|
||||||
|
It performs three responsibilities that were previously copy-pasted
|
||||||
|
(incorrectly) across each task module:
|
||||||
|
|
||||||
|
1. Create a fresh ``asyncio`` loop and install it on the current
|
||||||
|
thread (celery's ``--pool=solo`` runs every task on the main
|
||||||
|
thread, but other pool types don't).
|
||||||
|
2. Dispose the shared ``app.db.engine`` BEFORE the task runs so
|
||||||
|
any stale connections left over from a previous task's loop
|
||||||
|
are dropped — defends against tasks that crashed without
|
||||||
|
cleaning up.
|
||||||
|
3. Dispose the shared engine AFTER the task runs so the
|
||||||
|
connections we opened on this loop are released before the
|
||||||
|
loop closes (avoids ``coroutine 'Connection._cancel' was
|
||||||
|
never awaited`` warnings and the next-task hang).
|
||||||
|
|
||||||
|
Use as::
|
||||||
|
|
||||||
|
@celery_app.task(name="my_task", bind=True)
|
||||||
|
def my_task(self, *args):
|
||||||
|
return run_async_celery_task(lambda: _my_task_impl(*args))
|
||||||
|
"""
|
||||||
|
loop = asyncio.new_event_loop()
|
||||||
|
asyncio.set_event_loop(loop)
|
||||||
|
try:
|
||||||
|
# Defense-in-depth: prior task may have crashed before
|
||||||
|
# disposing. Idempotent — no-op if pool is already empty.
|
||||||
|
_dispose_shared_db_engine(loop)
|
||||||
|
return loop.run_until_complete(coro_factory())
|
||||||
|
finally:
|
||||||
|
# Drop any connections this task opened so they don't leak
|
||||||
|
# into the next task's loop.
|
||||||
|
_dispose_shared_db_engine(loop)
|
||||||
|
with contextlib.suppress(Exception):
|
||||||
|
loop.run_until_complete(loop.shutdown_asyncgens())
|
||||||
|
with contextlib.suppress(Exception):
|
||||||
|
asyncio.set_event_loop(None)
|
||||||
|
loop.close()
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"get_celery_session_maker",
|
||||||
|
"run_async_celery_task",
|
||||||
|
]
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ import logging
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from app.celery_app import celery_app
|
from app.celery_app import celery_app
|
||||||
from app.tasks.celery_tasks import get_celery_session_maker
|
from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -49,22 +49,15 @@ def index_notion_pages_task(
|
||||||
end_date: str,
|
end_date: str,
|
||||||
):
|
):
|
||||||
"""Celery task to index Notion pages."""
|
"""Celery task to index Notion pages."""
|
||||||
import asyncio
|
|
||||||
|
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(
|
return run_async_celery_task(
|
||||||
_index_notion_pages(
|
lambda: _index_notion_pages(
|
||||||
connector_id, search_space_id, user_id, start_date, end_date
|
connector_id, search_space_id, user_id, start_date, end_date
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_handle_greenlet_error(e, "index_notion_pages", connector_id)
|
_handle_greenlet_error(e, "index_notion_pages", connector_id)
|
||||||
raise
|
raise
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_notion_pages(
|
async def _index_notion_pages(
|
||||||
|
|
@ -95,19 +88,11 @@ def index_github_repos_task(
|
||||||
end_date: str,
|
end_date: str,
|
||||||
):
|
):
|
||||||
"""Celery task to index GitHub repositories."""
|
"""Celery task to index GitHub repositories."""
|
||||||
import asyncio
|
return run_async_celery_task(
|
||||||
|
lambda: _index_github_repos(
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_github_repos(
|
|
||||||
connector_id, search_space_id, user_id, start_date, end_date
|
connector_id, search_space_id, user_id, start_date, end_date
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_github_repos(
|
async def _index_github_repos(
|
||||||
|
|
@ -138,19 +123,11 @@ def index_confluence_pages_task(
|
||||||
end_date: str,
|
end_date: str,
|
||||||
):
|
):
|
||||||
"""Celery task to index Confluence pages."""
|
"""Celery task to index Confluence pages."""
|
||||||
import asyncio
|
return run_async_celery_task(
|
||||||
|
lambda: _index_confluence_pages(
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_confluence_pages(
|
|
||||||
connector_id, search_space_id, user_id, start_date, end_date
|
connector_id, search_space_id, user_id, start_date, end_date
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_confluence_pages(
|
async def _index_confluence_pages(
|
||||||
|
|
@ -181,22 +158,15 @@ def index_google_calendar_events_task(
|
||||||
end_date: str,
|
end_date: str,
|
||||||
):
|
):
|
||||||
"""Celery task to index Google Calendar events."""
|
"""Celery task to index Google Calendar events."""
|
||||||
import asyncio
|
|
||||||
|
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(
|
return run_async_celery_task(
|
||||||
_index_google_calendar_events(
|
lambda: _index_google_calendar_events(
|
||||||
connector_id, search_space_id, user_id, start_date, end_date
|
connector_id, search_space_id, user_id, start_date, end_date
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_handle_greenlet_error(e, "index_google_calendar_events", connector_id)
|
_handle_greenlet_error(e, "index_google_calendar_events", connector_id)
|
||||||
raise
|
raise
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_google_calendar_events(
|
async def _index_google_calendar_events(
|
||||||
|
|
@ -227,19 +197,11 @@ def index_google_gmail_messages_task(
|
||||||
end_date: str,
|
end_date: str,
|
||||||
):
|
):
|
||||||
"""Celery task to index Google Gmail messages."""
|
"""Celery task to index Google Gmail messages."""
|
||||||
import asyncio
|
return run_async_celery_task(
|
||||||
|
lambda: _index_google_gmail_messages(
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_google_gmail_messages(
|
|
||||||
connector_id, search_space_id, user_id, start_date, end_date
|
connector_id, search_space_id, user_id, start_date, end_date
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_google_gmail_messages(
|
async def _index_google_gmail_messages(
|
||||||
|
|
@ -269,22 +231,14 @@ def index_google_drive_files_task(
|
||||||
items_dict: dict, # Dictionary with 'folders', 'files', and 'indexing_options'
|
items_dict: dict, # Dictionary with 'folders', 'files', and 'indexing_options'
|
||||||
):
|
):
|
||||||
"""Celery task to index Google Drive folders and files."""
|
"""Celery task to index Google Drive folders and files."""
|
||||||
import asyncio
|
return run_async_celery_task(
|
||||||
|
lambda: _index_google_drive_files(
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_google_drive_files(
|
|
||||||
connector_id,
|
connector_id,
|
||||||
search_space_id,
|
search_space_id,
|
||||||
user_id,
|
user_id,
|
||||||
items_dict,
|
items_dict,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_google_drive_files(
|
async def _index_google_drive_files(
|
||||||
|
|
@ -317,22 +271,14 @@ def index_onedrive_files_task(
|
||||||
items_dict: dict,
|
items_dict: dict,
|
||||||
):
|
):
|
||||||
"""Celery task to index OneDrive folders and files."""
|
"""Celery task to index OneDrive folders and files."""
|
||||||
import asyncio
|
return run_async_celery_task(
|
||||||
|
lambda: _index_onedrive_files(
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_onedrive_files(
|
|
||||||
connector_id,
|
connector_id,
|
||||||
search_space_id,
|
search_space_id,
|
||||||
user_id,
|
user_id,
|
||||||
items_dict,
|
items_dict,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_onedrive_files(
|
async def _index_onedrive_files(
|
||||||
|
|
@ -365,22 +311,14 @@ def index_dropbox_files_task(
|
||||||
items_dict: dict,
|
items_dict: dict,
|
||||||
):
|
):
|
||||||
"""Celery task to index Dropbox folders and files."""
|
"""Celery task to index Dropbox folders and files."""
|
||||||
import asyncio
|
return run_async_celery_task(
|
||||||
|
lambda: _index_dropbox_files(
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_dropbox_files(
|
|
||||||
connector_id,
|
connector_id,
|
||||||
search_space_id,
|
search_space_id,
|
||||||
user_id,
|
user_id,
|
||||||
items_dict,
|
items_dict,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_dropbox_files(
|
async def _index_dropbox_files(
|
||||||
|
|
@ -414,19 +352,11 @@ def index_elasticsearch_documents_task(
|
||||||
end_date: str,
|
end_date: str,
|
||||||
):
|
):
|
||||||
"""Celery task to index Elasticsearch documents."""
|
"""Celery task to index Elasticsearch documents."""
|
||||||
import asyncio
|
return run_async_celery_task(
|
||||||
|
lambda: _index_elasticsearch_documents(
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_elasticsearch_documents(
|
|
||||||
connector_id, search_space_id, user_id, start_date, end_date
|
connector_id, search_space_id, user_id, start_date, end_date
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_elasticsearch_documents(
|
async def _index_elasticsearch_documents(
|
||||||
|
|
@ -457,22 +387,15 @@ def index_crawled_urls_task(
|
||||||
end_date: str,
|
end_date: str,
|
||||||
):
|
):
|
||||||
"""Celery task to index Web page Urls."""
|
"""Celery task to index Web page Urls."""
|
||||||
import asyncio
|
|
||||||
|
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(
|
return run_async_celery_task(
|
||||||
_index_crawled_urls(
|
lambda: _index_crawled_urls(
|
||||||
connector_id, search_space_id, user_id, start_date, end_date
|
connector_id, search_space_id, user_id, start_date, end_date
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
_handle_greenlet_error(e, "index_crawled_urls", connector_id)
|
_handle_greenlet_error(e, "index_crawled_urls", connector_id)
|
||||||
raise
|
raise
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_crawled_urls(
|
async def _index_crawled_urls(
|
||||||
|
|
@ -503,19 +426,11 @@ def index_bookstack_pages_task(
|
||||||
end_date: str,
|
end_date: str,
|
||||||
):
|
):
|
||||||
"""Celery task to index BookStack pages."""
|
"""Celery task to index BookStack pages."""
|
||||||
import asyncio
|
return run_async_celery_task(
|
||||||
|
lambda: _index_bookstack_pages(
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_bookstack_pages(
|
|
||||||
connector_id, search_space_id, user_id, start_date, end_date
|
connector_id, search_space_id, user_id, start_date, end_date
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_bookstack_pages(
|
async def _index_bookstack_pages(
|
||||||
|
|
@ -546,19 +461,11 @@ def index_composio_connector_task(
|
||||||
end_date: str | None,
|
end_date: str | None,
|
||||||
):
|
):
|
||||||
"""Celery task to index Composio connector content (Google Drive, Gmail, Calendar via Composio)."""
|
"""Celery task to index Composio connector content (Google Drive, Gmail, Calendar via Composio)."""
|
||||||
import asyncio
|
return run_async_celery_task(
|
||||||
|
lambda: _index_composio_connector(
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_composio_connector(
|
|
||||||
connector_id, search_space_id, user_id, start_date, end_date
|
connector_id, search_space_id, user_id, start_date, end_date
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_composio_connector(
|
async def _index_composio_connector(
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from app.db import Document
|
||||||
from app.indexing_pipeline.adapters.file_upload_adapter import UploadDocumentAdapter
|
from app.indexing_pipeline.adapters.file_upload_adapter import UploadDocumentAdapter
|
||||||
from app.services.llm_service import get_user_long_context_llm
|
from app.services.llm_service import get_user_long_context_llm
|
||||||
from app.services.task_logging_service import TaskLoggingService
|
from app.services.task_logging_service import TaskLoggingService
|
||||||
from app.tasks.celery_tasks import get_celery_session_maker
|
from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -25,15 +25,7 @@ def reindex_document_task(self, document_id: int, user_id: str):
|
||||||
document_id: ID of document to reindex
|
document_id: ID of document to reindex
|
||||||
user_id: ID of user who edited the document
|
user_id: ID of user who edited the document
|
||||||
"""
|
"""
|
||||||
import asyncio
|
return run_async_celery_task(lambda: _reindex_document(document_id, user_id))
|
||||||
|
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(_reindex_document(document_id, user_id))
|
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _reindex_document(document_id: int, user_id: str):
|
async def _reindex_document(document_id: int, user_id: str):
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from app.celery_app import celery_app
|
||||||
from app.config import config
|
from app.config import config
|
||||||
from app.services.notification_service import NotificationService
|
from app.services.notification_service import NotificationService
|
||||||
from app.services.task_logging_service import TaskLoggingService
|
from app.services.task_logging_service import TaskLoggingService
|
||||||
from app.tasks.celery_tasks import get_celery_session_maker
|
from app.tasks.celery_tasks import get_celery_session_maker, run_async_celery_task
|
||||||
from app.tasks.connector_indexers.local_folder_indexer import (
|
from app.tasks.connector_indexers.local_folder_indexer import (
|
||||||
index_local_folder,
|
index_local_folder,
|
||||||
index_uploaded_files,
|
index_uploaded_files,
|
||||||
|
|
@ -105,12 +105,7 @@ async def _run_heartbeat_loop(notification_id: int):
|
||||||
)
|
)
|
||||||
def delete_document_task(self, document_id: int):
|
def delete_document_task(self, document_id: int):
|
||||||
"""Celery task to delete a document and its chunks in batches."""
|
"""Celery task to delete a document and its chunks in batches."""
|
||||||
loop = asyncio.new_event_loop()
|
return run_async_celery_task(lambda: _delete_document_background(document_id))
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(_delete_document_background(document_id))
|
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _delete_document_background(document_id: int) -> None:
|
async def _delete_document_background(document_id: int) -> None:
|
||||||
|
|
@ -153,14 +148,9 @@ def delete_folder_documents_task(
|
||||||
folder_subtree_ids: list[int] | None = None,
|
folder_subtree_ids: list[int] | None = None,
|
||||||
):
|
):
|
||||||
"""Celery task to delete documents first, then the folder rows."""
|
"""Celery task to delete documents first, then the folder rows."""
|
||||||
loop = asyncio.new_event_loop()
|
return run_async_celery_task(
|
||||||
asyncio.set_event_loop(loop)
|
lambda: _delete_folder_documents(document_ids, folder_subtree_ids)
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_delete_folder_documents(document_ids, folder_subtree_ids)
|
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _delete_folder_documents(
|
async def _delete_folder_documents(
|
||||||
|
|
@ -209,12 +199,9 @@ async def _delete_folder_documents(
|
||||||
)
|
)
|
||||||
def delete_search_space_task(self, search_space_id: int):
|
def delete_search_space_task(self, search_space_id: int):
|
||||||
"""Celery task to delete a search space and heavy child rows in batches."""
|
"""Celery task to delete a search space and heavy child rows in batches."""
|
||||||
loop = asyncio.new_event_loop()
|
return run_async_celery_task(
|
||||||
asyncio.set_event_loop(loop)
|
lambda: _delete_search_space_background(search_space_id)
|
||||||
try:
|
)
|
||||||
loop.run_until_complete(_delete_search_space_background(search_space_id))
|
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _delete_search_space_background(search_space_id: int) -> None:
|
async def _delete_search_space_background(search_space_id: int) -> None:
|
||||||
|
|
@ -269,18 +256,11 @@ def process_extension_document_task(
|
||||||
search_space_id: ID of the search space
|
search_space_id: ID of the search space
|
||||||
user_id: ID of the user
|
user_id: ID of the user
|
||||||
"""
|
"""
|
||||||
# Create a new event loop for this task
|
return run_async_celery_task(
|
||||||
loop = asyncio.new_event_loop()
|
lambda: _process_extension_document(
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_process_extension_document(
|
|
||||||
individual_document_dict, search_space_id, user_id
|
individual_document_dict, search_space_id, user_id
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _process_extension_document(
|
async def _process_extension_document(
|
||||||
|
|
@ -419,13 +399,9 @@ def process_youtube_video_task(self, url: str, search_space_id: int, user_id: st
|
||||||
search_space_id: ID of the search space
|
search_space_id: ID of the search space
|
||||||
user_id: ID of the user
|
user_id: ID of the user
|
||||||
"""
|
"""
|
||||||
loop = asyncio.new_event_loop()
|
return run_async_celery_task(
|
||||||
asyncio.set_event_loop(loop)
|
lambda: _process_youtube_video(url, search_space_id, user_id)
|
||||||
|
)
|
||||||
try:
|
|
||||||
loop.run_until_complete(_process_youtube_video(url, search_space_id, user_id))
|
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _process_youtube_video(url: str, search_space_id: int, user_id: str):
|
async def _process_youtube_video(url: str, search_space_id: int, user_id: str):
|
||||||
|
|
@ -573,12 +549,9 @@ def process_file_upload_task(
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"[process_file_upload] Could not get file size: {e}")
|
logger.warning(f"[process_file_upload] Could not get file size: {e}")
|
||||||
|
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(
|
run_async_celery_task(
|
||||||
_process_file_upload(file_path, filename, search_space_id, user_id)
|
lambda: _process_file_upload(file_path, filename, search_space_id, user_id)
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"[process_file_upload] Task completed successfully for: {filename}"
|
f"[process_file_upload] Task completed successfully for: {filename}"
|
||||||
|
|
@ -589,8 +562,6 @@ def process_file_upload_task(
|
||||||
f"Traceback:\n{traceback.format_exc()}"
|
f"Traceback:\n{traceback.format_exc()}"
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _process_file_upload(
|
async def _process_file_upload(
|
||||||
|
|
@ -811,25 +782,17 @@ def process_file_upload_with_document_task(
|
||||||
"File may have been removed before syncing could start."
|
"File may have been removed before syncing could start."
|
||||||
)
|
)
|
||||||
# Mark document as failed since file is missing
|
# Mark document as failed since file is missing
|
||||||
loop = asyncio.new_event_loop()
|
run_async_celery_task(
|
||||||
asyncio.set_event_loop(loop)
|
lambda: _mark_document_failed(
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_mark_document_failed(
|
|
||||||
document_id,
|
document_id,
|
||||||
"File not found. Please re-upload the file.",
|
"File not found. Please re-upload the file.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
return
|
return
|
||||||
|
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
loop.run_until_complete(
|
run_async_celery_task(
|
||||||
_process_file_with_document(
|
lambda: _process_file_with_document(
|
||||||
document_id,
|
document_id,
|
||||||
temp_path,
|
temp_path,
|
||||||
filename,
|
filename,
|
||||||
|
|
@ -849,8 +812,6 @@ def process_file_upload_with_document_task(
|
||||||
f"Traceback:\n{traceback.format_exc()}"
|
f"Traceback:\n{traceback.format_exc()}"
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _mark_document_failed(document_id: int, reason: str):
|
async def _mark_document_failed(document_id: int, reason: str):
|
||||||
|
|
@ -1119,12 +1080,8 @@ def process_circleback_meeting_task(
|
||||||
search_space_id: ID of the search space
|
search_space_id: ID of the search space
|
||||||
connector_id: ID of the Circleback connector (for deletion support)
|
connector_id: ID of the Circleback connector (for deletion support)
|
||||||
"""
|
"""
|
||||||
loop = asyncio.new_event_loop()
|
return run_async_celery_task(
|
||||||
asyncio.set_event_loop(loop)
|
lambda: _process_circleback_meeting(
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_process_circleback_meeting(
|
|
||||||
meeting_id,
|
meeting_id,
|
||||||
meeting_name,
|
meeting_name,
|
||||||
markdown_content,
|
markdown_content,
|
||||||
|
|
@ -1133,8 +1090,6 @@ def process_circleback_meeting_task(
|
||||||
connector_id,
|
connector_id,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _process_circleback_meeting(
|
async def _process_circleback_meeting(
|
||||||
|
|
@ -1291,12 +1246,8 @@ def index_local_folder_task(
|
||||||
target_file_paths: list[str] | None = None,
|
target_file_paths: list[str] | None = None,
|
||||||
):
|
):
|
||||||
"""Celery task to index a local folder. Config is passed directly — no connector row."""
|
"""Celery task to index a local folder. Config is passed directly — no connector row."""
|
||||||
loop = asyncio.new_event_loop()
|
return run_async_celery_task(
|
||||||
asyncio.set_event_loop(loop)
|
lambda: _index_local_folder_async(
|
||||||
|
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_local_folder_async(
|
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
folder_path=folder_path,
|
folder_path=folder_path,
|
||||||
|
|
@ -1308,8 +1259,6 @@ def index_local_folder_task(
|
||||||
target_file_paths=target_file_paths,
|
target_file_paths=target_file_paths,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_local_folder_async(
|
async def _index_local_folder_async(
|
||||||
|
|
@ -1441,11 +1390,8 @@ def index_uploaded_folder_files_task(
|
||||||
processing_mode: str = "basic",
|
processing_mode: str = "basic",
|
||||||
):
|
):
|
||||||
"""Celery task to index files uploaded from the desktop app."""
|
"""Celery task to index files uploaded from the desktop app."""
|
||||||
loop = asyncio.new_event_loop()
|
return run_async_celery_task(
|
||||||
asyncio.set_event_loop(loop)
|
lambda: _index_uploaded_folder_files_async(
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_index_uploaded_folder_files_async(
|
|
||||||
search_space_id=search_space_id,
|
search_space_id=search_space_id,
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
folder_name=folder_name,
|
folder_name=folder_name,
|
||||||
|
|
@ -1456,8 +1402,6 @@ def index_uploaded_folder_files_task(
|
||||||
processing_mode=processing_mode,
|
processing_mode=processing_mode,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _index_uploaded_folder_files_async(
|
async def _index_uploaded_folder_files_async(
|
||||||
|
|
@ -1584,12 +1528,9 @@ def _ai_sort_lock_key(search_space_id: int) -> str:
|
||||||
@celery_app.task(name="ai_sort_search_space", bind=True, max_retries=1)
|
@celery_app.task(name="ai_sort_search_space", bind=True, max_retries=1)
|
||||||
def ai_sort_search_space_task(self, search_space_id: int, user_id: str):
|
def ai_sort_search_space_task(self, search_space_id: int, user_id: str):
|
||||||
"""Full AI sort for all documents in a search space."""
|
"""Full AI sort for all documents in a search space."""
|
||||||
loop = asyncio.new_event_loop()
|
return run_async_celery_task(
|
||||||
asyncio.set_event_loop(loop)
|
lambda: _ai_sort_search_space_async(search_space_id, user_id)
|
||||||
try:
|
)
|
||||||
loop.run_until_complete(_ai_sort_search_space_async(search_space_id, user_id))
|
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _ai_sort_search_space_async(search_space_id: int, user_id: str):
|
async def _ai_sort_search_space_async(search_space_id: int, user_id: str):
|
||||||
|
|
@ -1639,14 +1580,9 @@ async def _ai_sort_search_space_async(search_space_id: int, user_id: str):
|
||||||
)
|
)
|
||||||
def ai_sort_document_task(self, search_space_id: int, user_id: str, document_id: int):
|
def ai_sort_document_task(self, search_space_id: int, user_id: str, document_id: int):
|
||||||
"""Incremental AI sort for a single document after indexing."""
|
"""Incremental AI sort for a single document after indexing."""
|
||||||
loop = asyncio.new_event_loop()
|
return run_async_celery_task(
|
||||||
asyncio.set_event_loop(loop)
|
lambda: _ai_sort_document_async(search_space_id, user_id, document_id)
|
||||||
try:
|
|
||||||
loop.run_until_complete(
|
|
||||||
_ai_sort_document_async(search_space_id, user_id, document_id)
|
|
||||||
)
|
)
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def _ai_sort_document_async(search_space_id: int, user_id: str, document_id: int):
|
async def _ai_sort_document_async(search_space_id: int, user_id: str, document_id: int):
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue