From c3f242a6b23a0157f0ec5a26fd8005107b1d9fbd Mon Sep 17 00:00:00 2001 From: "DESKTOP-RTLN3BA\\$punk" Date: Thu, 25 Jun 2026 20:58:50 -0700 Subject: [PATCH] feat: fix desktop auth issues - Added logging for JSON parsing failures in structured output to aid in debugging. - Updated the TranscriptTurn model to ignore unknown keys, improving flexibility for future model variations. - Added GOOGLE_DESKTOP_CLIENT_ID to the values injected by the Electron build script (taken from process.env or desktopEnv, defaulting to an empty string). --- .../app/podcasts/generation/structured.py | 20 ++++++++++++++++++- .../app/podcasts/schemas/transcript.py | 10 ++++++++-- surfsense_desktop/scripts/build-electron.mjs | 3 +++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/surfsense_backend/app/podcasts/generation/structured.py b/surfsense_backend/app/podcasts/generation/structured.py index 08132e776..61096f43e 100644 --- a/surfsense_backend/app/podcasts/generation/structured.py +++ b/surfsense_backend/app/podcasts/generation/structured.py @@ -7,6 +7,7 @@ parse here keeps every generation node validating replies the same way. from __future__ import annotations +import logging from typing import TYPE_CHECKING, TypeVar from pydantic import BaseModel, ValidationError @@ -16,8 +17,14 @@ from app.utils.content_utils import extract_text_content, strip_markdown_fences if TYPE_CHECKING: from langchain_core.messages import BaseMessage +logger = logging.getLogger(__name__) + T = TypeVar("T", bound=BaseModel) +# How much of the raw reply to include in logs when a parse fails, so the actual +# malformation is diagnosable without dumping an entire episode's worth of text. 
+_LOG_SNIPPET_CHARS = 2000 + class StructuredOutputError(RuntimeError): """The model reply could not be parsed into the expected shape.""" @@ -41,10 +48,21 @@ async def invoke_json[T: BaseModel]( try: return model.model_validate_json(content[start:end]) except (ValidationError, ValueError) as exc: + logger.error( + "Failed to parse %s from model reply: %s\nRaw reply: %s", + model.__name__, + exc, + content[:_LOG_SNIPPET_CHARS], + ) raise StructuredOutputError( - f"could not parse {model.__name__} from model reply" + f"could not parse {model.__name__} from model reply: {exc}" ) from exc + logger.error( + "No JSON object found for %s in model reply.\nRaw reply: %s", + model.__name__, + content[:_LOG_SNIPPET_CHARS], + ) raise StructuredOutputError( f"no JSON object found for {model.__name__} in model reply" ) diff --git a/surfsense_backend/app/podcasts/schemas/transcript.py b/surfsense_backend/app/podcasts/schemas/transcript.py index b4c1463d8..94c5c5e16 100644 --- a/surfsense_backend/app/podcasts/schemas/transcript.py +++ b/surfsense_backend/app/podcasts/schemas/transcript.py @@ -12,9 +12,15 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator class TranscriptTurn(BaseModel): - """A single spoken line by one speaker.""" + """A single spoken line by one speaker. - model_config = ConfigDict(extra="forbid") + Drafting models (especially GPT-5-family) often decorate each turn with + extra keys like ``speaker_name``, ``emotion`` or ``tone``. The renderer only + needs ``speaker`` + ``text``, so unknown keys are ignored rather than + rejected — otherwise one stray field would fail the whole segment parse. 
+ """ + + model_config = ConfigDict(extra="ignore") speaker: int = Field(..., ge=0, description="The PodcastSpec speaker slot speaking") text: str = Field(..., min_length=1) diff --git a/surfsense_desktop/scripts/build-electron.mjs b/surfsense_desktop/scripts/build-electron.mjs index cc2083fe4..3785ccda4 100644 --- a/surfsense_desktop/scripts/build-electron.mjs +++ b/surfsense_desktop/scripts/build-electron.mjs @@ -114,6 +114,9 @@ async function buildElectron() { 'process.env.HOSTED_FRONTEND_URL': JSON.stringify( process.env.HOSTED_FRONTEND_URL || desktopEnv.HOSTED_FRONTEND_URL || 'https://surfsense.com' ), + 'process.env.GOOGLE_DESKTOP_CLIENT_ID': JSON.stringify( + process.env.GOOGLE_DESKTOP_CLIENT_ID || desktopEnv.GOOGLE_DESKTOP_CLIENT_ID || '' + ), 'process.env.POSTHOG_KEY': JSON.stringify( process.env.POSTHOG_KEY || desktopEnv.POSTHOG_KEY || '' ),