fix: detect masked short secrets in contains_masked_key

contains_masked_key() guards against persisting a still-masked secret by
checking for the MASK_MARKER ("***") substring. But mask_key() only emits
3+ consecutive asterisks for keys longer than VISIBLE_CHARS + 2. For short
secrets it emits fewer: e.g. mask_key("EMPTY") == "*MPTY" (a single
asterisk). Such masked values slip past the guard, so a dashboard
"save without editing" round-trips the masked display string back and
overwrites the real stored value with e.g. "*MPTY".

This bites self-hosted/OpenAI-compatible providers that use a short
no-validate sentinel api_key such as "EMPTY".

Match the full shape mask_key() produces — a run of MASK_CHAR followed by
at most VISIBLE_CHARS revealed characters — in addition to the legacy
marker. Verified: masked short secrets ("*MPTY", "*", "*ykey") are now
detected while real unmasked values ("EMPTY", "sk-live-abcd", ...) are
not, so there are no false positives.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
developer603 2026-06-01 16:54:41 +05:30
parent 98d2b24cba
commit ecb331a53c

View file

@ -10,6 +10,7 @@ The rules are simple:
"""
import copy
import re
from typing import Any, Dict, Optional
from api.schemas.user_configuration import UserConfiguration
@ -22,13 +23,28 @@ MASK_MARKER = "***" # substring that indicates a masked key
SERVICE_SECRET_FIELDS = ("api_key", "credentials", "aws_access_key", "aws_secret_key")
MODEL_OVERRIDE_FIELDS = ("llm", "tts", "stt", "realtime")
# A mask_key() result is a run of one-or-more MASK_CHAR followed by at most
# VISIBLE_CHARS revealed trailing characters. The legacy ``MASK_MARKER in k``
# check only catches keys long enough to produce 3+ leading asterisks, so masked
# *short* secrets slip through: e.g. mask_key("EMPTY") == "*MPTY" (a single
# asterisk), which is not detected and can then be persisted verbatim, corrupting
# the stored value. Match the full mask_key() shape so any masked key is caught.
_MASK_SHAPE_RE = re.compile(
rf"^{re.escape(MASK_CHAR)}+[^{re.escape(MASK_CHAR)}]{{0,{VISIBLE_CHARS}}}$"
)
def contains_masked_key(value: str | list[str] | None) -> bool:
"""Return True if *value* looks like a masked placeholder."""
"""Return True if *value* looks like a masked placeholder.
Triggers on either the legacy 3+ MASK_CHAR marker (long keys) or the full
mask_key() shape a run of MASK_CHAR followed by <= VISIBLE_CHARS revealed
characters so masked *short* secrets such as ``"*MPTY"`` are also caught.
"""
if value is None:
return False
keys = value if isinstance(value, list) else [value]
return any(MASK_MARKER in k for k in keys)
return any(MASK_MARKER in k or bool(_MASK_SHAPE_RE.match(k)) for k in keys)
def check_for_masked_keys(config: "UserConfiguration") -> None: