From ecb331a53c3e0acf2bf1a4d1ba3a9d5fc18f1455 Mon Sep 17 00:00:00 2001 From: developer603 Date: Mon, 1 Jun 2026 16:54:41 +0530 Subject: [PATCH] fix: detect masked short secrets in contains_masked_key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit contains_masked_key() guards against persisting a still-masked secret by checking for the MASK_MARKER ("***") substring. But mask_key() only emits 3+ consecutive asterisks for keys longer than VISIBLE_CHARS + 2. For short secrets it emits fewer: e.g. mask_key("EMPTY") == "*MPTY" (a single asterisk). Such masked values slip past the guard, so a dashboard "save without editing" round-trips the masked display string back and overwrites the real stored value with e.g. "*MPTY". This bites self-hosted/OpenAI-compatible providers that use a short no-validate sentinel api_key such as "EMPTY". Match the full shape mask_key() produces — a run of MASK_CHAR followed by at most VISIBLE_CHARS revealed characters — in addition to the legacy marker. Verified: masked short secrets ("*MPTY", "*", "*ykey") are now detected while real unmasked values ("EMPTY", "sk-live-abcd", ...) are not, so there are no false positives. Co-Authored-By: Claude Opus 4.8 (1M context) --- api/services/configuration/masking.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/api/services/configuration/masking.py b/api/services/configuration/masking.py index 877cad97..ddcd6ea8 100644 --- a/api/services/configuration/masking.py +++ b/api/services/configuration/masking.py @@ -10,6 +10,7 @@ The rules are simple: """ import copy +import re from typing import Any, Dict, Optional from api.schemas.user_configuration import UserConfiguration @@ -22,13 +23,28 @@ MASK_MARKER = "***" # substring that indicates a masked key SERVICE_SECRET_FIELDS = ("api_key", "credentials", "aws_access_key", "aws_secret_key") MODEL_OVERRIDE_FIELDS = ("llm", "tts", "stt", "realtime") +# A mask_key() result is a run of one-or-more MASK_CHAR followed by at most +# VISIBLE_CHARS revealed trailing characters. The legacy ``MASK_MARKER in k`` +# check only catches keys long enough to produce 3+ leading asterisks, so masked +# *short* secrets slip through: e.g. mask_key("EMPTY") == "*MPTY" (a single +# asterisk), which is not detected and can then be persisted verbatim, corrupting +# the stored value. Match the full mask_key() shape so any masked key is caught. +_MASK_SHAPE_RE = re.compile( + rf"^{re.escape(MASK_CHAR)}+[^{re.escape(MASK_CHAR)}]{{0,{VISIBLE_CHARS}}}$" +) + def contains_masked_key(value: str | list[str] | None) -> bool: - """Return True if *value* looks like a masked placeholder.""" + """Return True if *value* looks like a masked placeholder. + + Triggers on either the legacy 3+ MASK_CHAR marker (long keys) or the full + mask_key() shape — a run of MASK_CHAR followed by <= VISIBLE_CHARS revealed + characters — so masked *short* secrets such as ``"*MPTY"`` are also caught. + """ if value is None: return False keys = value if isinstance(value, list) else [value] - return any(MASK_MARKER in k for k in keys) + return any(MASK_MARKER in k or bool(_MASK_SHAPE_RE.match(k)) for k in keys) def check_for_masked_keys(config: "UserConfiguration") -> None: