Fix handoff_request extractor truncating nested payloads at the first }

`HANDOFF_RE` used a non-greedy `.*?\}`, so the match ended at the first
closing brace. Every real `handoff_request` nests objects (`payload`,
and `payload.params`), so the captured substring was truncated
mid-object and `json.loads` raised — `extract_handoff` rejected every
well-formed handoff as `invalid_json` before the target allowlist and
schema validators ever ran. In practice the cross-agent handoff path
did not function for any realistic payload.

Replace the full-match regex with a start anchor (`HANDOFF_START_RE`)
and extract the complete object with `json.JSONDecoder().raw_decode`,
which is string- and brace-nesting aware and returns exactly one
complete JSON value plus its end offset. Multi-line payloads now parse
as well. The audit log's `raw_len` field, previously `len(m.group(0))`,
is now derived from the decoded object's span (`end - m.start()`), and
its three later uses on the target/schema/params rejection paths are
updated to the new variable. On the `invalid_json` path no end offset
is available, so `raw_len` there is the length of the remaining text
(`len(text) - m.start()`). Downstream validation — target allowlist,
payload and per-intent schemas, sanitize/frame — is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dazza Greenwood 2026-05-20 03:43:40 -07:00
parent cd76401457
commit 88c8906831
No known key found for this signature in database

View file

@@ -138,9 +138,15 @@ HANDOFF_PAYLOAD_SCHEMA = {
},
}
HANDOFF_RE = re.compile(
r'\{"type":\s*"handoff_request".*?\}', re.DOTALL
)
# Matches the START of a handoff_request object only. The full object —
# which always contains nested objects (`payload`, and `payload.params`) —
# is then extracted with json.JSONDecoder().raw_decode in extract_handoff.
# A plain regex cannot do this safely: a non-greedy `.*?\}` stops at the
# first `}` and truncates every real payload, while a greedy `.*\}`
# over-captures across any later `}` in the stream. raw_decode is string-
# and nesting-aware, so it returns exactly one complete JSON value.
HANDOFF_START_RE = re.compile(r'\{\s*"type"\s*:\s*"handoff_request"')
_JSON_DECODER = json.JSONDecoder()
# Denylist for instruction-like phrasing. Low-assurance; see docstring.
_DENY_PREFIX = ("#", ">", "---", "System:", "Assistant:", "Human:",
@@ -242,30 +248,34 @@ def extract_handoff(text: str, source_agent: str = "unknown") -> dict | None:
Returns a dict with target_agent, intent, params, and pre-rendered
steering_input, or None if any gate fails. Every attempt is logged.
"""
m = HANDOFF_RE.search(text)
m = HANDOFF_START_RE.search(text)
if not m:
return None
raw = m.group(0)
try:
obj = json.loads(raw)
obj, end = _JSON_DECODER.raw_decode(text, m.start())
except json.JSONDecodeError:
audit_log({"source": source_agent, "result": "reject",
"reason": "invalid_json", "raw_len": len(raw)})
"reason": "invalid_json",
"raw_len": len(text) - m.start()})
return None
# Length of the decoded handoff object, for the audit log on any
# later rejection. raw_decode returns the end offset of the parsed
# value; m.group(0) now covers only the start anchor, not the object.
raw_len = end - m.start()
target = obj.get("target_agent")
payload = obj.get("payload")
if target not in ALLOWED_TARGETS:
audit_log({"source": source_agent, "target": target,
"result": "reject", "reason": "target_not_allowlisted",
"raw_len": len(raw)})
"raw_len": raw_len})
return None
try:
jsonschema.validate(instance=payload, schema=HANDOFF_PAYLOAD_SCHEMA)
except jsonschema.ValidationError as e:
audit_log({"source": source_agent, "target": target,
"result": "reject", "reason": f"schema: {e.message}",
"raw_len": len(raw)})
"raw_len": raw_len})
return None
intent = payload["intent"]
@@ -273,7 +283,7 @@ def extract_handoff(text: str, source_agent: str = "unknown") -> dict | None:
if not _validate_params(intent, params):
audit_log({"source": source_agent, "target": target, "intent": intent,
"result": "reject", "reason": "params_schema",
"raw_len": len(raw)})
"raw_len": raw_len})
return None
raw_event = payload.get("event", "") or ""