mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-16 08:25:18 +02:00
84 lines
3.5 KiB
Python
84 lines
3.5 KiB
Python
"""Topic: avoid instruction collision — conflicting guidance in one prompt."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from api.services.voice_prompting_guide._base import (
|
|
AuditCheck,
|
|
Stage,
|
|
StageLens,
|
|
VoicePromptingTopic,
|
|
)
|
|
|
|
TOPIC = VoicePromptingTopic(
|
|
id="instruction_collision",
|
|
title="Avoid instruction collision — contradictory guidance in one prompt",
|
|
severity="high",
|
|
# No applies_to_node_types: collision is cross-cutting. The classic case
|
|
# is global-vs-node, but any single prompt can contradict itself.
|
|
stages={
|
|
Stage.create: StageLens(
|
|
relevant=True,
|
|
lens=(
|
|
"As you write, keep instructions and their examples consistent. If "
|
|
"you say 'disclose your name and reason for calling', make the "
|
|
"example do exactly that — not check availability instead."
|
|
),
|
|
),
|
|
Stage.review: StageLens(
|
|
relevant=True,
|
|
lens=(
|
|
"Read the prompt end-to-end (and global vs. node together) for "
|
|
"sentences that contradict each other even slightly. This is the "
|
|
"primary review-stage check; it breaks more agents than people "
|
|
"expect."
|
|
),
|
|
),
|
|
},
|
|
content="""\
|
|
Instruction collision happens when two parts of a prompt give conflicting or
|
|
partially conflicting guidance. The model has to resolve the conflict in real
|
|
time, on every turn, and picks whichever side it leans toward that turn — so
|
|
the behavior is inconsistent and hard to debug. It's more common than people
|
|
assume.
|
|
|
|
Two classic shapes:
|
|
- Instruction vs. example: the prompt says "Start the call with a greeting and
|
|
disclose your name and reason for calling," but the example is "Hi {{name}},
|
|
I'm Sarah from {{company}} — is this a good time to talk?" The instruction
|
|
says disclose the reason; the example checks availability. The agent now has
|
|
two competing patterns.
|
|
- Style self-conflict: the response-style section says "Be conversational and
|
|
empathize deeply" and later "Keep responses under 10 words." You can't
|
|
empathize deeply in under ten words. Pick one.
|
|
|
|
Collisions also occur between the global prompt and a node prompt — a global
|
|
"always confirm every detail" against a node "keep this quick, don't read
|
|
things back" pull in opposite directions.
|
|
|
|
How to catch it: read the prompt end to end before shipping, and read the
|
|
global and node prompts together. Look for sentences that contradict each other
|
|
even slightly — voice models are especially sensitive because the prompt loads
|
|
on every turn.
|
|
|
|
Note for reviewers: this is an intent-level judgment, not a text pattern. Don't
|
|
try to detect collisions with a regex; compare what the instructions and their
|
|
examples actually ask the agent to do.
|
|
""",
|
|
audit_checks=(
|
|
AuditCheck(
|
|
id="no_contradictions",
|
|
judge_question=(
|
|
"Reading this prompt (and, where relevant, the global prompt "
|
|
"alongside it) end-to-end, are its instructions and examples "
|
|
"mutually consistent — with no two directions that partially or "
|
|
"fully contradict each other?"
|
|
),
|
|
expected="yes",
|
|
quote=(
|
|
"Instructions or examples conflict — reconcile them so the agent "
|
|
"isn't resolving a contradiction every turn."
|
|
),
|
|
),
|
|
),
|
|
cross_refs=("response_style", "persona_and_identity_lock"),
|
|
)
|