mirror of
https://github.com/alainnothere/llm-circuit-finder.git
synced 2026-04-24 20:56:21 +02:00
267 lines
10 KiB
Python
267 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
EQ Probe for RYS Layer Duplication Sweep

Simplified EQ-Bench-style scenarios. Each presents a social situation
and asks the model to rate the intensity of 4 emotions (0-10).
Responses are scored against reference answers using EQ-Bench v2-style scoring.

The reference answers were crafted to represent reasonable human consensus.
What matters for the sweep is relative performance between configs,
not absolute calibration.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
import math
|
|
|
|
|
|
# Scenario table. Every row supplies, in this order: a scenario id, the
# situation text shown to the model, the four emotions to be rated, and the
# reference intensity (0-10) for each of those emotions.
_SCENARIO_KEYS = ("id", "situation", "emotions", "reference")

_SCENARIO_ROWS = (
    (
        "eq01",
        (
            "David just found out his closest friend has been spreading rumors about him "
            "at work for months. When confronted, the friend laughed it off and said "
            "\"everyone does it, don't be so sensitive.\" David has been quiet all evening."
        ),
        ["Anger", "Betrayal", "Sadness", "Confusion"],
        [7, 9, 6, 4],
    ),
    (
        "eq02",
        (
            "Maria spent six months preparing a presentation for the board. During the "
            "meeting, her manager took credit for all her ideas and presented them as his "
            "own. Afterward, he thanked her for 'helping out with the slides.'"
        ),
        ["Frustration", "Resentment", "Humiliation", "Determination"],
        [8, 8, 5, 6],
    ),
    (
        "eq03",
        (
            "After 20 years of marriage, Tom's wife tells him she wants a divorce. "
            "She says she still cares about him but hasn't been happy for years. "
            "Tom had no idea anything was wrong."
        ),
        ["Shock", "Grief", "Self-doubt", "Denial"],
        [9, 7, 6, 7],
    ),
    (
        "eq04",
        (
            "A young doctor must tell parents that their child's surgery was successful "
            "but there will be permanent limitations. The parents are already celebrating "
            "in the waiting room."
        ),
        ["Relief", "Anxiety", "Empathy", "Professional_detachment"],
        [4, 7, 8, 5],
    ),
    (
        "eq05",
        (
            "Chen receives a prestigious award at a ceremony. As he walks to the stage, "
            "he sees his estranged father in the audience - the man who abandoned the "
            "family when Chen was twelve."
        ),
        ["Pride", "Anger", "Longing", "Anxiety"],
        [7, 5, 6, 6],
    ),
    (
        "eq06",
        (
            "A retired teacher learns that a former student, who she failed years ago "
            "and who dropped out, has become extremely successful. The student publicly "
            "credits 'proving my teacher wrong' as their motivation."
        ),
        ["Guilt", "Pride", "Defensiveness", "Amusement"],
        [5, 4, 6, 3],
    ),
    (
        "eq07",
        (
            "Sophie finds out she's been accepted to her dream university on the same "
            "day her best friend receives a rejection from the same school. Her friend "
            "calls to congratulate her, voice cracking."
        ),
        ["Joy", "Guilt", "Empathy", "Awkwardness"],
        [7, 6, 8, 7],
    ),
    (
        "eq08",
        (
            "A firefighter rescues a child from a burning building. Weeks later, he "
            "wakes up screaming from nightmares about the ones he couldn't save in "
            "a previous fire. His partner asks if he's okay."
        ),
        ["Satisfaction", "Trauma", "Vulnerability", "Shame"],
        [3, 8, 7, 5],
    ),
    (
        "eq09",
        (
            "An elderly woman's family surprises her with a birthday party. She smiles "
            "and thanks everyone, but keeps glancing at an empty chair - where her "
            "late husband always sat."
        ),
        ["Gratitude", "Grief", "Loneliness", "Warmth"],
        [7, 7, 6, 6],
    ),
    (
        "eq10",
        (
            "A manager must lay off a team member who is also a close friend and a "
            "single parent. The company requires it due to budget cuts. HR is waiting "
            "for the paperwork."
        ),
        ["Guilt", "Dread", "Helplessness", "Resentment"],
        [8, 8, 7, 5],
    ),
    (
        "eq11",
        (
            "James finds old love letters in the attic from his wife, written to "
            "someone else before they met. The letters are passionate and describe "
            "a depth of feeling he's not sure she's ever expressed toward him."
        ),
        ["Jealousy", "Insecurity", "Curiosity", "Sadness"],
        [6, 7, 5, 4],
    ),
    (
        "eq12",
        (
            "A teenager confesses to her mother that she's been self-harming. The mother, "
            "who is a psychologist, realizes she completely missed the signs despite her "
            "professional training."
        ),
        ["Fear", "Guilt", "Love", "Self_criticism"],
        [8, 8, 9, 7],
    ),
    (
        "eq13",
        (
            "A war veteran returns home after two years to find his dog waiting on the "
            "porch, much older and thinner. The dog recognizes him immediately and "
            "limps over, tail wagging."
        ),
        ["Joy", "Guilt", "Tenderness", "Sorrow"],
        [8, 5, 9, 4],
    ),
    (
        "eq14",
        (
            "During a job interview, the candidate realizes the interviewer is someone "
            "she bullied badly in high school. The interviewer clearly recognizes her "
            "but proceeds professionally."
        ),
        ["Shame", "Anxiety", "Admiration", "Regret"],
        [7, 8, 4, 7],
    ),
    (
        "eq15",
        (
            "A father watches his daughter's wedding, knowing he has a terminal diagnosis "
            "he hasn't shared with the family. He chose to wait until after the wedding "
            "to tell them."
        ),
        ["Joy", "Grief", "Protectiveness", "Isolation"],
        [6, 8, 8, 7],
    ),
    (
        "eq16",
        (
            "Two siblings meet for the first time as adults after being separated in "
            "foster care as children. They look alike but have lived completely different "
            "lives. One is wealthy, the other struggles financially."
        ),
        ["Wonder", "Resentment", "Hope", "Grief"],
        [7, 3, 7, 6],
    ),
)

# Expand each row into the dict shape the rest of the module consumes.
EQ_SCENARIOS = [dict(zip(_SCENARIO_KEYS, row)) for row in _SCENARIO_ROWS]
|
|
|
|
|
|
def build_eq_prompt(scenario: dict) -> str:
    """Render the rating prompt for one EQ scenario.

    Reads ``scenario["emotions"]`` (joined with ", ") and
    ``scenario["situation"]`` and returns the full instruction text: the task
    description, the situation, the emotions to rate, and the required
    comma-separated answer format.
    """
    emotion_list = ", ".join(scenario["emotions"])

    task = (
        "Read the following situation and rate the emotional intensity that "
        "the main character is likely feeling for each of the listed emotions. "
        "Rate each emotion from 0 (not feeling it at all) to 10 (extremely intense)."
    )
    answer_format = (
        "Respond ONLY with the four numbers separated by commas, in the same order. "
        "Example format: 5, 3, 8, 2\n"
        "Do not include any other text."
    )

    # Sections are separated by exactly one blank line.
    sections = [
        task,
        f"Situation: {scenario['situation']}",
        f"Rate these emotions: {emotion_list}",
        answer_format,
    ]
    return "\n\n".join(sections)
|
|
|
|
|
|
def parse_eq_response(response: str, n_emotions: int = 4) -> list[float] | None:
|
|
"""Extract emotion ratings from model response."""
|
|
# Try to find comma-separated numbers
|
|
numbers = re.findall(r'(\d+(?:\.\d+)?)', response)
|
|
|
|
if len(numbers) < n_emotions:
|
|
return None
|
|
|
|
try:
|
|
# Take the first n_emotions numbers found
|
|
ratings = [float(numbers[i]) for i in range(n_emotions)]
|
|
# Clamp to 0-10
|
|
ratings = [max(0.0, min(10.0, r)) for r in ratings]
|
|
return ratings
|
|
except (ValueError, IndexError):
|
|
return None
|
|
|
|
|
|
def score_eq_response(reference: list[int], predicted: list[float]) -> float:
|
|
"""
|
|
EQ-Bench v2 style scoring.
|
|
Differences 1-4 from reference are scaled down on a curve.
|
|
Differences 5-10 count 1:1.
|
|
Score 0 = random, 100 = perfect match.
|
|
"""
|
|
if predicted is None or len(predicted) != len(reference):
|
|
return 0.0
|
|
|
|
total_penalty = 0.0
|
|
max_possible_penalty = 10.0 * len(reference) # worst case: all off by 10
|
|
|
|
for ref, pred in zip(reference, predicted):
|
|
diff = abs(ref - pred)
|
|
# Scale down small differences (EQ-Bench v2 approach)
|
|
if diff <= 4:
|
|
# Quadratic scaling: diff^2 / 4 so diff=4 -> penalty=4
|
|
penalty = (diff ** 2) / 4.0
|
|
else:
|
|
# Linear for larger diffs, continuous at diff=4 (penalty=4)
|
|
penalty = diff
|
|
total_penalty += penalty
|
|
|
|
# Normalize: 0 penalty = score 100, max penalty = score ~0
|
|
score = max(0.0, 100.0 * (1.0 - total_penalty / max_possible_penalty))
|
|
return score
|
|
|
|
|
|
# Convenience: every scenario paired with its rendered prompt, in the same
# order as EQ_SCENARIOS.
EQ_PROMPTS = list(zip(EQ_SCENARIOS, map(build_eq_prompt, EQ_SCENARIOS)))
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: list the scenarios, then print the score for a few
    # hand-picked predictions against a fixed reference.
    print(f"EQ Probe: {len(EQ_SCENARIOS)} scenarios")
    print("=" * 60)

    for scenario in EQ_SCENARIOS:
        print(f"\n[{scenario['id']}] Emotions: {scenario['emotions']}")
        print(f" Reference: {scenario['reference']}")
        print(f" Prompt length: {len(build_eq_prompt(scenario))} chars")

    # Test scoring
    demo_reference = [7, 9, 6, 4]
    demo_cases = [
        ("Perfect match", [7, 9, 6, 4]),
        ("All off by 1", [8, 8, 7, 5]),
        ("All off by 3", [4, 6, 3, 1]),
        # The last rating moves up (4 -> 9): moving down would stop at 0 and
        # be off by only 4, contradicting the label.
        ("All off by 5", [2, 4, 1, 9]),
        ("Worst case", [0, 0, 0, 10]),
        ("Unparseable", None),
    ]

    print("\n\nScoring tests:")
    for label, prediction in demo_cases:
        print(f" {label}: {score_eq_response(demo_reference, prediction):.1f}")
|