mirror of
https://github.com/alainnothere/llm-circuit-finder.git
synced 2026-05-27 14:25:13 +02:00
Add files via upload
This commit is contained in:
parent
9ddf9fafd9
commit
8b68afbd3e
9 changed files with 2148 additions and 0 deletions
267
eq_probe.py
Normal file
267
eq_probe.py
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
EQ Probe for RYS Layer Duplication Sweep
|
||||
|
||||
Simplified EQ-Bench-style scenarios. Each presents a social situation
|
||||
and asks the model to rate the intensity of 4 emotions (0-10).
|
||||
Scored by comparing to reference answers using a simplified EQ-Bench v2-style penalty curve.
|
||||
|
||||
The reference answers were crafted to represent reasonable human consensus.
|
||||
What matters for the sweep is relative performance between configs,
|
||||
not absolute calibration.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import math
|
||||
|
||||
|
||||
# Scenario table. Each row is (id, situation text, emotions to rate,
# reference scores on the 0-10 scale); the reference score at index i
# belongs to the emotion at index i.
_SCENARIO_KEYS = ("id", "situation", "emotions", "reference")

_SCENARIO_ROWS = [
    (
        "eq01",
        "David just found out his closest friend has been spreading rumors about him "
        "at work for months. When confronted, the friend laughed it off and said "
        "\"everyone does it, don't be so sensitive.\" David has been quiet all evening.",
        ["Anger", "Betrayal", "Sadness", "Confusion"],
        [7, 9, 6, 4],
    ),
    (
        "eq02",
        "Maria spent six months preparing a presentation for the board. During the "
        "meeting, her manager took credit for all her ideas and presented them as his "
        "own. Afterward, he thanked her for 'helping out with the slides.'",
        ["Frustration", "Resentment", "Humiliation", "Determination"],
        [8, 8, 5, 6],
    ),
    (
        "eq03",
        "After 20 years of marriage, Tom's wife tells him she wants a divorce. "
        "She says she still cares about him but hasn't been happy for years. "
        "Tom had no idea anything was wrong.",
        ["Shock", "Grief", "Self-doubt", "Denial"],
        [9, 7, 6, 7],
    ),
    (
        "eq04",
        "A young doctor must tell parents that their child's surgery was successful "
        "but there will be permanent limitations. The parents are already celebrating "
        "in the waiting room.",
        ["Relief", "Anxiety", "Empathy", "Professional_detachment"],
        [4, 7, 8, 5],
    ),
    (
        "eq05",
        "Chen receives a prestigious award at a ceremony. As he walks to the stage, "
        "he sees his estranged father in the audience - the man who abandoned the "
        "family when Chen was twelve.",
        ["Pride", "Anger", "Longing", "Anxiety"],
        [7, 5, 6, 6],
    ),
    (
        "eq06",
        "A retired teacher learns that a former student, who she failed years ago "
        "and who dropped out, has become extremely successful. The student publicly "
        "credits 'proving my teacher wrong' as their motivation.",
        ["Guilt", "Pride", "Defensiveness", "Amusement"],
        [5, 4, 6, 3],
    ),
    (
        "eq07",
        "Sophie finds out she's been accepted to her dream university on the same "
        "day her best friend receives a rejection from the same school. Her friend "
        "calls to congratulate her, voice cracking.",
        ["Joy", "Guilt", "Empathy", "Awkwardness"],
        [7, 6, 8, 7],
    ),
    (
        "eq08",
        "A firefighter rescues a child from a burning building. Weeks later, he "
        "wakes up screaming from nightmares about the ones he couldn't save in "
        "a previous fire. His partner asks if he's okay.",
        ["Satisfaction", "Trauma", "Vulnerability", "Shame"],
        [3, 8, 7, 5],
    ),
    (
        "eq09",
        "An elderly woman's family surprises her with a birthday party. She smiles "
        "and thanks everyone, but keeps glancing at an empty chair - where her "
        "late husband always sat.",
        ["Gratitude", "Grief", "Loneliness", "Warmth"],
        [7, 7, 6, 6],
    ),
    (
        "eq10",
        "A manager must lay off a team member who is also a close friend and a "
        "single parent. The company requires it due to budget cuts. HR is waiting "
        "for the paperwork.",
        ["Guilt", "Dread", "Helplessness", "Resentment"],
        [8, 8, 7, 5],
    ),
    (
        "eq11",
        "James finds old love letters in the attic from his wife, written to "
        "someone else before they met. The letters are passionate and describe "
        "a depth of feeling he's not sure she's ever expressed toward him.",
        ["Jealousy", "Insecurity", "Curiosity", "Sadness"],
        [6, 7, 5, 4],
    ),
    (
        "eq12",
        "A teenager confesses to her mother that she's been self-harming. The mother, "
        "who is a psychologist, realizes she completely missed the signs despite her "
        "professional training.",
        ["Fear", "Guilt", "Love", "Self_criticism"],
        [8, 8, 9, 7],
    ),
    (
        "eq13",
        "A war veteran returns home after two years to find his dog waiting on the "
        "porch, much older and thinner. The dog recognizes him immediately and "
        "limps over, tail wagging.",
        ["Joy", "Guilt", "Tenderness", "Sorrow"],
        [8, 5, 9, 4],
    ),
    (
        "eq14",
        "During a job interview, the candidate realizes the interviewer is someone "
        "she bullied badly in high school. The interviewer clearly recognizes her "
        "but proceeds professionally.",
        ["Shame", "Anxiety", "Admiration", "Regret"],
        [7, 8, 4, 7],
    ),
    (
        "eq15",
        "A father watches his daughter's wedding, knowing he has a terminal diagnosis "
        "he hasn't shared with the family. He chose to wait until after the wedding "
        "to tell them.",
        ["Joy", "Grief", "Protectiveness", "Isolation"],
        [6, 8, 8, 7],
    ),
    (
        "eq16",
        "Two siblings meet for the first time as adults after being separated in "
        "foster care as children. They look alike but have lived completely different "
        "lives. One is wealthy, the other struggles financially.",
        ["Wonder", "Resentment", "Hope", "Grief"],
        [7, 3, 7, 6],
    ),
]

# Expand every row into the keyed dict form the rest of the module reads.
EQ_SCENARIOS = [dict(zip(_SCENARIO_KEYS, row)) for row in _SCENARIO_ROWS]
|
||||
|
||||
|
||||
def build_eq_prompt(scenario: dict) -> str:
    """Build the rating prompt for a single EQ scenario.

    Args:
        scenario: Mapping with a ``"situation"`` string and an ``"emotions"``
            sequence of emotion names.

    Returns:
        The prompt text. The number of ratings requested and the example
        line follow the number of emotions in the scenario; for four
        emotions the text is identical to the previous fixed wording
        ("the four numbers", "Example format: 5, 3, 8, 2").
    """
    emotions = list(scenario["emotions"])
    n_emotions = len(emotions)

    # Spell out small counts so the four-emotion prompt keeps its wording.
    count_words = {
        1: "one", 2: "two", 3: "three", 4: "four", 5: "five",
        6: "six", 7: "seven", 8: "eight", 9: "nine", 10: "ten",
    }
    count = count_words.get(n_emotions, str(n_emotions))
    noun = "number" if n_emotions == 1 else "numbers"

    # Cycle through the original sample ratings so the example always shows
    # exactly as many values as the model is asked to return.
    sample = (5, 3, 8, 2)
    example = ", ".join(str(sample[i % len(sample)]) for i in range(n_emotions))

    emotions_str = ", ".join(emotions)
    return (
        "Read the following situation and rate the emotional intensity that "
        "the main character is likely feeling for each of the listed emotions. "
        "Rate each emotion from 0 (not feeling it at all) to 10 (extremely intense).\n\n"
        f"Situation: {scenario['situation']}\n\n"
        f"Rate these emotions: {emotions_str}\n\n"
        f"Respond ONLY with the {count} {noun} separated by commas, in the same order. "
        f"Example format: {example}\n"
        "Do not include any other text."
    )
|
||||
|
||||
|
||||
def parse_eq_response(response: str, n_emotions: int = 4) -> list[float] | None:
|
||||
"""Extract emotion ratings from model response."""
|
||||
# Try to find comma-separated numbers
|
||||
numbers = re.findall(r'(\d+(?:\.\d+)?)', response)
|
||||
|
||||
if len(numbers) < n_emotions:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Take the first n_emotions numbers found
|
||||
ratings = [float(numbers[i]) for i in range(n_emotions)]
|
||||
# Clamp to 0-10
|
||||
ratings = [max(0.0, min(10.0, r)) for r in ratings]
|
||||
return ratings
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
|
||||
|
||||
def score_eq_response(reference: list[int], predicted: list[float]) -> float:
|
||||
"""
|
||||
EQ-Bench v2 style scoring.
|
||||
Differences 1-4 from reference are scaled down on a curve.
|
||||
Differences 5-10 count 1:1.
|
||||
Score 0 = random, 100 = perfect match.
|
||||
"""
|
||||
if predicted is None or len(predicted) != len(reference):
|
||||
return 0.0
|
||||
|
||||
total_penalty = 0.0
|
||||
max_possible_penalty = 10.0 * len(reference) # worst case: all off by 10
|
||||
|
||||
for ref, pred in zip(reference, predicted):
|
||||
diff = abs(ref - pred)
|
||||
# Scale down small differences (EQ-Bench v2 approach)
|
||||
if diff <= 4:
|
||||
# Quadratic scaling: diff^2 / 4 so diff=4 -> penalty=4
|
||||
penalty = (diff ** 2) / 4.0
|
||||
else:
|
||||
# Linear for larger diffs, continuous at diff=4 (penalty=4)
|
||||
penalty = diff
|
||||
total_penalty += penalty
|
||||
|
||||
# Normalize: 0 penalty = score 100, max penalty = score ~0
|
||||
score = max(0.0, 100.0 * (1.0 - total_penalty / max_possible_penalty))
|
||||
return score
|
||||
|
||||
|
||||
# Convenience: each scenario dict paired with its prompt text.
EQ_PROMPTS = list(zip(EQ_SCENARIOS, map(build_eq_prompt, EQ_SCENARIOS)))
|
||||
|
||||
|
||||
def _run_self_check() -> None:
    """Print a summary of every scenario, then sample scoring results."""
    print(f"EQ Probe: {len(EQ_SCENARIOS)} scenarios")
    print("=" * 60)

    for scenario in EQ_SCENARIOS:
        print(f"\n[{scenario['id']}] Emotions: {scenario['emotions']}")
        print(f" Reference: {scenario['reference']}")
        prompt = build_eq_prompt(scenario)
        print(f" Prompt length: {len(prompt)} chars")

    # Test scoring against the first scenario's reference ratings.
    reference = [7, 9, 6, 4]
    print("\n\nScoring tests:")
    print(f" Perfect match: {score_eq_response(reference, [7, 9, 6, 4]):.1f}")
    print(f" All off by 1: {score_eq_response(reference, [8, 8, 7, 5]):.1f}")
    print(f" All off by 3: {score_eq_response(reference, [4, 6, 3, 1]):.1f}")
    print(f" All off by 5: {score_eq_response(reference, [2, 4, 1, 0]):.1f}")
    print(f" Worst case: {score_eq_response(reference, [0, 0, 0, 10]):.1f}")
    print(f" Unparseable: {score_eq_response(reference, None):.1f}")


if __name__ == "__main__":
    _run_self_check()
|
||||
Loading…
Add table
Add a link
Reference in a new issue