feat(auteur): Phase 1 — graph signals + director contract (pure, headless)

The spine of 'The Auteur': the LLM/rule-table becomes a film director.
- topology.ts: computeSignals — Brandes betweenness, union-find clusters,
  recency, retention, suppression, edge surprise (Jaccard x distance). Reuses
  pathfinder internals (now exported). Betweenness capped for huge graphs.
- auteur.ts: typed Shot/DirectorPlan/ResolvedShot contract; resolveShots
  carry-forward resolver (every axis back-filled prev->SHOT_DEFAULTS=today's
  camera constants, so a sparse/garbage plan ALWAYS yields a coherent film);
  planShotsDeterministic (Tier-2 pure auteur via graph-metric->shot-grammar
  rule table); directorSystemPrompt (same table → LLM prompt).
- pathfinder: export buildAdjacency/recencyOf/isContradictionEdge/Adjacency;
  add 'surprise' beat kind. narrator KIND_CHIP gains 'surprise' (satisfies).
- 11 new tests (carry-forward, garbage backfill, keystone betweenness,
  contradiction direction, determinism). 937 tests + build green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sam Valladares 2026-06-22 00:51:20 -05:00
parent 4163f4fc80
commit 8094931ea9
5 changed files with 658 additions and 5 deletions

View file

@ -0,0 +1,204 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { planShotsDeterministic, resolveShots, SHOT_DEFAULTS, type DirectorPlan } from '../auteur';
import { planCinemaPath } from '../pathfinder';
import { computeSignals } from '../topology';
import { makeNode, makeEdge, resetNodeCounter } from '../../__tests__/helpers';
// Suite for resolveShots: whatever the plan looks like (sparse, invalid, or
// null), the resolver must hand back one fully populated shot per beat.
describe('auteur — carry-forward shot resolution', () => {
// Reset the shared node counter so fixtures are built the same way in every test.
beforeEach(() => resetNodeCounter());
// Fixture: three nodes chained a–b–c, with a cinema path planned from 'a'.
function smallPath() {
const a = makeNode({ id: 'a' });
const b = makeNode({ id: 'b' });
const c = makeNode({ id: 'c' });
const edges = [makeEdge('a', 'b', { weight: 0.8 }), makeEdge('b', 'c', { weight: 0.6 })];
return { path: planCinemaPath([a, b, c], edges, 'a'), nodes: [a, b, c], edges };
}
it('fills EVERY axis from a one-field shot, defaulting to today constants', () => {
const { path } = smallPath();
// A plan that only sets `move` on node 'a'; everything else must be back-filled.
const plan: DirectorPlan = {
source: 'backend-llm',
logline: 'x',
arc: 'flat',
shots: [{ nodeId: 'a', move: 'orbit', why: 'test' }],
};
const resolved = resolveShots(plan, path);
// One resolved shot per beat, regardless of how many shots the plan carried.
expect(resolved).toHaveLength(path.beats.length);
// The specified field is honored…
expect(resolved[0].move).toBe('orbit');
// …and every other axis is a real default, never undefined.
expect(resolved[0].standoff).toBe(SHOT_DEFAULTS.standoff);
expect(resolved[0].flightSeconds).toBe(SHOT_DEFAULTS.flightSeconds);
expect(resolved[0].angle).toBe('eye');
// Sweep every key of SHOT_DEFAULTS on every resolved shot.
for (const s of resolved) {
for (const k of Object.keys(SHOT_DEFAULTS) as (keyof typeof SHOT_DEFAULTS)[]) {
expect(s[k]).toBeDefined();
}
expect(typeof s.why).toBe('string');
expect(s.why.length).toBeGreaterThan(0);
}
});
it('carries non-cut axes forward to subsequent beats', () => {
const { path } = smallPath();
const plan: DirectorPlan = {
source: 'backend-llm',
logline: 'x',
arc: 'flat',
// Only the FIRST beat sets standoff; later beats should inherit it.
shots: [{ nodeId: path.beats[0].nodeId, standoff: 41, why: 'set' }],
};
const resolved = resolveShots(plan, path);
expect(resolved[0].standoff).toBe(41);
expect(resolved[resolved.length - 1].standoff).toBe(41); // carried forward
});
it('cut never carries forward — defaults to fly each beat', () => {
const { path } = smallPath();
const plan: DirectorPlan = {
source: 'backend-llm',
logline: 'x',
arc: 'flat',
shots: [{ nodeId: path.beats[0].nodeId, cut: 'hard_cut', why: 'cut' }],
};
const resolved = resolveShots(plan, path);
expect(resolved[0].cut).toBe('hard_cut');
// Guarded assertion: it is skipped (and the test still passes) if the path has a single beat.
if (resolved.length > 1) expect(resolved[1].cut).toBe('fly');
});
it('back-fills garbage / out-of-range LLM fields from defaults', () => {
const { path } = smallPath();
// Deliberately ill-typed plan, forced through the type system to mimic raw LLM output.
const plan = {
source: 'backend-llm',
logline: 'x',
arc: 'flat',
shots: [
{
nodeId: path.beats[0].nodeId,
move: 'teleport', // invalid enum
standoff: 9999, // out of range
dwellSeconds: -5, // out of range
why: '',
},
],
} as unknown as DirectorPlan;
const resolved = resolveShots(plan, path);
expect(resolved[0].move).toBe(SHOT_DEFAULTS.move); // invalid → default
expect(resolved[0].standoff).toBeLessThanOrEqual(90); // clamped
expect(resolved[0].dwellSeconds).toBeGreaterThanOrEqual(0.6); // clamped
expect(resolved[0].why.length).toBeGreaterThan(0); // empty why → fallback
});
it('a null plan still yields one default shot per beat', () => {
const { path } = smallPath();
const resolved = resolveShots(null, path);
expect(resolved).toHaveLength(path.beats.length);
expect(resolved[0].move).toBe(SHOT_DEFAULTS.move);
});
});
// Suite for planShotsDeterministic: the rule-table director must emit a valid,
// grounded plan and apply its grammar rules to the right beats.
describe('auteur — deterministic director', () => {
// Reset the shared node counter so fixtures are built the same way in every test.
beforeEach(() => resetNodeCounter());
it('produces a valid plan: one grounded shot per beat, every why non-empty, every nodeId real', () => {
// Eight nodes, of which only n0..n4 are connected; one edge is a contradiction.
const nodes = Array.from({ length: 8 }, (_, i) => makeNode({ id: `n${i}` }));
const edges = [
makeEdge('n0', 'n1', { weight: 0.9 }),
makeEdge('n1', 'n2', { weight: 0.2, type: 'contradiction' }),
makeEdge('n0', 'n3', { weight: 0.5 }),
makeEdge('n3', 'n4', { weight: 0.7 }),
];
const path = planCinemaPath(nodes, edges, 'n0');
const signals = computeSignals(nodes, edges);
const plan = planShotsDeterministic(path, signals);
const realIds = new Set(nodes.map((n) => n.id));
expect(plan.shots).toHaveLength(path.beats.length);
// Every shot must cite an existing node and carry a non-empty justification.
for (const s of plan.shots) {
expect(realIds.has(s.nodeId)).toBe(true);
expect(s.why && s.why.length).toBeGreaterThan(0);
}
expect(plan.source).toBe('deterministic');
expect(plan.logline.length).toBeGreaterThan(0);
});
it('directs a contradiction beat as a Dutch hard-cut crimson collision', () => {
const a = makeNode({ id: 'a' });
const normal = makeNode({ id: 'normal' });
const conflict = makeNode({ id: 'conflict' });
const edges = [
makeEdge('a', 'normal', { weight: 0.95 }),
makeEdge('a', 'conflict', { weight: 0.2, type: 'contradiction' }),
];
const path = planCinemaPath([a, normal, conflict], edges, 'a');
const signals = computeSignals([a, normal, conflict], edges);
const plan = planShotsDeterministic(path, signals);
// Shots are index-aligned with beats, so look the shot up via the beat kind.
const contradictionShot = plan.shots.find((_, i) => path.beats[i].kind === 'contradiction');
expect(contradictionShot).toBeDefined();
expect(contradictionShot!.stormMode).toBe('contradiction');
expect(contradictionShot!.cut).toBe('hard_cut');
expect(contradictionShot!.dutch).toBeGreaterThan(0);
expect(contradictionShot!.scoreCue).toBe('minor_drop');
});
it('ends on a crane pull-back with a major resolve', () => {
// Five nodes wired as a chain m0→m1→m2→m3→m4.
const nodes = Array.from({ length: 5 }, (_, i) => makeNode({ id: `m${i}` }));
const edges = nodes.slice(1).map((n, i) => makeEdge(`m${i}`, n.id, { weight: 0.6 }));
const path = planCinemaPath(nodes, edges, 'm0');
const signals = computeSignals(nodes, edges);
const plan = planShotsDeterministic(path, signals);
const last = plan.shots[plan.shots.length - 1];
expect(last.move).toBe('crane');
expect(last.scoreCue).toBe('major_resolve');
});
it('is deterministic — same inputs yield the same plan', () => {
const nodes = Array.from({ length: 6 }, (_, i) => makeNode({ id: `d${i}` }));
const edges = [makeEdge('d0', 'd1', { weight: 0.8 }), makeEdge('d1', 'd2', { weight: 0.5 })];
const path = planCinemaPath(nodes, edges, 'd0');
const sig = computeSignals(nodes, edges);
// Plan twice from identical inputs; only the move sequence and logline are compared.
const p1 = planShotsDeterministic(path, sig);
const p2 = planShotsDeterministic(path, sig);
expect(p1.shots.map((s) => s.move)).toEqual(p2.shots.map((s) => s.move));
expect(p1.logline).toBe(p2.logline);
});
});
// Suite for computeSignals: betweenness, clustering and edge-level signals.
describe('topology — graph signals', () => {
// Reset the shared node counter so fixtures are built the same way in every test.
beforeEach(() => resetNodeCounter());
it('computes betweenness, clusters, and peak keystone on a real shape', () => {
// Two clusters bridged by 'hub' → hub has the highest betweenness.
const hub = makeNode({ id: 'hub' });
const l1 = makeNode({ id: 'l1' });
const l2 = makeNode({ id: 'l2' });
const r1 = makeNode({ id: 'r1' });
const r2 = makeNode({ id: 'r2' });
// Edges form the chain l1 – l2 – hub – r1 – r2.
const edges = [
makeEdge('l1', 'l2'),
makeEdge('l2', 'hub'),
makeEdge('hub', 'r1'),
makeEdge('r1', 'r2'),
];
const sig = computeSignals([hub, l1, l2, r1, r2], edges);
expect(sig.peakBetweennessId).toBe('hub');
expect(sig.nodes.get('hub')!.betweenness).toBeGreaterThan(sig.nodes.get('l1')!.betweenness);
expect(sig.clusterCount).toBe(1); // all connected through hub
// All signals are finite and in range.
for (const s of sig.nodes.values()) {
expect(s.betweenness).toBeGreaterThanOrEqual(0);
expect(s.betweenness).toBeLessThanOrEqual(1);
expect(Number.isFinite(s.recencyRank)).toBe(true);
}
});
it('flags contradiction edges and computes surprise in range', () => {
const a = makeNode({ id: 'a' });
const b = makeNode({ id: 'b' });
// A single low-weight edge typed 'contradiction'.
const edges = [makeEdge('a', 'b', { weight: 0.1, type: 'contradiction' })];
const sig = computeSignals([a, b], edges);
expect(sig.edges[0].isContradiction).toBe(true);
// Only the 0..1 range of surprise is pinned here, not its exact value.
expect(sig.edges[0].surprise).toBeGreaterThanOrEqual(0);
expect(sig.edges[0].surprise).toBeLessThanOrEqual(1);
});
});

View file

@ -0,0 +1,223 @@
// The Auteur — the director's brain + the typed shot-plan contract.
//
// The LLM (Tier 1) or the deterministic rule table (Tier 2) produces a
// DirectorPlan: a sequence of cinematographic Shots, one per CinemaBeat, each
// grounded in a real node and justified by a real graph metric. The camera
// runtime (director.ts) executes it. Carry-forward semantics mean a sparse or
// half-hallucinated plan ALWAYS resolves to a coherent film — the same
// robustness pattern as narrator.resolveNarration.
import type { CinemaPath, CinemaBeat } from './pathfinder';
import type { GraphSignals } from './topology';
// ── Camera grammar (string unions keep LLM output validatable) ───────────────
/** Camera move for a shot. */
export type Move = 'push_in' | 'pull_back' | 'orbit' | 'crane' | 'whip_pan' | 'rack_focus' | 'hold';
/** Vertical camera angle relative to the subject. */
export type Angle = 'eye' | 'low' | 'high'; // low = look up (power); high = look down (decay)
/** Cut style for a shot; resolveShots falls back to 'fly' on every beat that does not set one. */
export type Cut = 'fly' | 'hard_cut' | 'match_cut';
/** Storm mode attached to a shot. */
export type StormMode = 'anchor' | 'connection' | 'contradiction' | 'surprise';
/** Tone tag carried on a shot. */
export type CaptionTone = 'curious' | 'tense' | 'resolved' | 'awe' | 'neutral';
/** Score cue carried on a shot. */
export type ScoreCue = 'motif' | 'minor_drop' | 'major_resolve' | 'silence';
/** Act label; planShotsDeterministic derives it from the beat's progress through the path. */
export type Act = 'I' | 'II' | 'III';
/** Named arc for the whole plan. */
export type EmotionalArc = 'man_in_hole' | 'rags_to_riches' | 'icarus' | 'cinderella' | 'oedipus' | 'flat';
/** Which tier produced a DirectorPlan. */
export type DirectorSource = 'backend-llm' | 'on-device' | 'deterministic';
/** A directed shot. Only axes that CHANGE need be set — the rest carry forward
 * from the previous resolved shot (ultimate default = today's camera constants).
 * Only `nodeId` and `why` are required; every other axis is optional. */
export interface Shot {
nodeId: string; // MUST cite a real node (alignment key + grounding constraint)
move?: Move;
angle?: Angle;
dutch?: number; // camera roll, radians, 0..~0.5
standoff?: number; // world units
flightSeconds?: number;
dwellSeconds?: number;
halflife?: number; // spring smoothing; 0 = jump-cut
cut?: Cut; // not carried forward by resolveShots — falls back to 'fly' on each beat
stormMode?: StormMode;
intensity?: number; // 0..1 → scales the ignition spike
tension?: number; // 0..1 master scalar
act?: Act;
tone?: CaptionTone;
scoreCue?: ScoreCue;
why: string; // REQUIRED: cites the real metric driving this shot
viaEdgeKey?: string; // `${source}->${target}` for two-node framing
}
/** A complete shot plan for one film: provenance, logline, arc, and the shots
 * themselves. resolveShots matches `shots` to beats by `nodeId`. */
export interface DirectorPlan {
// Which tier produced the plan.
source: DirectorSource;
// One-sentence summary of the film.
logline: string;
// Overall arc label for the plan.
arc: EmotionalArc;
// Possibly sparse shots; missing axes are back-filled by resolveShots.
shots: Shot[];
}
/** Every axis filled after carry-forward — what the director reads each beat.
 * All Shot fields become required except `viaEdgeKey`, which stays optional. */
export type ResolvedShot = Required<Omit<Shot, 'viaEdgeKey'>> & { viaEdgeKey?: string };
// Ultimate defaults — today's hardcoded camera constants, so a plan-less or
// fully-sparse run is byte-identical to the pre-Auteur camera.
// `nodeId` and `why` are excluded from the type: resolveShots supplies them per beat.
export const SHOT_DEFAULTS: Omit<ResolvedShot, 'nodeId' | 'why'> = {
move: 'hold',
angle: 'eye',
dutch: 0,
standoff: 26,
flightSeconds: 2.4,
dwellSeconds: 3.2,
halflife: 0.35,
cut: 'fly',
stormMode: 'connection',
intensity: 0.7,
tension: 0.3,
act: 'I',
tone: 'neutral',
scoreCue: 'motif',
};
// Runtime mirrors of the string-union types above. resolveShots passes these to
// pick() so that an unknown enum value in a plan is replaced by a fallback.
const MOVES: ReadonlySet<Move> = new Set(['push_in', 'pull_back', 'orbit', 'crane', 'whip_pan', 'rack_focus', 'hold']);
const ANGLES: ReadonlySet<Angle> = new Set(['eye', 'low', 'high']);
const CUTS: ReadonlySet<Cut> = new Set(['fly', 'hard_cut', 'match_cut']);
const STORM_MODES: ReadonlySet<StormMode> = new Set(['anchor', 'connection', 'contradiction', 'surprise']);
const TONES: ReadonlySet<CaptionTone> = new Set(['curious', 'tense', 'resolved', 'awe', 'neutral']);
const SCORE_CUES: ReadonlySet<ScoreCue> = new Set(['motif', 'minor_drop', 'major_resolve', 'silence']);
const ACTS: ReadonlySet<Act> = new Set(['I', 'II', 'III']);
/**
 * Coerce an untrusted value into a number inside `[lo, hi]`.
 *
 * @param v - Candidate value; anything that is not a finite number is rejected.
 * @param lo - Inclusive lower bound applied to accepted values.
 * @param hi - Inclusive upper bound applied to accepted values.
 * @param fallback - Returned as-is (not clamped) when `v` is rejected.
 * @returns The clamped value, or `fallback`.
 */
function num(v: unknown, lo: number, hi: number, fallback: number): number {
  // Strings, null, undefined, NaN and ±Infinity all take the fallback path.
  if (typeof v !== 'number' || !Number.isFinite(v)) {
    return fallback;
  }
  const upperBounded = Math.min(hi, v);
  return Math.max(lo, upperBounded);
}
/**
 * Validate an untrusted enum-like value against a set of allowed members.
 *
 * @param v - Candidate value; only strings are considered.
 * @param set - Allowed members.
 * @param fallback - Returned when `v` is not a string or not in `set`.
 * @returns `v` when it is an allowed member, otherwise `fallback`.
 */
function pick<T>(v: unknown, set: ReadonlySet<T>, fallback: T): T {
  if (typeof v !== 'string') {
    return fallback;
  }
  const candidate = v as T;
  return set.has(candidate) ? candidate : fallback;
}
/**
 * Resolve a DirectorPlan into one fully-specified ResolvedShot per beat.
 *
 * Shots are aligned to beats by nodeId (when a nodeId appears more than once in
 * the plan, the last shot wins). Every unspecified or invalid axis is
 * back-filled by carry-forward (previous resolved shot → SHOT_DEFAULTS), so a
 * shot can NEVER be blank or invalid.
 *
 * Exceptions to carry-forward: `cut` and `scoreCue` fall back to their
 * SHOT_DEFAULTS value on every beat, and `viaEdgeKey` is only present when the
 * beat's own shot supplies a string.
 *
 * @param plan - The plan to resolve, or `null`. A plan whose `shots` is not an
 *   array is treated as having no shots rather than throwing.
 * @param path - The cinema path; the result has exactly one entry per beat, in order.
 * @returns One fully populated shot per beat.
 */
export function resolveShots(plan: DirectorPlan | null, path: CinemaPath): ResolvedShot[] {
  // Plans can arrive as unchecked LLM output: `shots` may be missing or not an
  // array at all. Iterating a non-iterable would throw, so anything that is not
  // an array is treated as "no shots" and every beat falls back to defaults.
  const candidate: unknown = plan?.shots;
  const planned: readonly (Shot | null | undefined)[] = Array.isArray(candidate) ? candidate : [];

  const byNode = new Map<string, Shot>();
  for (const s of planned) {
    if (s && typeof s.nodeId === 'string') byNode.set(s.nodeId, s);
  }

  const resolved: ResolvedShot[] = [];
  let prev: ResolvedShot | null = null;
  for (const beat of path.beats) {
    const raw = byNode.get(beat.nodeId);
    // First beat has nothing to inherit from, so it starts from the defaults.
    const base = prev ?? { ...SHOT_DEFAULTS, nodeId: beat.nodeId, why: '' };
    const shot: ResolvedShot = {
      nodeId: beat.nodeId,
      move: pick(raw?.move, MOVES, base.move),
      angle: pick(raw?.angle, ANGLES, base.angle),
      dutch: num(raw?.dutch, 0, 0.6, base.dutch),
      standoff: num(raw?.standoff, 8, 90, base.standoff),
      flightSeconds: num(raw?.flightSeconds, 0.4, 6, base.flightSeconds),
      dwellSeconds: num(raw?.dwellSeconds, 0.6, 8, base.dwellSeconds),
      halflife: num(raw?.halflife, 0, 1.5, base.halflife),
      // cut never carries forward — default per beat.
      cut: pick(raw?.cut, CUTS, SHOT_DEFAULTS.cut),
      stormMode: pick(raw?.stormMode, STORM_MODES, base.stormMode),
      intensity: num(raw?.intensity, 0, 1, base.intensity),
      tension: num(raw?.tension, 0, 1, base.tension),
      act: pick(raw?.act, ACTS, base.act),
      tone: pick(raw?.tone, TONES, base.tone),
      // NOTE(review): scoreCue also resets per beat instead of inheriting from
      // the previous shot — confirm this is intended, as it is for `cut`.
      scoreCue: pick(raw?.scoreCue, SCORE_CUES, SHOT_DEFAULTS.scoreCue),
      // Blank or whitespace-only justifications inherit the previous one.
      why: typeof raw?.why === 'string' && raw.why.trim() ? raw.why : base.why || 'establishing shot',
      viaEdgeKey: typeof raw?.viaEdgeKey === 'string' ? raw.viaEdgeKey : undefined,
    };
    resolved.push(shot);
    prev = shot;
  }
  return resolved;
}
// ── The deterministic auteur (Tier 2) ────────────────────────────────────────
// The graph-metric → shot-grammar rule table. This SAME table is handed to the
// LLM as its system prompt (see directorSystemPrompt), so Tier-1 output is
// directly comparable to and back-fillable against this baseline.
/**
 * Map a beat's progress through the film to its act.
 *
 * @param progress - Position in the path; callers pass 0 for the first beat and 1 for the last.
 * @returns 'I' below 0.34, 'II' below 0.72, otherwise 'III'.
 */
function actFor(progress: number): Act {
  if (progress < 0.34) {
    return 'I';
  }
  if (progress < 0.72) {
    return 'II';
  }
  return 'III';
}
/**
 * Produce a cinematic DirectorPlan from pure graph signals — no LLM. This alone
 * ships the hero film: every shot is grounded and justified by a real metric.
 *
 * Rules are applied in a fixed order and each later rule overrides the axes it
 * sets: origin → keystone → contradiction → surprise → fading → recent → finale.
 *
 * @param path - Beats to direct; exactly one shot is emitted per beat, in order.
 * @param signals - Per-node metrics plus the keystone id consulted by the rules.
 * @returns A plan with `source: 'deterministic'`, a logline, an arc
 *   ('man_in_hole' when any beat is a contradiction, else 'rags_to_riches'),
 *   and the shots.
 */
export function planShotsDeterministic(path: CinemaPath, signals: GraphSignals): DirectorPlan {
  const n = path.beats.length;
  const shots: Shot[] = path.beats.map((beat, i) => {
    // A single-beat path has no span to divide by, so it sits at progress 0.
    const progress = n > 1 ? i / (n - 1) : 0;
    const act = actFor(progress);
    const sig = signals.nodes.get(beat.nodeId);
    const isPeak = beat.nodeId === signals.peakBetweennessId;
    const isFinale = i === n - 1;
    const isOrigin = i === 0;

    // Default shot for a plain connection beat.
    let shot: Shot = {
      nodeId: beat.nodeId,
      move: 'push_in',
      angle: 'eye',
      cut: 'fly',
      stormMode: 'connection',
      tone: 'curious',
      scoreCue: 'motif',
      act,
      intensity: 0.6,
      tension: 0.3,
      why: 'a connected memory',
    };

    if (isOrigin) {
      shot = {
        ...shot,
        move: 'push_in',
        tone: 'curious',
        tension: 0.25,
        stormMode: 'anchor',
        why: 'opening on the focal memory',
      };
    }

    // High-betweenness keystone → reverent low-angle slow orbit.
    if (isPeak || (sig && sig.betweenness > 0.6)) {
      shot = {
        ...shot,
        move: 'orbit',
        angle: 'low',
        stormMode: 'anchor',
        intensity: 0.75,
        tension: 0.45,
        tone: 'awe',
        why: 'low-angle orbit — the most load-bearing memory in the graph',
      };
    }

    // Contradiction → Dutch push-in, hard cut, crimson chaos, minor drop.
    if (beat.kind === 'contradiction') {
      shot = {
        ...shot,
        move: 'push_in',
        angle: 'eye',
        dutch: 0.28,
        cut: 'hard_cut',
        stormMode: 'contradiction',
        intensity: 1,
        tension: 0.95,
        tone: 'tense',
        scoreCue: 'minor_drop',
        viaEdgeKey: beat.viaEdge ? `${beat.viaEdge.source}->${beat.viaEdge.target}` : undefined,
        why: 'two memories in tension — a Dutch two-shot collision',
      };
    }

    // Surprise edge → gold/violet convergence, rising awe.
    if (beat.kind === 'surprise') {
      shot = {
        ...shot,
        move: 'orbit',
        stormMode: 'surprise',
        intensity: 0.85,
        tension: 0.6,
        tone: 'awe',
        scoreCue: 'motif',
        why: 'a surprising, distant-but-plausible connection',
      };
    }

    // Fading memory → drifting high angle.
    if (sig && (sig.retention < 0.35 || sig.suppression > 0.5)) {
      shot = {
        ...shot,
        angle: 'high',
        move: 'pull_back',
        tone: 'neutral',
        intensity: 0.4,
        why: 'a fading memory — high-angle drift',
      };
    }

    // Recent → the "now" beat.
    if (beat.kind === 'recent') {
      shot = { ...shot, move: 'push_in', tone: 'resolved', tension: 0.4, why: 'where the memory is now' };
    }

    // Finale → crane pull-back, major resolve. Applied last so it always wins.
    if (isFinale) {
      shot = {
        ...shot,
        move: 'crane',
        cut: 'fly',
        stormMode: 'anchor',
        tone: 'awe',
        tension: 0.5,
        scoreCue: 'major_resolve',
        why: 'crane pull-back over the whole cluster — resolution',
      };
    }

    return shot;
  });

  const arc: EmotionalArc = path.beats.some((b) => b.kind === 'contradiction') ? 'man_in_hole' : 'rags_to_riches';
  const originLabel = path.beats[0]?.node.label ?? 'a memory';
  // The label and the shot count need a separator, otherwise they fuse into
  // e.g. "Alpha3 shots". The noun also follows the count.
  const shotCount = `${n} ${n === 1 ? 'shot' : 'shots'}`;
  const coda = arc === 'man_in_hole' ? ', through a contradiction and out the other side' : '';
  const logline = `A short film about ${originLabel} — ${shotCount} through the graph${coda}.`;
  return { source: 'deterministic', logline, arc, shots };
}
/**
 * The rule table as an LLM system prompt — keeps Tier-1 output comparable to
 * the Tier-2 baseline (and thus back-fillable by resolveShots).
 *
 * @returns The prompt as newline-separated lines: a three-line brief, the
 *   grammar table, and a closing instruction.
 */
export function directorSystemPrompt(): string {
  const brief = [
    'You are a film director shooting a short documentary about an AI\'s own memory graph.',
    'Output a DirectorPlan: a logline, an emotional arc, and one shot per beat.',
    'Each shot MUST cite a real nodeId and a real "why" referencing a graph metric.',
  ];
  const grammar = [
    '- high betweenness (load-bearing memory) → low-angle slow orbit, reverent',
    '- contradiction edge → Dutch angle + push_in + hard_cut + crimson storm + minor_drop score',
    '- surprising distant link → gold/violet orbit→stream convergence + awe',
    '- merge/supersede → match_cut at identical standoff+angle (same idea)',
    '- low retention / high suppression → high-angle drift (fading)',
    '- finale → crane pull_back + major_resolve',
  ];
  const closing = 'Build a real emotional arc across acts I→II→III. Only specify axes that change.';
  return [...brief, 'Grammar → meaning:', ...grammar, closing].join('\n');
}

View file

@ -35,6 +35,7 @@ const KIND_CHIP = {
contradiction: 'Tension',
recent: 'Now',
bridge: 'Jump',
surprise: 'Surprise',
} satisfies Record<CinemaBeat['kind'], string>;
function snippet(content: string, max = 90): string {

View file

@ -23,7 +23,7 @@ export interface CinemaBeat {
/** Edge traversed to arrive here (null for the opening beat). */
viaEdge: GraphEdge | null;
/** Why this beat exists — drives the deterministic caption + visual emphasis. */
kind: 'origin' | 'connection' | 'contradiction' | 'recent' | 'bridge';
kind: 'origin' | 'connection' | 'contradiction' | 'recent' | 'bridge' | 'surprise';
/** 0..1 emphasis used by the sandbox to spike emissive/bloom on arrival. */
intensity: number;
}
@ -40,11 +40,11 @@ export interface CinemaPath {
flowEdges: GraphEdge[];
}
interface Adjacency {
export interface Adjacency {
[nodeId: string]: { edge: GraphEdge; otherId: string }[];
}
function buildAdjacency(edges: GraphEdge[]): Adjacency {
export function buildAdjacency(edges: GraphEdge[]): Adjacency {
const adj: Adjacency = {};
for (const edge of edges) {
(adj[edge.source] ??= []).push({ edge, otherId: edge.target });
@ -57,12 +57,12 @@ function buildAdjacency(edges: GraphEdge[]): Adjacency {
return adj;
}
function isContradictionEdge(edge: GraphEdge): boolean {
export function isContradictionEdge(edge: GraphEdge): boolean {
const t = (edge.type ?? '').toLowerCase();
return t.includes('contradict') || t.includes('conflict') || t.includes('supersede');
}
function recencyOf(node: GraphNode): number {
export function recencyOf(node: GraphNode): number {
// Larger = more recent. Tolerates missing/invalid timestamps.
const t = Date.parse(node.updatedAt || node.createdAt || '');
return Number.isFinite(t) ? t : 0;

View file

@ -0,0 +1,225 @@
// The Auteur — graph signal extraction.
//
// Pure, dependency-free statistics over the REAL /api/graph data, computed once
// per Cinema launch. These signals are what gives the AI director something
// meaningful to direct: which memory is most load-bearing (betweenness), where
// tension lives (contradictions), what's surprising (distant-but-plausible
// links), what's fading (low retention / suppression). No LLM, no WebGPU, no
// network — fully headless-testable.
import type { GraphNode, GraphEdge } from '$types';
import { buildAdjacency, recencyOf, isContradictionEdge } from './pathfinder';
/** Per-node director signals, as produced by computeSignals. */
export interface NodeSignal {
nodeId: string;
/** Raw connection count. */
degree: number;
/** Brandes betweenness centrality, normalized 0..1 — how load-bearing this
 * memory is as a bridge between clusters. The director favors high-betweenness
 * nodes for hero shots. */
betweenness: number;
/** Connected-component id (which cluster of memory this belongs to). */
clusterId: number;
/** 0..1, 1 = most recent. */
recencyRank: number;
/** FSRS retention 0..1. */
retention: number;
/** Suppression pressure 0..1 (memory actively being forgotten). */
suppression: number;
}
/** Per-edge director signals, as produced by computeSignals (one per input edge). */
export interface EdgeSignal {
source: string;
target: string;
isContradiction: boolean;
isMergeSupersede: boolean;
/** 0..1: high when endpoints share neighbors (plausible) yet the edge weight
 * is low (distant) — a surprising, non-obvious connection. */
surprise: number;
weight: number;
}
/** Everything computeSignals extracts from one graph. */
export interface GraphSignals {
/** Node signals keyed by node id. */
nodes: Map<string, NodeSignal>;
/** Edge signals, in the same order as the input edges. */
edges: EdgeSignal[];
/** Number of connected components among the nodes. */
clusterCount: number;
/** Node id with the single highest betweenness — the graph's keystone. */
peakBetweennessId: string;
}
/** Substrings that mark an edge type as a merge/supersede relation. */
const MERGE_SUPERSEDE_MARKERS = ['merge', 'supersede', 'duplicate'];

/**
 * Whether an edge's type names a merge, supersede or duplicate relation.
 * Matching is case-insensitive and by substring; a missing type never matches.
 */
function isMergeSupersedeEdge(edge: GraphEdge): boolean {
  const kind = (edge.type ?? '').toLowerCase();
  return MERGE_SUPERSEDE_MARKERS.some((marker) => kind.includes(marker));
}
/**
 * Brandes' algorithm for betweenness centrality on an unweighted, undirected
 * graph. O(V·E) — fine for /api/graph payloads. Returns raw (unnormalized)
 * scores keyed by node id; the caller normalizes.
 *
 * The computation is restricted to the subgraph induced by `nodeIds`:
 * adjacency entries that point at an id outside `nodeIds` are ignored. This
 * matters because `adj` may be built from every edge in the payload while
 * `nodeIds` is only a subset of it.
 *
 * @param nodeIds - Nodes to score; each is also used as a BFS source.
 * @param adj - Adjacency lists keyed by node id. Because every node is a
 *   source, a path between two nodes contributes once per direction.
 * @returns A map with exactly one entry per id in `nodeIds`.
 */
function brandesBetweenness(nodeIds: string[], adj: Record<string, { otherId: string }[]>): Map<string, number> {
  const cb = new Map<string, number>();
  for (const v of nodeIds) cb.set(v, 0);

  for (const s of nodeIds) {
    const stack: string[] = [];
    const pred = new Map<string, string[]>();
    const sigma = new Map<string, number>();
    const dist = new Map<string, number>();
    for (const v of nodeIds) {
      pred.set(v, []);
      sigma.set(v, 0);
      dist.set(v, -1);
    }
    sigma.set(s, 1);
    dist.set(s, 0);

    // BFS (unweighted shortest paths).
    const queue: string[] = [s];
    let head = 0;
    while (head < queue.length) {
      const v = queue[head++];
      stack.push(v);
      const nextDist = (dist.get(v) ?? 0) + 1;
      for (const { otherId: w } of adj[v] ?? []) {
        const seenDist = dist.get(w);
        // `dist` holds every id in `nodeIds`, so a miss means `w` is outside
        // the working set. Following it would reach a node with no
        // predecessor list, so the edge is skipped.
        if (seenDist === undefined) continue;
        if (seenDist < 0) {
          dist.set(w, nextDist);
          queue.push(w);
        }
        if (dist.get(w) === nextDist) {
          // `v` lies on a shortest path to `w`.
          sigma.set(w, (sigma.get(w) ?? 0) + (sigma.get(v) ?? 0));
          pred.get(w)?.push(v);
        }
      }
    }

    // Accumulation (back-propagate dependencies), farthest nodes first.
    const delta = new Map<string, number>();
    for (const v of nodeIds) delta.set(v, 0);
    while (stack.length > 0) {
      const w = stack.pop()!;
      for (const v of pred.get(w) ?? []) {
        const c = ((sigma.get(v) ?? 0) / (sigma.get(w) || 1)) * (1 + (delta.get(w) ?? 0));
        delta.set(v, (delta.get(v) ?? 0) + c);
      }
      // The source itself never counts as an intermediary.
      if (w !== s) cb.set(w, (cb.get(w) ?? 0) + (delta.get(w) ?? 0));
    }
  }
  return cb;
}
/**
 * Union-find connected components → a cluster id per node.
 *
 * Edges with an endpoint that is not in `nodeIds` are ignored. Cluster ids are
 * dense integers handed out in the order clusters are first met while walking
 * `nodeIds`.
 *
 * @returns `clusterOf` (node id → cluster id) and `count` (number of clusters).
 */
function components(nodeIds: string[], edges: GraphEdge[]): { clusterOf: Map<string, number>; count: number } {
  // Every node starts out as the root of its own singleton set.
  const parent = new Map<string, string>(nodeIds.map((id): [string, string] => [id, id]));

  const rootOf = (start: string): string => {
    let root = start;
    while (parent.get(root) !== root) {
      root = parent.get(root)!;
    }
    // Path compression: re-point every node on the walked chain at the root.
    let walker = start;
    while (parent.get(walker) !== root) {
      const above = parent.get(walker)!;
      parent.set(walker, root);
      walker = above;
    }
    return root;
  };

  for (const { source, target } of edges) {
    if (!parent.has(source) || !parent.has(target)) continue;
    const sourceRoot = rootOf(source);
    const targetRoot = rootOf(target);
    if (sourceRoot !== targetRoot) parent.set(sourceRoot, targetRoot);
  }

  const clusterByRoot = new Map<string, number>();
  const clusterOf = new Map<string, number>();
  for (const id of nodeIds) {
    const root = rootOf(id);
    let cluster = clusterByRoot.get(root);
    if (cluster === undefined) {
      cluster = clusterByRoot.size;
      clusterByRoot.set(root, cluster);
    }
    clusterOf.set(id, cluster);
  }
  return { clusterOf, count: clusterByRoot.size };
}
/**
 * Compute all director signals from the real graph. Pure; safe to call once at
 * launch. Caps betweenness work on very large graphs by limiting to the
 * top-degree subset (the only nodes that can carry meaningful centrality).
 *
 * @param nodes - Graph nodes; each yields one NodeSignal.
 * @param edges - Graph edges; each yields one EdgeSignal, in input order.
 * @returns Node signals keyed by id, edge signals, the connected-component
 *   count, and the id of the node with the highest normalized betweenness
 *   (the first node when all scores tie, '' when there are no nodes).
 */
export function computeSignals(nodes: GraphNode[], edges: GraphEdge[]): GraphSignals {
  const nodeIds = nodes.map((n) => n.id);
  const adj = buildAdjacency(edges);

  // Recency ranking (0..1, 1 = newest).
  const byRecency = [...nodes].sort((a, b) => recencyOf(a) - recencyOf(b));
  const recencyRank = new Map<string, number>();
  byRecency.forEach((n, i) => recencyRank.set(n.id, nodes.length > 1 ? i / (nodes.length - 1) : 1));

  // Betweenness — guard pathological sizes: above the cap, compute on the
  // top-degree subset (others get 0; they can't be meaningful bridges anyway).
  // NOTE(review): `adj` is built from all edges, so the scorer is handed
  // neighbours that are outside `betweennessNodes` — confirm it tolerates them.
  const BETWEENNESS_CAP = 600;
  let betweennessNodes = nodeIds;
  if (nodeIds.length > BETWEENNESS_CAP) {
    betweennessNodes = [...nodeIds]
      .sort((a, b) => (adj[b]?.length ?? 0) - (adj[a]?.length ?? 0))
      .slice(0, BETWEENNESS_CAP);
  }
  const rawBetween = brandesBetweenness(betweennessNodes, adj);
  let maxBetween = 0;
  for (const v of rawBetween.values()) maxBetween = Math.max(maxBetween, v);

  const { clusterOf, count: clusterCount } = components(nodeIds, edges);

  // Largest suppression count, floored at 1 so the division below is safe.
  // Done with a loop rather than `Math.max(...spread)`: spreading one argument
  // per node can exceed the engine's argument limit on very large graphs.
  let maxSuppression = 1;
  for (const n of nodes) {
    const count = n.suppression_count ?? 0;
    if (count > maxSuppression) maxSuppression = count;
  }

  const nodeSignals = new Map<string, NodeSignal>();
  let peakBetweennessId = nodeIds[0] ?? '';
  let peakVal = -1;
  for (const n of nodes) {
    // Normalize against the best raw score; nodes outside the scored subset get 0.
    const bt = maxBetween > 0 ? (rawBetween.get(n.id) ?? 0) / maxBetween : 0;
    if (bt > peakVal) {
      peakVal = bt;
      peakBetweennessId = n.id;
    }
    nodeSignals.set(n.id, {
      nodeId: n.id,
      degree: adj[n.id]?.length ?? 0,
      betweenness: bt,
      clusterId: clusterOf.get(n.id) ?? 0,
      recencyRank: recencyRank.get(n.id) ?? 0,
      retention: clamp01(n.retention ?? 0),
      suppression: clamp01((n.suppression_count ?? 0) / maxSuppression),
    });
  }

  // Edge signals incl. surprise (shared-neighbor overlap × edge distance).
  const neighborSets = new Map<string, Set<string>>();
  for (const id of nodeIds) neighborSets.set(id, new Set((adj[id] ?? []).map((a) => a.otherId)));
  const edgeSignals: EdgeSignal[] = edges.map((e) => {
    const a = neighborSets.get(e.source);
    const b = neighborSets.get(e.target);
    let shared = 0;
    if (a && b) {
      // Walk the smaller set and probe the larger one.
      const [small, large] = a.size < b.size ? [a, b] : [b, a];
      for (const x of small) if (large.has(x)) shared++;
    }
    // An empty union falls back to 1 so the ratio below never divides by zero.
    const union = (a?.size ?? 0) + (b?.size ?? 0) - shared || 1;
    const overlap = shared / union; // Jaccard: structural plausibility.
    const distance = 1 - clamp01(e.weight ?? 0); // low weight = semantically distant.
    return {
      source: e.source,
      target: e.target,
      isContradiction: isContradictionEdge(e),
      isMergeSupersede: isMergeSupersedeEdge(e),
      surprise: clamp01(overlap * distance * 2), // plausible AND distant = surprising.
      weight: e.weight ?? 0,
    };
  });

  return { nodes: nodeSignals, edges: edgeSignals, clusterCount, peakBetweennessId };
}
/** Clamp a number into 0..1; NaN and ±Infinity collapse to 0. */
function clamp01(x: number): number {
  if (!Number.isFinite(x)) {
    return 0;
  }
  return Math.min(1, Math.max(0, x));
}