mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
Validates the brightstaff signals port against the katanemo/signals Python reference on lmsys/lmsys-chat-1m. Adds a signals_replay bin emitting python-compatible JSON, a pyarrow-based driver (bypasses the datasets loader pickle bug on python 3.14), a 3-tier comparator, and an on-demand workflow_dispatch CI job. Made-with: Cursor
97 lines
3 KiB
Python
97 lines
3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Local smoke test for the parity harness — runs both runners on a tiny
|
|
hand-picked set of conversations without touching the lmsys dataset.
|
|
|
|
Run from this directory:
|
|
python _smoke_test.py --rust-binary <path>
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from signals.analyzer import SignalAnalyzer
|
|
|
|
# Hand-picked smoke-test conversations. Each entry below is (id, turns); the
# turns alternate speakers, starting with "human" and followed by "gpt".
# The comprehension expands every entry into the
# {"id": ..., "messages": [{"from": ..., "value": ...}, ...]} record shape.
SAMPLES = [
    {
        "id": sample_id,
        "messages": [
            {"from": "human" if turn_index % 2 == 0 else "gpt", "value": text}
            for turn_index, text in enumerate(turns)
        ],
    }
    for sample_id, turns in (
        (
            "smoke-gratitude",
            (
                "What is the weather in Istanbul?",
                "Istanbul is 14C and partly cloudy.",
                "That worked, exactly what I needed. Thanks!",
            ),
        ),
        (
            "smoke-escalation",
            (
                "This isn't helpful at all",
                "I'm sorry, can you tell me more?",
                "Get me a human, this is useless",
            ),
        ),
        (
            "smoke-correction",
            (
                "Book me a flight to NYC for tomorrow",
                "Sure, here are flights to NYC for Friday.",
                "No, I meant flights for Saturday, not tomorrow",
            ),
        ),
        (
            "smoke-clean",
            (
                "Hi",
                "Hello, how can I help?",
            ),
        ),
        (
            "smoke-rephrase",
            (
                "Can you summarize the news please",
                "Sure, here is a summary.",
                "Could you please summarize the news",
            ),
        ),
    )
]
|
|
|
|
|
|
def main() -> int:
    """Run both analyzers over SAMPLES and compare their reports.

    Steps:
      1. Write SAMPLES to ``out_smoke/conversations.jsonl`` (one JSON object
         per line).
      2. Feed that file on stdin to the executable given by ``--rust-binary``
         and capture its stdout in ``out_smoke/rust_reports.jsonl``.
      3. Run ``SignalAnalyzer.analyze`` on each conversation's messages and
         write ``{"id", "report"}`` records to
         ``out_smoke/python_reports.jsonl``.
      4. Invoke ``compare.py --output-dir out_smoke`` with the current
         interpreter.

    All paths are relative to the current working directory.

    Returns:
        2 if the Rust binary cannot be started or exits with a non-zero
        status; otherwise the exit code of ``compare.py``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--rust-binary", required=True, type=Path)
    args = parser.parse_args()

    out_dir = Path("out_smoke")
    out_dir.mkdir(exist_ok=True)
    conv_path = out_dir / "conversations.jsonl"
    rust_path = out_dir / "rust_reports.jsonl"
    py_path = out_dir / "python_reports.jsonl"

    # Pin UTF-8 so the JSONL files do not depend on the platform's default
    # text encoding.
    with conv_path.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(sample) + "\n" for sample in SAMPLES)

    # The binary reads and writes raw bytes; no text decoding happens here.
    with conv_path.open("rb") as fin, rust_path.open("wb") as fout:
        try:
            proc = subprocess.run(
                [str(args.rust_binary)],
                stdin=fin,
                stdout=fout,
                stderr=subprocess.PIPE,
            )
        except OSError as exc:
            # Missing or non-executable binary: report it the same way as a
            # failing run instead of crashing with a traceback.
            sys.stderr.write(f"failed to run {args.rust_binary}: {exc}\n")
            return 2
    if proc.returncode != 0:
        sys.stderr.write(proc.stderr.decode("utf-8", errors="replace"))
        return 2

    analyzer = SignalAnalyzer()
    with conv_path.open(encoding="utf-8") as fin, py_path.open(
        "w", encoding="utf-8"
    ) as fout:
        for line in fin:
            obj = json.loads(line)
            report = analyzer.analyze(obj["messages"])
            fout.write(
                json.dumps({"id": obj["id"], "report": report.to_dict()}) + "\n"
            )

    # The comparator's exit status becomes this script's result.
    return subprocess.call(
        [sys.executable, "compare.py", "--output-dir", str(out_dir)],
    )
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|