mirror of
https://github.com/syntrex-lab/gomcp.git
synced 2026-05-02 07:42:37 +02:00
chore: add copyright headers, CI tests, and sanitize gitignore
This commit is contained in:
parent
5cbb3d89d3
commit
d1f844235e
325 changed files with 2267 additions and 902 deletions
|
|
@ -1,3 +1,7 @@
|
|||
// Copyright 2026 Syntrex Lab. All rights reserved.
|
||||
// Use of this source code is governed by an Apache-2.0 license
|
||||
// that can be found in the LICENSE file.
|
||||
|
||||
// Package eval implements the CLASP Evaluation Framework (SDD-005).
|
||||
//
|
||||
// Provides structured capability scoring for SOC agents across 6 dimensions
|
||||
|
|
@ -52,23 +56,23 @@ type Score struct {
|
|||
|
||||
// EvalScenario defines a test scenario for agent evaluation.
|
||||
type EvalScenario struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Stage Stage `json:"stage"`
|
||||
Description string `json:"description"`
|
||||
Inputs []string `json:"inputs"`
|
||||
Expected string `json:"expected"`
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Stage Stage `json:"stage"`
|
||||
Description string `json:"description"`
|
||||
Inputs []string `json:"inputs"`
|
||||
Expected string `json:"expected"`
|
||||
Dimensions []Dimension `json:"dimensions"` // Which dimensions this tests
|
||||
}
|
||||
|
||||
// EvalResult represents the outcome of evaluating an agent on a scenario.
|
||||
type EvalResult struct {
|
||||
AgentID string `json:"agent_id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
ScenarioID string `json:"scenario_id"`
|
||||
Scores map[Dimension]Score `json:"scores"`
|
||||
OverallL int `json:"overall_l"` // 1-5 aggregate
|
||||
JudgeModel string `json:"judge_model,omitempty"`
|
||||
AgentID string `json:"agent_id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
ScenarioID string `json:"scenario_id"`
|
||||
Scores map[Dimension]Score `json:"scores"`
|
||||
OverallL int `json:"overall_l"` // 1-5 aggregate
|
||||
JudgeModel string `json:"judge_model,omitempty"`
|
||||
}
|
||||
|
||||
// ComputeOverall calculates the aggregate maturity level (average, rounded down).
|
||||
|
|
@ -86,12 +90,12 @@ func (r *EvalResult) ComputeOverall() int {
|
|||
|
||||
// AgentProfile aggregates multiple EvalResults into a capability profile.
|
||||
type AgentProfile struct {
|
||||
AgentID string `json:"agent_id"`
|
||||
Results []EvalResult `json:"results"`
|
||||
AgentID string `json:"agent_id"`
|
||||
Results []EvalResult `json:"results"`
|
||||
Averages map[Dimension]float64 `json:"averages"`
|
||||
OverallL int `json:"overall_l"`
|
||||
EvalCount int `json:"eval_count"`
|
||||
LastEvalAt time.Time `json:"last_eval_at"`
|
||||
OverallL int `json:"overall_l"`
|
||||
EvalCount int `json:"eval_count"`
|
||||
LastEvalAt time.Time `json:"last_eval_at"`
|
||||
}
|
||||
|
||||
// ComputeAverages calculates per-dimension average scores across all results.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue