Files
ExperionCrawler/mcp-server/eval/results/Qwen3-8B_20260526_103459.json

281 lines
5.8 KiB
JSON

{
"name": "Qwen3-8B",
"model": "Qwen3-8B",
"base_url": "http://localhost:8001/v1",
"timestamp": "2026-05-26T10:34:59",
"overall": {
"pass": 34,
"total": 40,
"pct": 0.85
},
"fabrication_rate": 0.0,
"per_category": {
"abstain": {
"pass": 8,
"total": 8,
"pct": 1.0
},
"grounding": {
"pass": 5,
"total": 6,
"pct": 0.833
},
"nl2sql": {
"pass": 5,
"total": 10,
"pct": 0.5
},
"scaffold": {
"pass": 6,
"total": 6,
"pct": 1.0
},
"tool_call": {
"pass": 10,
"total": 10,
"pct": 1.0
}
},
"items": [
{
"id": "nl2sql-01",
"category": "nl2sql",
"passed": true,
"detail": "ok"
},
{
"id": "nl2sql-02",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_plant_running_state', 'running']"
},
{
"id": "nl2sql-03",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_plant_running_state', 'running_pump_tags', 'p6']"
},
{
"id": "nl2sql-04",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_plant_running_state', 'tripped_pumps']"
},
{
"id": "nl2sql-05",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_instrument_range']"
},
{
"id": "nl2sql-06",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_plant_running_state_corroborated', 'p6-1']"
},
{
"id": "nl2sql-07",
"category": "nl2sql",
"passed": true,
"detail": "ok"
},
{
"id": "nl2sql-08",
"category": "nl2sql",
"passed": true,
"detail": "ok"
},
{
"id": "nl2sql-09",
"category": "nl2sql",
"passed": true,
"detail": "ok"
},
{
"id": "nl2sql-10",
"category": "nl2sql",
"passed": true,
"detail": "ok"
},
{
"id": "tool-01",
"category": "tool_call",
"passed": true,
"detail": "선택=active_alarms 기대=['active_alarms']"
},
{
"id": "tool-02",
"category": "tool_call",
"passed": true,
"detail": "선택=active_alarms 기대=['active_alarms']"
},
{
"id": "tool-03",
"category": "tool_call",
"passed": true,
"detail": "선택=summarize_events 기대=['summarize_events']"
},
{
"id": "tool-04",
"category": "tool_call",
"passed": true,
"detail": "선택=generate_status_report 기대=['generate_status_report']"
},
{
"id": "tool-05",
"category": "tool_call",
"passed": true,
"detail": "선택=generate_status_report 기대=['generate_status_report']"
},
{
"id": "tool-06",
"category": "tool_call",
"passed": true,
"detail": "선택=find_tags 기대=['find_tags']"
},
{
"id": "tool-07",
"category": "tool_call",
"passed": true,
"detail": "선택=find_tags 기대=['find_tags']"
},
{
"id": "tool-08",
"category": "tool_call",
"passed": true,
"detail": "선택=search_kb 기대=['search_kb']"
},
{
"id": "tool-09",
"category": "tool_call",
"passed": true,
"detail": "선택=trace_connections 기대=['trace_connections']"
},
{
"id": "tool-10",
"category": "tool_call",
"passed": true,
"detail": "선택=query_pv_history 기대=['query_pv_history', 'query_with_nl']"
},
{
"id": "abstain-01",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-02",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-03",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-04",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-05",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-06",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-07",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-08",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "scaffold-01",
"category": "scaffold",
"passed": true,
"detail": "절차 ok"
},
{
"id": "scaffold-02",
"category": "scaffold",
"passed": true,
"detail": "절차 ok"
},
{
"id": "scaffold-03",
"category": "scaffold",
"passed": true,
"detail": "절차 ok"
},
{
"id": "scaffold-04",
"category": "scaffold",
"passed": true,
"detail": "절차 ok"
},
{
"id": "scaffold-05",
"category": "scaffold",
"passed": true,
"detail": "절차 ok"
},
{
"id": "scaffold-06",
"category": "scaffold",
"passed": true,
"detail": "절차 ok"
},
{
"id": "ground-01",
"category": "grounding",
"passed": true,
"detail": "ok"
},
{
"id": "ground-02",
"category": "grounding",
"passed": true,
"detail": "ok"
},
{
"id": "ground-03",
"category": "grounding",
"passed": true,
"detail": "ok"
},
{
"id": "ground-04",
"category": "grounding",
"passed": true,
"detail": "ok"
},
{
"id": "ground-05",
"category": "grounding",
"passed": true,
"detail": "ok"
},
{
"id": "ground-06",
"category": "grounding",
"passed": false,
"detail": "누락:['87']"
}
]
}