Files
ExperionCrawler/mcp-server/eval/results/Qwen2.5-7B-Instruct_20260526_102850.json

281 lines
6.0 KiB
JSON

{
"name": "Qwen2.5-7B-Instruct",
"model": "Qwen2.5-7B-Instruct",
"base_url": "http://localhost:8001/v1",
"timestamp": "2026-05-26T10:28:50",
"overall": {
"pass": 18,
"total": 40,
"pct": 0.45
},
"fabrication_rate": 0.0,
"per_category": {
"abstain": {
"pass": 8,
"total": 8,
"pct": 1.0
},
"grounding": {
"pass": 4,
"total": 6,
"pct": 0.667
},
"nl2sql": {
"pass": 0,
"total": 10,
"pct": 0.0
},
"scaffold": {
"pass": 2,
"total": 6,
"pct": 0.333
},
"tool_call": {
"pass": 4,
"total": 10,
"pct": 0.4
}
},
"items": [
{
"id": "nl2sql-01",
"category": "nl2sql",
"passed": false,
"detail": "누락:['history_table', 'ficq-6113.pv']"
},
{
"id": "nl2sql-02",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_plant_running_state', 'running']"
},
{
"id": "nl2sql-03",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_plant_running_state', 'running_pump_tags', 'p6']"
},
{
"id": "nl2sql-04",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_plant_running_state', 'tripped_pumps']"
},
{
"id": "nl2sql-05",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_instrument_range', 'ficq-6113']"
},
{
"id": "nl2sql-06",
"category": "nl2sql",
"passed": false,
"detail": "누락:['v_plant_running_state_corroborated', 'p6-1']"
},
{
"id": "nl2sql-07",
"category": "nl2sql",
"passed": false,
"detail": "누락:['/120', 'ti-6101.pv']"
},
{
"id": "nl2sql-08",
"category": "nl2sql",
"passed": false,
"detail": "누락:['p-6102']"
},
{
"id": "nl2sql-09",
"category": "nl2sql",
"passed": false,
"detail": "누락:['history_table', 'pica-6111']"
},
{
"id": "nl2sql-10",
"category": "nl2sql",
"passed": false,
"detail": "누락:['ficq-6113.pv']"
},
{
"id": "tool-01",
"category": "tool_call",
"passed": false,
"detail": "선택=(없음) 기대=['active_alarms']"
},
{
"id": "tool-02",
"category": "tool_call",
"passed": false,
"detail": "선택=(없음) 기대=['active_alarms']"
},
{
"id": "tool-03",
"category": "tool_call",
"passed": true,
"detail": "선택=summarize_events 기대=['summarize_events']"
},
{
"id": "tool-04",
"category": "tool_call",
"passed": true,
"detail": "선택=generate_status_report 기대=['generate_status_report']"
},
{
"id": "tool-05",
"category": "tool_call",
"passed": false,
"detail": "선택=(없음) 기대=['generate_status_report']"
},
{
"id": "tool-06",
"category": "tool_call",
"passed": true,
"detail": "선택=find_tags 기대=['find_tags']"
},
{
"id": "tool-07",
"category": "tool_call",
"passed": false,
"detail": "선택=(없음) 기대=['find_tags']"
},
{
"id": "tool-08",
"category": "tool_call",
"passed": false,
"detail": "선택=query_events 기대=['search_kb']"
},
{
"id": "tool-09",
"category": "tool_call",
"passed": false,
"detail": "선택=(없음) 기대=['trace_connections']"
},
{
"id": "tool-10",
"category": "tool_call",
"passed": true,
"detail": "선택=query_pv_history 기대=['query_pv_history', 'query_with_nl']"
},
{
"id": "abstain-01",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-02",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-03",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-04",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-05",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-06",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-07",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "abstain-08",
"category": "abstain",
"passed": true,
"detail": "거부 ok"
},
{
"id": "scaffold-01",
"category": "scaffold",
"passed": false,
"detail": "누락단계:['제약', '판단']"
},
{
"id": "scaffold-02",
"category": "scaffold",
"passed": false,
"detail": "누락단계:['판단']"
},
{
"id": "scaffold-03",
"category": "scaffold",
"passed": true,
"detail": "절차 ok"
},
{
"id": "scaffold-04",
"category": "scaffold",
"passed": false,
"detail": "누락단계:['현재값']"
},
{
"id": "scaffold-05",
"category": "scaffold",
"passed": true,
"detail": "절차 ok"
},
{
"id": "scaffold-06",
"category": "scaffold",
"passed": false,
"detail": "누락단계:['현재값', '제약', '판단']"
},
{
"id": "ground-01",
"category": "grounding",
"passed": false,
"detail": "누락:['pgmea']"
},
{
"id": "ground-02",
"category": "grounding",
"passed": false,
"detail": "누락:['c-6211']"
},
{
"id": "ground-03",
"category": "grounding",
"passed": true,
"detail": "ok"
},
{
"id": "ground-04",
"category": "grounding",
"passed": true,
"detail": "ok"
},
{
"id": "ground-05",
"category": "grounding",
"passed": true,
"detail": "ok"
},
{
"id": "ground-06",
"category": "grounding",
"passed": true,
"detail": "ok"
}
]
}