#!/usr/bin/env python3
"""8B vs 35B invention probe.

Sends a fixed set of prompts to two OpenAI-compatible endpoints, captures both
the assistant text and any tool calls, tags each answer with heuristic flags
(invented tags, malformed ``area`` values, invented parameters, refusals, ...),
prints a side-by-side comparison and stores everything as JSON.

Tool definitions are passed with every request, and a chat-template override
is attached for the 35B model.

Usage:
    probe.py [OUTPUT_JSON]
"""
import json
import re
import sys
from pathlib import Path

# Sampling settings, shared by the request and the saved ``probe_config`` so
# the two cannot drift apart.
MAX_TOKENS = 2048
TEMPERATURE = 0
SEED = 42

DEFAULT_OUT_PATH = "training/probe_8b_vs_35b_result.json"

# ── Tool definitions (tool signatures actually used by production opencode) ──
TOOLS = [
    {"type": "function", "function": {
        "name": "find_tags", "description": "태그 검색 (query/area/sub_area)",
        "parameters": {"type": "object", "properties": {
            "query": {"type": "string"}, "area": {"type": "string"},
            "sub_area": {"type": "string"}, "top_k": {"type": "integer"},
        }, "required": []},
    }},
    {"type": "function", "function": {
        "name": "get_tag_metadata", "description": "태그 메타데이터 조회",
        "parameters": {"type": "object", "properties": {
            "query": {"type": "string"}, "limit": {"type": "integer"},
        }, "required": ["query"]},
    }},
    {"type": "function", "function": {
        "name": "trace_connections", "description": "장비 연결 경로 추적",
        "parameters": {"type": "object", "properties": {
            "start_tag": {"type": "string"},
            "direction": {"type": "string", "enum": ["upstream", "downstream"]},
            "max_depth": {"type": "integer"},
        }, "required": ["start_tag"]},
    }},
    {"type": "function", "function": {
        "name": "active_alarms", "description": "활성 알람 조회",
        "parameters": {"type": "object", "properties": {
            "area": {"type": "string"}, "limit": {"type": "integer"},
        }, "required": []},
    }},
    {"type": "function", "function": {
        "name": "generate_status_report", "description": "운전 상태 종합 보고서",
        "parameters": {"type": "object", "properties": {
            "area": {"type": "string"}, "hours": {"type": "integer"},
        }, "required": []},
    }},
    {"type": "function", "function": {
        "name": "query_pv_history", "description": "과거 PV 히스토리",
        "parameters": {"type": "object", "properties": {
            "tag_names": {"type": "array", "items": {"type": "string"}},
            "time_from": {"type": "string"}, "time_to": {"type": "string"},
        }, "required": ["tag_names", "time_from", "time_to"]},
    }},
    {"type": "function", "function": {
        "name": "summarize_events", "description": "이벤트 히스토리 요약",
        "parameters": {"type": "object", "properties": {
            "area": {"type": "string"}, "hours": {"type": "integer"},
            "event_type": {"type": "string"},
        }, "required": []},
    }},
    {"type": "function", "function": {
        "name": "search_kb", "description": "지식 베이스 검색",
        "parameters": {"type": "object", "properties": {
            "query": {"type": "string"},
            "collection_keys": {"type": "array", "items": {"type": "string"}},
        }, "required": ["query"]},
    }},
]

# System prompt sent with every probe (runtime text, intentionally untranslated).
SYS = (
    "당신은 P6(PGMEA) 플랜트 운전 어시스턴트다.\n"
    "원칙:\n"
    "- 사실 지어내기 금지. 모르거나 DB·도구 결과에 없으면 '확인 불가'.\n"
    "- 사용자가 명시 안 한 태그/식별자 추측 금지. 불확실 시 find_tags 로 먼저 검증.\n"
    "- area는 'P[숫자](-[숫자])?' 형식. valid: P1,P2,P3,P4,P5,P6,P8,P9,P10,UTIL,PACKING (P7 없음).\n"
    "- 외부 도구가 빈 결과면 자기 인자 의심.\n"
)

# (probe id, user question) pairs.
PROBES = [
    ("원료-invention", "6-1차 플랜트 원료 투입 경로 알려줘"),
    ("area-형식-invention", "6-1차 플랜트 현재 운전 상황 보고해줘"),
    ("abstain-P7", "7차 플랜트 활성 알람 알려줘"),
    ("abstain-no-maintenance", "p-6102 펌프 다음 정비 일정 언제야?"),
    ("scaffold", "ficq-6113 SP=50 인데 PV=30이야. 어떻게 봐야 해? (range 0~2000 kg/hr)"),
]

# ── Flag regexes ──
INV_TAG = re.compile(r'\b(rm-\d+|raw_material_input|Plant_\d|Feed_Pump_\d)\b', re.I)
# Any area value whose first token is a bare "6" ("6", "6-1", "6-2", ...).
# SYS requires the "P" prefix, so all of these are malformed; "60" and "P6"
# do not match because of the word boundary / missing leading quote.
BAD_AREA = re.compile(r'"area"\s*:\s*"6\b', re.I)
FAKE_PARAM = re.compile(r'\b(tag_type|tag_category|tag_class)\b', re.I)
REFUSE_KW = ['확인 불가', '정보 없음', '존재하지 않', '판정 불가', '없습니다', '없어', '지원하지 않습니다']
SCAFFOLD_KW = ['제어변수', '현재값', '설정치', '제약', '판단']

# Flags that count as an "invention" in the summary.
INVENTION_FLAGS = ('INV-tag', 'BAD-area', 'FAKE-param')


def _decode_arguments(raw):
    """Return tool-call arguments as a parsed object when they are valid JSON.

    The SDK hands arguments over as a JSON *string*. Serialising that string
    again would escape every quote, and the quote-anchored flag regexes would
    then never match inside a tool call. Anything that is not a string, or is
    not valid JSON (e.g. a truncated generation), is returned unchanged.
    """
    if not isinstance(raw, str):
        return raw
    try:
        return json.loads(raw)
    except ValueError:
        return raw


def capture(msg):
    """Flatten an assistant message into one text blob for flagging.

    Args:
        msg: object with a ``content`` attribute and, optionally, a
            ``tool_calls`` sequence whose items expose ``function.name`` and
            ``function.arguments``.

    Returns:
        The message content (if any) followed by one JSON line per tool call,
        joined with newlines.
    """
    parts = []
    if msg.content:
        parts.append(msg.content)
    for call in getattr(msg, "tool_calls", None) or []:
        parts.append(json.dumps(
            {
                "name": call.function.name,
                "arguments": _decode_arguments(call.function.arguments),
            },
            ensure_ascii=False,
        ))
    return "\n".join(parts)


def flags(out):
    """Return the list of heuristic flag labels that apply to ``out``.

    Labels are emitted in a fixed order: ``INV-tag``, ``BAD-area``,
    ``FAKE-param``, ``refused``, ``find_tags-first``, ``5라벨``.
    """
    checks = (
        ("INV-tag", INV_TAG.search(out)),
        ("BAD-area", BAD_AREA.search(out)),
        ("FAKE-param", FAKE_PARAM.search(out)),
        ("refused", any(kw in out for kw in REFUSE_KW)),
        ("find_tags-first", 'find_tags' in out.lower()),
        # Only set when every scaffold label is present.
        ("5라벨", all(kw in out for kw in SCAFFOLD_KW)),
    )
    return [label for label, hit in checks if hit]


def probe(url, model, label):
    """Run every entry of ``PROBES`` against one endpoint.

    Args:
        url: base URL of the OpenAI-compatible server.
        model: model name sent in the request.
        label: short name used in the console output.

    Returns:
        A list of ``{"tag", "flags", "out"}`` dicts, one per probe, in
        ``PROBES`` order. A failed request yields ``flags == ["ERROR"]`` and
        the error text in ``out``.
    """
    # Imported lazily so the flag helpers stay importable without the SDK.
    from openai import OpenAI

    request = {
        "model": model,
        "tools": TOOLS,
        "max_tokens": MAX_TOKENS,
        "temperature": TEMPERATURE,
        "seed": SEED,
    }
    if "35B" in label or "35B" in model:
        # NOTE(review): vLLM's documented per-request field is
        # "chat_template_kwargs", and Qwen3 templates switch thinking with
        # "enable_thinking". Confirm this payload really turns thinking off.
        request["extra_body"] = {"default_chat_template_kwargs": {"preserve_thinking": False}}

    client = OpenAI(base_url=url, api_key="dummy")
    print(f"\n========== {label} ({model}) ==========")

    results = []
    for tag, question in PROBES:
        messages = [
            {"role": "system", "content": SYS},
            {"role": "user", "content": question},
        ]
        try:
            response = client.chat.completions.create(messages=messages, **request)
            out = capture(response.choices[0].message)
        except Exception as exc:  # best-effort: one bad probe must not stop the run
            out = f"(error: {exc})"
            # Do not score error text as if the model had written it.
            found = ["ERROR"]
        else:
            found = flags(out)

        flag_text = '·'.join(found) or '(none)'
        preview = (out[:280] or '(empty)').strip()
        print(f" [{tag}] {flag_text}")
        print(f" {preview}")
        results.append({"tag": tag, "flags": found, "out": out})
    return results


def inv_rate(rs):
    """Return ``(invented, total)`` for a list of probe results.

    A result counts as invented when it carries at least one of
    ``INVENTION_FLAGS``.
    """
    invented = sum(
        1 for r in rs
        if any(flag in r['flags'] for flag in INVENTION_FLAGS)
    )
    return invented, len(rs)


def main():
    """Probe both models, print the comparison and save the JSON report."""
    r35 = probe("http://localhost:8001/v1", "Qwen3.6-35B-A3B-FP8", "35B")
    r08 = probe("http://localhost:8002/v1", "Qwen3-8B", "8B")

    print("\n========== 비교 요약 ==========")
    print(f"{'probe':<26} | {'35B':<32} | {'8B':<32}")
    print("-" * 96)
    for a, b in zip(r35, r08):
        left = '·'.join(a['flags']) or '-'
        right = '·'.join(b['flags']) or '-'
        print(f"{a['tag']:<26} | {left:<32} | {right:<32}")

    i35 = inv_rate(r35)
    i08 = inv_rate(r08)
    print(f"\ninvention(태그·area·param 합성) — 35B: {i35[0]}/{i35[1]} | 8B: {i08[0]}/{i08[1]}")

    out_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_OUT_PATH
    results = {
        "35B": r35,
        "8B": r08,
        "invention_rate": {"35B": f"{i35[0]}/{i35[1]}", "8B": f"{i08[0]}/{i08[1]}"},
        "probe_config": {
            "tools_defined": len(TOOLS),
            "max_tokens": MAX_TOKENS,
            "temperature": TEMPERATURE,
            "seed": SEED,
        },
    }
    # Create the target directory first so a finished run is never lost to a
    # missing folder.
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"\n→ saved {out_path}")


if __name__ == "__main__":
    main()