#!/usr/bin/env python3 """ ExperionCrawler Unified MCP Server - RAG: Qdrant + Ollama nomic-embed-text + vLLM Qwen3-Coder-Next-FP8 - NL2SQL: 자연어 → LLM SQL 생성 → PostgreSQL 실행 - 사용처: stdio 모드 (기본): Claude Code MCP / Roo Code MCP HTTP 모드 (--http): C# McpClient (localhost:5001) """ from __future__ import annotations import sys import json import logging import httpx from functools import lru_cache from mcp.server.fastmcp import FastMCP logging.basicConfig(level=logging.WARNING, stream=sys.stderr) # ── 설정 ────────────────────────────────────────────────────────────────────── QDRANT_URL = "http://localhost:6333" OLLAMA_URL = "http://localhost:11434" EMBED_MODEL = "nomic-embed-text" # 768-dim, Roo Code 인덱스와 동일 VLLM_BASE_URL = "http://localhost:8000/v1" VLLM_MODEL = "Qwen/Qwen3-Coder-Next-FP8" # Qdrant 컬렉션 COL_CODEBASE = "ws-65f457145aee80b2" # ExperionCrawler 소스코드 COL_OPC_DOCS = "experion-opc-docs" # Experion HS R530 OPC UA 공식 문서 (266 chunks) # PostgreSQL 연결 DB_CONNECTION_STRING = "postgresql://postgres:postgres@localhost:5432/iiot_platform" DB_TIMEOUT = 10 # 초 # C# McpClient(localhost:5001)와 통신: json_response+stateless로 단순 POST→JSON 방식 mcp = FastMCP( "iiot-rag", port=5001, json_response=True, stateless_http=True, ) # Pipeline Imports from pipeline.extractor import PidGeometricExtractor from pipeline.topology import PidTopologyBuilder from pipeline.mapper import IntelligentMapper from pipeline.analyzer import PidAnalysisEngine import networkx as nx import os import asyncio # ── 임베딩 (Ollama) ─────────────────────────────────────────────────────────── def _embed(text: str) -> list[float]: """Ollama nomic-embed-text로 768-dim 벡터 생성.""" with httpx.Client(timeout=30) as client: resp = client.post( f"{OLLAMA_URL}/api/embeddings", json={"model": EMBED_MODEL, "prompt": text}, ) resp.raise_for_status() return resp.json()["embedding"] # ── LLM (vLLM / Qwen3-Coder-Next-FP8) ─────────────────────────────────────── @lru_cache(maxsize=1) def _llm(): from openai import OpenAI return OpenAI(base_url=VLLM_BASE_URL, api_key="dummy") # ── PaddleOCR 싱글톤 (PDF fallback용) ────────────────────────────────────────── @lru_cache(maxsize=1) def _ocr(): """PaddleOCR 인스턴스 (한/영, GPU). 첫 호출 시 ~50MB 모델 다운로드.""" from paddleocr import PaddleOCR import os use_gpu = os.environ.get("PADDLE_USE_GPU", "true").lower() == "true" try: ocr = PaddleOCR( use_angle_cls=True, lang="korean", use_gpu=use_gpu, show_log=False, ) return ocr except Exception as e: # GPU 실패 시 CPU 폴백 if use_gpu: os.environ["PADDLE_USE_GPU"] = "false" return _ocr() raise e # ── DXF/PDF 텍스트 추출 헬퍼 ─────────────────────────────────────────────────── def _extract_text_from_dxf(filepath: str) -> str: """ezdxf로 DXF 파일에서 텍스트 추출 (MTEXT 포맷 코드 제거).""" import ezdxf from ezdxf.tools.text import plain_mtext doc = ezdxf.readfile(filepath) msp = doc.modelspace() texts = [] for entity in msp: if entity.dxftype() == "TEXT": texts.append(entity.dxf.text) elif entity.dxftype() == "MTEXT": try: plain = plain_mtext(entity.dxf.text) if plain.strip(): texts.append(plain) except Exception: pass return "\n".join(texts) def _extract_text_from_pdf(filepath: str) -> str: """PyMuPDF로 PDF 파일에서 텍스트 추출.""" import fitz # pymupdf doc = fitz.open(filepath) texts = [] for page in doc: texts.append(page.get_text()) return "\n".join(texts) def _extract_text_from_pdf_ocr(filepath: str) -> str: """PaddleOCR로 PDF에서 이미지 추출 후 OCR (고정밀도).""" import fitz # pymupdf from PIL import Image import numpy as np doc = fitz.open(filepath) all_texts = [] for page_idx, page in enumerate(doc): # 페이지를 이미지로 변환 mat = fitz.Matrix(300 / 72) # 300 DPI pix = page.get_pixmap(matrix=mat) img_data = pix.tobytes("png") img = Image.open(__import__("io").BytesIO(img_data)) # OCR 실행 result = _ocr().ocr(np.array(img), cls=True) if result[0]: for line in result[0]: all_texts.append(line[1][0]) return "\n".join(all_texts) def _convert_dwg_to_dxf_dxflib(filepath: str) -> str: """libreoffice로 DWG를 DXF로 변환.""" import subprocess import os dxf_path = filepath.replace(".dwg", ".dxf") try: # LibreOffice로 변환 result = subprocess.run( [ "libreoffice", "--headless", "--convert-to", "dxf:AutoCAD DXF", "--outdir", os.path.dirname(filepath) or ".", filepath ], check=True, timeout=120, capture_output=True, text=True ) if os.path.exists(dxf_path): return dxf_path else: raise FileNotFoundError("DXF 변환 파일이 생성되지 않았습니다.") except subprocess.CalledProcessError as e: raise Exception(f"LibreOffice 변환 실패: {e.stderr}") # ── Qdrant 검색 헬퍼 ────────────────────────────────────────────────────────── def _search(collection: str, query: str, top_k: int, threshold: float = 0.25) -> str: vec = _embed(query) with httpx.Client(timeout=20) as client: resp = client.post( f"{QDRANT_URL}/collections/{collection}/points/search", json={ "vector": vec, "limit": top_k, "with_payload": True, "score_threshold": threshold, }, ) resp.raise_for_status() hits = resp.json().get("result", []) if not hits: return "관련 결과 없음." parts = [] for h in hits: p = h.get("payload", {}) file_path = p.get("filePath", p.get("path", "unknown")) chunk = p.get("codeChunk", p.get("content", p.get("text", ""))) start_line = p.get("startLine", "") loc = f"{file_path}:{start_line}" if start_line else file_path parts.append(f"[score={h['score']:.3f}] {loc}\n```\n{chunk[:700]}\n```") return "\n\n---\n\n".join(parts) # ── DB 헬퍼 ────────────────────────────────────────────────────────────────── def _get_db_connection(): """PostgreSQL DB 연결 획득.""" import psycopg return psycopg.connect(DB_CONNECTION_STRING, connect_timeout=DB_TIMEOUT) def _validate_sql(sql: str) -> tuple[bool, str]: """SQL 안전 검증 — SELECT만 허용, 위험 키워드 차단.""" if len(sql) > 2000: return False, "쿼리 길이 2000자를 초과했습니다." dangerous = ['EXEC', 'DROP', 'DELETE', 'UPDATE', 'INSERT', 'ALTER', 'CREATE', 'GRANT', 'REVOKE'] sql_upper = sql.upper() for kw in dangerous: if kw in sql_upper: return False, f"허용되지 않은 키워드 '{kw}'를 사용했습니다." if not sql_upper.strip().startswith('SELECT'): return False, "단순 SELECT 쿼리만 허용됩니다." if '..' in sql or '~' in sql: return False, "파일 경로 표현은 허용되지 않습니다." return True, "" # DB 스키마 — LLM SQL 생성 시 컨텍스트로 사용 _DB_SCHEMA = """ PostgreSQL 시계열 데이터베이스 스키마 테이블: history_table (시계열 이력) tagname TEXT - 태그명 (모두 소문자, 예: 'ficq-6113.pv') — 대소문자 구분 node_id TEXT - OPC UA 노드 ID value TEXT - 측정값, 수치 연산 시 ::double precision 캐스트 필요 recorded_at TIMESTAMPTZ - 기록 시각(UTC), 스냅샷 주기 약 60초 테이블: realtime_table (실시간 최신값) tagname TEXT - 태그명 (모두 소문자) node_id TEXT - OPC UA 노드 ID livevalue TEXT - 현재값 timestamp TIMESTAMPTZ - 최종 갱신 시각 N분 간격 집계 공식 (time_bucket 금지, date_trunc 사용): 1분 버킷: date_trunc('minute', recorded_at) AS bucket 2분 버킷: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/120)*120) AS bucket 5분 버킷: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/300)*300) AS bucket 10분 버킷: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/600)*600) AS bucket N분 버킷: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/(N*60))*(N*60)) AS bucket 예시 (2분 간격, 여러 태그): SELECT to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/120)*120) AS bucket, tagname, AVG(value::double precision) AS avg_val FROM history_table WHERE tagname IN ('tag1', 'tag2') AND recorded_at >= NOW() - INTERVAL '3 hours' GROUP BY bucket, tagname ORDER BY bucket, tagname 규칙: - SELECT만 허용 (INSERT/UPDATE/DELETE/DROP 등 불가) - tagname은 모두 소문자로 정확히 입력 - value 컬럼은 TEXT이므로 집계 시 ::double precision 캐스트 필수 - time_bucket 함수 사용 금지 — 위의 to_timestamp/FLOOR/EPOCH 공식 사용 """ # ── RAG 도구 ───────────────────────────────────────────────────────────────── @mcp.tool() def search_codebase(query: str, top_k: int = 6) -> str: """ExperionCrawler 프로젝트 소스코드 검색 (우리가 개발한 .NET 8 C# 코드). Experion HS R530 공식 문서가 아닌, ExperionCrawler 구현 코드를 검색함. 사용 시점: ExperionCrawler 코드의 구현 방법, 버그, 구조를 알고 싶을 때. ⚠️ Experion HS R530 제품 동작/설정/스펙을 알고 싶으면 search_r530_docs 사용. Args: query: 검색어 (예: "OPC UA 구독 시작", "히스토리 스냅샷", "TextToSql 서비스") top_k: 반환 결과 수 (기본 6) """ return _search(COL_CODEBASE, query, top_k) @mcp.tool() def search_r530_docs(query: str, top_k: int = 5) -> str: """Honeywell Experion HS R530 공식 제품 문서 검색. ExperionCrawler 코드가 아닌, Honeywell 공식 HTM 문서를 검색함. 사용 시점: Experion HS R530의 OPC UA 설정, 인증서, 보안 정책, 포인트 주소 형식, 채널/컨트롤러 속성, 문제해결 등 제품 스펙과 동작을 알고 싶을 때. Args: query: 검색어 (예: "certificate configuration", "endpoint security policy") top_k: 반환 결과 수 (기본 5) """ return _search(COL_OPC_DOCS, query, top_k) @mcp.tool() def ask_iiot_llm(question: str, context: str = "") -> str: """Qwen3-Coder-Next에게 IIoT/OPC UA 질문 (컨텍스트 없이 LLM 직접 질문). 사용 시점: search_codebase 또는 search_r530_docs 결과를 context로 넘겨 종합 분석·답변이 필요할 때. 또는 일반 IIoT/OPC UA 개념 질문. Args: question: 질문 내용 context: (선택) search_codebase 또는 search_r530_docs 검색 결과 """ system = ( "당신은 IIoT(산업용 IoT), OPC UA, Honeywell Experion PKS/HS R530 전문가입니다.\n" "컨텍스트가 제공된 경우 컨텍스트를 우선 근거로 삼아 한국어로 답변합니다.\n" "컨텍스트 출처가 'Experion HS R530 공식 문서'인지 'ExperionCrawler 코드'인지 명확히 구분하여 설명합니다." ) user_msg = f"컨텍스트:\n{context}\n\n질문: {question}" if context else question resp = _llm().chat.completions.create( model=VLLM_MODEL, messages=[ {"role": "system", "content": system}, {"role": "user", "content": user_msg}, ], max_tokens=2048, temperature=0.1, ) return resp.choices[0].message.content or "(응답 없음)" @mcp.tool() def rag_query(question: str, search_code: bool = False, search_docs: bool = True) -> str: """검색 → Qwen3-Coder-Next 답변 생성 (통합 RAG). 기본값: Experion HS R530 공식 문서만 검색 (search_docs=True, search_code=False). ExperionCrawler 코드도 함께 보려면 search_code=True 추가. Args: question: 질문 search_docs: Experion HS R530 공식 문서 검색 여부 (기본 True) search_code: ExperionCrawler 소스코드 검색 여부 (기본 False) """ context_parts: list[str] = [] if search_docs: context_parts.append(f"=== Experion HS R530 공식 문서 ===\n{_search(COL_OPC_DOCS, question, 4)}") if search_code: context_parts.append(f"=== ExperionCrawler 구현 코드 ===\n{_search(COL_CODEBASE, question, 3)}") return ask_iiot_llm(question, "\n\n".join(context_parts)) # ── NL2SQL 도구 ─────────────────────────────────────────────────────────────── @mcp.tool() def run_sql(sql: str) -> str: """SQL 쿼리 실행 (SELECT만 허용). Args: sql: 실행할 SELECT SQL 문자열 Returns: JSON: { success, columns, count, data } 또는 { success, error } """ valid, err = _validate_sql(sql) if not valid: return json.dumps({"success": False, "error": f"SQL 검증 실패: {err}"}, ensure_ascii=False) try: conn = _get_db_connection() with conn.cursor() as cur: cur.execute(sql) rows = cur.fetchall() columns = [desc[0] for desc in cur.description] result_data = [dict(zip(columns, row)) for row in rows] return json.dumps({ "success": True, "columns": columns, "count": len(result_data), "data": result_data }, ensure_ascii=False, default=str) except Exception as e: return json.dumps({"success": False, "error": f"SQL 실행 실패: {e}"}, ensure_ascii=False) @mcp.tool() def query_pv_history(tag_names: list[str], time_from: str, time_to: str, limit: int = 100) -> str: """과거 값(PV) 히스토리 조회. Args: tag_names: 태그 이름 목록 (예: ["ficq-6113.pv", "ti-6101.pv"]) time_from: 시작 시간 (ISO 8601, 예: "2026-04-01T00:00:00") time_to: 종료 시간 (ISO 8601, 예: "2026-04-02T00:00:00") limit: 반환 행 수 제한 (기본 100, 최대 5000) Returns: JSON: { success, tag_names, time_range, limit, data } """ try: limit = min(limit, 5000) conn = _get_db_connection() with conn.cursor() as cur: cur.execute( """SELECT tagname, recorded_at, value FROM history_table WHERE tagname = ANY(%s) AND recorded_at >= %s AND recorded_at <= %s ORDER BY recorded_at, tagname LIMIT %s""", (tag_names, time_from, time_to, limit) ) rows = cur.fetchall() data = [{"tag_name": r[0], "timestamp": r[1].isoformat(), "value": r[2]} for r in rows] return json.dumps({ "success": True, "tag_names": tag_names, "time_range": f"{time_from} ~ {time_to}", "count": len(data), "data": data }, ensure_ascii=False, indent=2) except Exception as e: return json.dumps({"success": False, "error": f"히스토리 쿼리 실패: {e}"}, ensure_ascii=False) @mcp.tool() def get_tag_metadata(query: str, limit: int = 10) -> str: """태그 메타데이터 검색 (realtime_table 기반). Args: query: 태그명 검색어 (패턴 매칭) limit: 반환 태그 수 제한 (기본 10) Returns: JSON: { success, query, count, tags } """ try: conn = _get_db_connection() with conn.cursor() as cur: cur.execute( """SELECT tagname, livevalue, timestamp, node_id FROM realtime_table WHERE tagname ILIKE %s ORDER BY tagname LIMIT %s""", (f"%{query}%", limit) ) rows = cur.fetchall() tags = [{"tag_name": r[0], "current_value": r[1], "last_updated": r[2].isoformat() if r[2] else None, "node_id": r[3]} for r in rows] return json.dumps({"success": True, "query": query, "count": len(tags), "tags": tags}, ensure_ascii=False, indent=2) except Exception as e: return json.dumps({"success": False, "error": f"태그 메타데이터 검색 실패: {e}"}, ensure_ascii=False) @mcp.tool() def list_drawings(unit_no: str | None = None) -> str: """단위별 도면 목록 조회 (node_map_master.name 기반). Args: unit_no: 단위 번호 접두사 (예: "A", "B"). None이면 전체 목록 Returns: JSON: { success, unit_no, count, names } """ try: conn = _get_db_connection() with conn.cursor() as cur: if unit_no: cur.execute( "SELECT DISTINCT name FROM node_map_master WHERE name ILIKE %s ORDER BY name LIMIT 100", (f"{unit_no}%",) ) else: cur.execute("SELECT DISTINCT name FROM node_map_master ORDER BY name LIMIT 100") rows = cur.fetchall() return json.dumps({"success": True, "unit_no": unit_no, "count": len(rows), "names": [r[0] for r in rows]}, ensure_ascii=False, indent=2) except Exception as e: return json.dumps({"success": False, "error": f"도면 목록 조회 실패: {e}"}, ensure_ascii=False) @mcp.tool() def query_with_nl(question: str) -> str: """자연어 질문을 LLM이 SQL로 변환하고 시계열 DB를 조회합니다. Args: question: 자연어 질문 (예: "FICQ-6113.PV 최근 1시간 값을 1분 단위로 표시") Returns: JSON: { sql, success, columns, count, data } 또는 { sql, success, error } """ system = ( "You are a PostgreSQL SQL expert.\n" "Convert the user's question into a SELECT SQL using the schema below.\n" "IMPORTANT rules:\n" "- Use ONLY PostgreSQL syntax. No DATE_FORMAT, no INTERVAL N DAY.\n" "- Time column is 'recorded_at' (TIMESTAMPTZ). Do NOT use 'timestamp'.\n" "- NEVER use time_bucket(). For N-minute buckets use to_timestamp/FLOOR/EPOCH formula.\n" "- INTERVAL rule:\n" " * If the question specifies an interval (e.g. '2분 간격', '5-minute interval'):\n" " use: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/(N*60))*(N*60)) AS bucket\n" " with GROUP BY bucket, tagname and AVG(value::double precision) AS avg_val\n" " * If NO interval is specified: SELECT recorded_at, tagname, value — NO GROUP BY.\n" "- Current year is 2026. '4월 27일' means 2026-04-27.\n" "- All times in DB are UTC. Korean input is KST (UTC+9). Convert: KST 12:00 = UTC 03:00.\n" "- value column is TEXT; cast with ::double precision only when aggregating.\n" "- All tagnames are lowercase (e.g. 'ficq-6113.pv'). Match exactly.\n" "- PostgreSQL LIKE: dot has no special meaning, no escaping needed.\n" "- Return ONLY the SQL statement. No explanation, no markdown.\n\n" f"{_DB_SCHEMA}" ) try: resp = _llm().chat.completions.create( model=VLLM_MODEL, messages=[ {"role": "system", "content": system}, {"role": "user", "content": question}, ], max_tokens=8192, temperature=0.1, ) sql = (resp.choices[0].message.content or "").strip() # 마크다운 코드 블록 제거 if sql.startswith("```"): lines = sql.splitlines() sql = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:]).strip() if not sql: return json.dumps({"success": False, "sql": "", "error": "LLM이 SQL을 생성하지 못했습니다."}, ensure_ascii=False) except Exception as e: return json.dumps({"success": False, "sql": "", "error": f"LLM SQL 생성 실패: {e}"}, ensure_ascii=False) # SQL 실행 raw = run_sql(sql) result = json.loads(raw) result["sql"] = sql # long format → pivot 변환 (tagname 컬럼이 있으면 자동 PIVOT) if result.get("success") and "data" in result: cols = result.get("columns", []) data = result["data"] if "tagname" in cols and data: time_col = next((c for c in cols if c not in ("tagname", "value", "livevalue", "avg_val")), None) val_col = next((c for c in ("avg_val", "value") if c in cols), cols[-1]) if time_col: tag_names_list = sorted(dict.fromkeys(row["tagname"] for row in data)) pivoted: dict = {} for row in data: key = str(row[time_col]) if key not in pivoted: pivoted[key] = {time_col: row[time_col]} pivoted[key][row["tagname"]] = row.get(val_col) result["data"] = list(pivoted.values()) result["columns"] = [time_col] + tag_names_list result["count"] = len(result["data"]) return json.dumps(result, ensure_ascii=False, default=str) # ── P&ID 추출 도구 ────────────────────────────────────────────────────────────── @mcp.tool() def extract_pid_tags(text: str, source_type: str) -> str: """P&ID 도면(DXF/PDF)에서 태그 정보를 추출합니다. Args: text: DXF/PDF에서 추출한 텍스트 source_type: 'dxf' 또는 'pdf' Returns: JSON: { success, count, tags: [{tagNo, equipmentName, instrumentType, lineNumber, pidDrawingNo, confidence}] } """ system = ( "You are a P&ID (Piping and Instrumentation Diagram) expert.\n" "Extract all instrument and equipment tags from the provided text.\n" "Return ONLY a valid JSON array. Each element must have exactly these fields:\n" '{"tagNo":"FCV-101","equipmentName":null,"instrumentType":"FCV","lineNumber":null,"pidDrawingNo":null,"confidence":0.95}\n' "Rules:\n" "- tagNo: any token matching [LETTERS]-[DIGITS] or [LETTERS]-[DIGITS]-[SUFFIX]\n" " Examples: FCV-101, P-10101, T-10100, VG-6203-15A-F1A-n, BT-6200, DP-10101\n" "- instrumentType: leading letters of tagNo (e.g. FCV, P, T, VG, BT, DP, PSV)\n" "- equipmentName: descriptive name if present in text near the tag, else null\n" "- lineNumber: null unless a line number is explicitly associated\n" "- pidDrawingNo: null unless a drawing number is explicitly associated\n" "- confidence: 0.95 for clear tags, lower for ambiguous ones\n" "- Output ONLY the JSON array, no markdown, no explanation.\n" "- If no tags found, return: []\n" ) import logging import re import json as json_module try: truncated_text = text[:100000] if len(text) > 100000 else text resp = _llm().chat.completions.create( model=VLLM_MODEL, messages=[ {"role": "system", "content": system}, {"role": "user", "content": f"Source: {source_type}\n\nText:\n{truncated_text}"}, ], max_tokens=32768, temperature=0.1, extra_body={"chat_template_kwargs": {"enable_thinking": False}}, ) raw = (resp.choices[0].message.content or "").strip() finish_reason = resp.choices[0].finish_reason # 마크다운 코드 블록 제거 if raw.startswith("```"): lines = raw.splitlines() raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip() # finish_reason=length 로 잘린 경우: 마지막 완전한 객체까지 살린 뒤 배열 닫기 if finish_reason == "length": last_close = raw.rfind("}") if last_close != -1: raw = raw[:last_close + 1] + "]" # 유효한 JSON 배열 추출 (가장 긴 균형 잡힌 [...] 선택) def _extract_array(s: str) -> str: depth = 0; start = -1; best = "" for i, c in enumerate(s): if c == '[': if depth == 0: start = i depth += 1 elif c == ']': depth -= 1 if depth == 0 and start >= 0: cand = s[start:i+1] if len(cand) > len(best): best = cand return best if best else "[]" raw = _extract_array(raw) # JSON 파싱 — 실패 시 개별 객체 추출로 폴백 try: data = json_module.loads(raw) except json_module.JSONDecodeError: objects = re.findall(r'\{[^{}]*\}', raw, re.DOTALL) data = [] for obj in objects: try: data.append(json_module.loads(obj)) except json_module.JSONDecodeError: pass if not data: return json_module.dumps({"success": False, "count": 0, "tags": []}, ensure_ascii=False) logging.info(f"[extract_pid_tags] source={source_type} count={len(data) if isinstance(data, list) else 0}") return json_module.dumps({ "success": True, "count": len(data), "tags": data }, ensure_ascii=False, indent=2) except Exception as e: logging.error(f"P&ID 태그 추출 실패: {e}") logging.error(f"Raw response: {raw[:1000]}") return json.dumps({"success": False, "error": f"P&ID 태그 추출 실패: {e}"}, ensure_ascii=False) @mcp.tool() def match_pid_tags(pid_tags: list[str], experion_tags: list[str]) -> str: """P&ID 태그를 Experion 태그에 매핑합니다. Args: pid_tags: P&ID에서 추출한 태그 목록 (예: ["FT-101", "PT-201"]) experion_tags: Experion 시스템 태그 목록 (예: ["ficq-6113.pv", "pt-201.pv"]) Returns: JSON: { success, count, mappings: [{pidTag, experionTag, confidence}] } """ system = ( "You are a P&ID to Experion tag matching expert.\n" "Match P&ID tags to Experion tags based on similarity.\n" "Return ONLY a JSON array of objects with the following structure:\n" '[{"pidTag":"FT-101","experionTag":"ft-101.pv","confidence":0.92},...]\n' "IMPORTANT rules:\n" "- pidTag: The original P&ID tag from input\n" "- experionTag: The matched Experion tag (lowercase, with .pv/.sp/.mv suffix)\n" "- confidence: 0.0 to 1.0 based on match quality\n" "- If no good match found, set confidence < 0.5 and leave experionTag null\n" "- Do NOT include any explanation, only the JSON array.\n" "- If no matches found, return an empty array: []\n" "- temperature=0.1 for deterministic output.\n" ) import re import json as json_module try: pid_str = "\n".join(pid_tags) experion_str = "\n".join(experion_tags) resp = _llm().chat.completions.create( model=VLLM_MODEL, messages=[ {"role": "system", "content": system}, {"role": "user", "content": f"P&ID Tags:\n{pid_str}\n\nExperion Tags:\n{experion_str}"}, ], max_tokens=16384, temperature=0.1, extra_body={"chat_template_kwargs": {"enable_thinking": False}}, ) raw = (resp.choices[0].message.content or "").strip() finish_reason = resp.choices[0].finish_reason if raw.startswith("```"): lines = raw.splitlines() raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip() if finish_reason == "length": last_close = raw.rfind("}") if last_close != -1: raw = raw[:last_close + 1] + "]" match = re.search(r'\[.*\]', raw, re.DOTALL) raw = match.group(0) if match else "[]" data = json_module.loads(raw) return json_module.dumps({"success": True, "count": len(data), "mappings": data}, ensure_ascii=False, indent=2) except Exception as e: return json.dumps({"success": False, "error": f"P&ID 태그 매핑 실패: {e}"}, ensure_ascii=False) # ── P&ID 파싱 도구 (DXF/PDF/DWG) ─────────────────────────────────────────────── @mcp.tool() def parse_pid_dxf(filepath: str) -> str: """ezdxf 기반 DXF 파일 파싱. 텍스트 추출 후 LLM으로 태그 자동 추출. Args: filepath: DXF 파일 경로 Returns: JSON: { success, text, count, tags: [{tagNo, equipmentName, ...}] } """ try: text = _extract_text_from_dxf(filepath) if not text.strip(): return json.dumps({ "success": True, "text": "", "count": 0, "tags": [] }, ensure_ascii=False, indent=2) # LLM으로 태그 추출 system = ( "You are a P&ID (Piping and Instrumentation Diagram) expert.\n" "Extract instrument and equipment tags from the provided text.\n" "Return ONLY a JSON array of objects with the following structure:\n" '[{"tagNo":"FIT-10115","equipmentName":"Flow Transmitter","instrumentType":"FT" OR "FIT OR "TIA","lineNumber":"L-101","pidDrawingNo":"P&ID-001","confidence":0.95},...]\n' "IMPORTANT rules:\n" "- tagNo: Standard tag format with these patterns:\n" " * Instrument: [Function][Loop]-[Number] (e.g., FT-101, PT-201, LI-301, FICQ-6113)\n" " * Equipment: [Type]-[Number] (e.g., P-10101, T-10100, C-9111, E-10119)\n" " * Complex: [Type]-[Number]-[Size]-[Class]-[Material]-[Option] (e.g., VG-6203-15A-F1A-n, CD-10513-40A-S1A-H50)\n" " * Real examples from DXF: BT-6200, SARF-#6-PID-002, P-6101, DP-10101, CHS-6630-100A-F-C50\n" "- instrumentType: First 2-4 letters of tagNo (FIT, PT, LI, FICQ, TCV, FCV, PCV, PG, TG, etc.)\n" "- equipmentName: Descriptive name if available, otherwise null\n" "- lineNumber: Line number if available, otherwise null\n" "- pidDrawingNo: Drawing number if available, otherwise null\n" "- confidence: 0.0 to 1.0 based on how clearly the tag was identified\n" "- Do NOT include any explanation, only the JSON array.\n" "- If no tags found, return an empty array: []\n" "- temperature=0.1 for deterministic output.\n" ) truncated_text = text[:12000] if len(text) > 12000 else text resp = _llm().chat.completions.create( model=VLLM_MODEL, messages=[ {"role": "system", "content": system}, {"role": "user", "content": f"Source: dxf\n\nText:\n{truncated_text}"}, ], max_tokens=4096, temperature=0.1, ) raw = (resp.choices[0].message.content or "").strip() # 마크다운 코드 블록 제거 if raw.startswith("```"): lines = raw.splitlines() raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip() # JSON 배열 추출 import re match = re.search(r'\[.*\]', raw, re.DOTALL) if match: raw = match.group(0) # JSON 파싱 시도 try: data = json.loads(raw) except json.JSONDecodeError: # JSON 배열 추출 시도 (더 엄격한 패턴) match = re.search(r'\[\s*\{.*?\}\s*\]', raw, re.DOTALL) if match: raw_clean = match.group(0) try: data = json.loads(raw_clean) except json.JSONDecodeError: # 마지막으로, JSON 배열을 개별 객체로 분리하여 파싱 시도 objects = re.findall(r'\{[^{}]*\}', raw, re.DOTALL) data = [] for obj in objects: try: data.append(json.loads(obj)) except json.JSONDecodeError: pass if not isinstance(data, list): data = [] return json.dumps({ "success": True, "text": text[:10000], # 제한 "count": len(text), "tags": data }, ensure_ascii=False, indent=2) except Exception as e: return json.dumps({"success": False, "error": f"DXF 파싱 실패: {e}"}, ensure_ascii=False) @mcp.tool() def parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str: """PyMuPDF 기반 PDF 파일 파싱. 텍스트 추출 후 LLM으로 태그 자동 추출. Args: filepath: PDF 파일 경로 use_ocr: OCR 사용 여부 (기본 True, 고정밀도) Returns: JSON: { success, text, count, tags: [{tagNo, equipmentName, ...}] } """ try: if use_ocr: text = _extract_text_from_pdf_ocr(filepath) else: text = _extract_text_from_pdf(filepath) if not text.strip(): return json.dumps({ "success": True, "text": "", "count": 0, "tags": [] }, ensure_ascii=False, indent=2) # LLM으로 태그 추출 system = ( "You are a P&ID (Piping and Instrumentation Diagram) expert.\n" "Extract instrument and equipment tags from the provided text.\n" "Return ONLY a JSON array of objects with the following structure:\n" '[{"tagNo":"FIT-10115","equipmentName":"Flow Transmitter","instrumentType":"FT" OR "FIT OR "TIA","lineNumber":"L-101","pidDrawingNo":"P&ID-001","confidence":0.95},...]\n' "IMPORTANT rules:\n" "- tagNo: Standard tag format with these patterns:\n" " * Instrument: [Function][Loop]-[Number] (e.g., FT-101, PT-201, LI-301, FICQ-6113)\n" " * Equipment: [Type]-[Number] (e.g., P-10101, T-10100, C-9111, E-10119)\n" " * Complex: [Type]-[Number]-[Size]-[Class]-[Material]-[Option] (e.g., VG-6203-15A-F1A-n, CD-10513-40A-S1A-H50)\n" " * Real examples from DXF: BT-6200, SARF-#6-PID-002, P-6101, DP-10101, CHS-6630-100A-F-C50\n" "- instrumentType: First 2-4 letters of tagNo (FIT, PT, LI, FICQ, TCV, FCV, PCV, PG, TG, etc.)\n" "- equipmentName: Descriptive name if available, otherwise null\n" "- lineNumber: Line number if available, otherwise null\n" "- pidDrawingNo: Drawing number if available, otherwise null\n" "- confidence: 0.0 to 1.0 based on how clearly the tag was identified\n" "- Do NOT include any explanation, only the JSON array.\n" "- If no tags found, return an empty array: []\n" "- temperature=0.1 for deterministic output.\n" ) truncated_text = text[:12000] if len(text) > 12000 else text resp = _llm().chat.completions.create( model=VLLM_MODEL, messages=[ {"role": "system", "content": system}, {"role": "user", "content": f"Source: pdf\n\nText:\n{truncated_text}"}, ], max_tokens=4096, temperature=0.1, ) raw = (resp.choices[0].message.content or "").strip() # 마크다운 코드 블록 제거 if raw.startswith("```"): lines = raw.splitlines() raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip() # JSON 배열 추출 import re match = re.search(r'\[.*\]', raw, re.DOTALL) if match: raw = match.group(0) # JSON 파싱 시도 try: data = json.loads(raw) except json.JSONDecodeError: # JSON 배열 추출 시도 (더 엄격한 패턴) match = re.search(r'\[\s*\{.*?\}\s*\]', raw, re.DOTALL) if match: raw_clean = match.group(0) try: data = json.loads(raw_clean) except json.JSONDecodeError: # 마지막으로, JSON 배열을 개별 객체로 분리하여 파싱 시도 objects = re.findall(r'\{[^{}]*\}', raw, re.DOTALL) data = [] for obj in objects: try: data.append(json.loads(obj)) except json.JSONDecodeError: pass if not isinstance(data, list): data = [] return json.dumps({ "success": True, "text": text[:10000], "count": len(text), "tags": data }, ensure_ascii=False, indent=2) except Exception as e: return json.dumps({"success": False, "error": f"PDF 파싱 실패: {e}"}, ensure_ascii=False) @mcp.tool() async def build_pid_graph_parallel(filepath: str) -> str: """ 분산 처리 기법을 적용한 P&ID 그래프 생성 툴. 전처리 -> 병렬 분산 추출 -> 위상 모델링 -> 저장 과정을 수행합니다. """ try: # 1. 전처리 (Phase 1: Geometric Extraction) extractor = PidGeometricExtractor(filepath) geo_data_path = f"mcp-server/storage/{os.path.basename(filepath)}_geo.json" geo_data_list = extractor.extract_and_save(geo_data_path) # geo_data_list는 경로를 반환하므로 다시 로드 with open(geo_data_path, 'r', encoding='utf-8') as f: geo_data = json.load(f) # 2. 병렬 분산 추출 (Phase 3: Intelligent Mapping) # 시스템 태그 목록 가져오기 (DB에서 조회하는 로직 필요, 여기서는 예시로 빈 리스트 또는 기본값) # 실제로는 get_tag_metadata 등을 통해 전체 태그 리스트를 확보해야 함 system_tags = [] try: conn = _get_db_connection() with conn.cursor() as cur: cur.execute("SELECT tagname FROM realtime_table") system_tags = [r[0] for r in cur.fetchall()] except Exception as e: logging.warning(f"Failed to fetch system tags: {e}") # 그래프 임시 생성 (Mapper가 위상 정보를 사용하므로 필요) builder = PidTopologyBuilder(geo_data) builder.build_graph() # Mapper 설정 from openai import AsyncOpenAI api_client = AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="dummy") mapper = IntelligentMapper(builder.G, system_tags, api_client=api_client) # 분류별 노드 분리 nodes = list(builder.G.nodes()) transmitter_nodes = [n for n, d in builder.G.nodes(data=True) if d.get('value', '').upper() in ['FIT', 'FT', 'LT', 'PT', 'TE']] # 단순화된 필터 valve_nodes = [n for n, d in builder.G.nodes(data=True) if d.get('value', '').upper() in ['FCV', 'LCV', 'TCV', 'PCV', 'XV']] equipment_nodes = [n for n, d in builder.G.nodes(data=True) if d.get('type') not in ['TEXT', 'LINE', 'LWPOLYLINE']] # 병렬 호출 (vLLM Batching 유도) tasks = [ mapper.extract_transmitters(transmitter_nodes), mapper.extract_valves(valve_nodes), mapper.extract_equipment(equipment_nodes) ] extracted_results = await asyncio.gather(*tasks) # 결과 통합 all_mapped_tags = [] for res_dict in extracted_results: for node_id, mapping in res_dict.items(): if mapping.resolved_tag != "UNKNOWN": # TopologyBuilder가 기대하는 형식으로 변환 node_data = builder.G.nodes[node_id] all_mapped_tags.append({ "entity_id": node_id, "tagName": mapping.resolved_tag, "bbox": node_data['bbox'].bounds if hasattr(node_data['bbox'], 'bounds') else node_data['bbox'], "clean_value": mapping.resolved_tag }) # 3. 최종 위상 모델링 (Phase 2) final_builder = PidTopologyBuilder(geo_data, all_extracted_tags=all_mapped_tags) final_builder.build_graph() # 4. 저장 graph_id = os.path.basename(filepath).replace(".dxf", "_graph.json") graph_path = f"mcp-server/storage/{graph_id}" final_builder.save_graph(graph_path) return json.dumps({ "success": True, "graph_id": graph_id, "graph_path": graph_path, "nodes": final_builder.G.number_of_nodes(), "edges": final_builder.G.number_of_edges() }, ensure_ascii=False) except Exception as e: logging.error(f"build_pid_graph_parallel failed: {e}") return json.dumps({"success": False, "error": str(e)}, ensure_ascii=False) @mcp.tool() def analyze_pid_impact(graph_id: str, start_node_id: str) -> str: """ 구축된 그래프를 기반으로 특정 설비 장애 시 영향도 분석을 수행합니다. """ try: graph_path = f"mcp-server/storage/{graph_id}" mapping_path = graph_path.replace("_graph.json", "_mapping.json") # 매핑 파일이 따로 저장된다고 가정 analyzer = PidAnalysisEngine(graph_path, mapping_path) result = analyzer.analyze_impact(start_node_id) return json.dumps(result, ensure_ascii=False, indent=2) except Exception as e: return json.dumps({"success": False, "error": f"Impact analysis failed: {e}"}, ensure_ascii=False) @mcp.tool() def parse_pid_drawing(filepath: str) -> str: """확장자 자동 감지하여 P&ID 도면 파싱. Args: filepath: DXF/DWG/PDF 파일 경로 Returns: JSON: { success, text, count, tags, format } """ import os ext = os.path.splitext(filepath)[1].lower() if ext == ".dxf": return parse_pid_dxf(filepath) elif ext == ".dwg": # DWG 파일은 사전에 DXF로 변환하여 업로드해야 합니다. # Linux에서 DWG를 DXF로 변환하는 도구는 제한되어 있습니다. return json.dumps({ "success": False, "error": "DWG 파일은 현재 직접 파싱할 수 없습니다.\n" + "사전에 DXF로 변환하여 업로드해 주세요.\n" + "\n변환 방법:\n" + "1. Windows에서 AutoCAD 또는 ODA File Converter 사용\n" + "2. 온라인 DWG → DXF 변환기 사용\n" + "3. LibreOffice Draw (Windows/macOS 전용) 사용" }, ensure_ascii=False) elif ext == ".pdf": return parse_pid_pdf(filepath) else: return json.dumps({ "success": False, "error": f"Unsupported format: {ext}. Supported: .dxf, .dwg, .pdf" }, ensure_ascii=False) # ── 엔트리포인트 ────────────────────────────────────────────────────────────── def main(): """HTTP 모드로 실행 — C# McpClient (localhost:5001) 용.""" mcp.run(transport="streamable-http") if __name__ == "__main__": # --http 플래그: HTTP 모드 (C# McpClient 용) # 플래그 없음: stdio 모드 (Claude Code / Roo Code MCP 용) if "--http" in sys.argv: mcp.run(transport="streamable-http") else: mcp.run(transport="stdio")