1580 lines
62 KiB
Python
1580 lines
62 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
ExperionCrawler Unified MCP Server
|
|
- RAG: Qdrant + Ollama nomic-embed-text + vLLM Qwen3.6-27B-FP8
|
|
- NL2SQL: 자연어 → LLM SQL 생성 → PostgreSQL 실행
|
|
- 사용처:
|
|
stdio 모드 (기본): Claude Code MCP / Roo Code MCP
|
|
HTTP 모드 (--http): C# McpClient (localhost:5001)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
import sys
|
|
import json
|
|
import logging
|
|
import httpx
|
|
from functools import lru_cache
|
|
from mcp.server.fastmcp import FastMCP
|
|
|
|
# Log to stderr at WARNING level.
# NOTE(review): presumably stderr is used so that log lines never mix with the
# stdout stream in stdio mode — confirm.
logging.basicConfig(level=logging.WARNING, stream=sys.stderr)

# ── Settings ──────────────────────────────────────────────────────────────────
QDRANT_URL = "http://localhost:6333"
OLLAMA_URL = "http://localhost:11434"
EMBED_MODEL = "nomic-embed-text"  # 768-dim, same as the Roo Code index
VLLM_BASE_URL = "http://localhost:8000/v1"
VLLM_MODEL = "Qwen3.6-27B-FP8"

# Qdrant collections
COL_CODEBASE = "ws-65f457145aee80b2"  # ExperionCrawler source code
COL_OPC_DOCS = "experion-opc-docs"  # Experion HS R530 OPC UA official docs (266 chunks)

# PostgreSQL connection
# NOTE(review): credentials are hard-coded in source; consider reading them
# from the environment.
DB_CONNECTION_STRING = "postgresql://postgres:postgres@localhost:5432/iiot_platform"
DB_TIMEOUT = 10  # seconds
|
|
|
|
# Talks to the C# McpClient (localhost:5001): json_response + stateless_http
# give a simple POST -> JSON exchange.
mcp = FastMCP(
    "iiot-rag",
    port=5001,
    json_response=True,
    stateless_http=True,
)
|
|
|
|
# Pipeline Imports
|
|
from pipeline.extractor import PidGeometricExtractor
|
|
from pipeline.topology import PidTopologyBuilder
|
|
from pipeline.mapper import IntelligentMapper
|
|
from pipeline.analyzer import PidAnalysisEngine
|
|
import networkx as nx
|
|
import os
|
|
import asyncio
|
|
import subprocess
|
|
import atexit
|
|
import signal
|
|
from dataclasses import dataclass
|
|
from typing import Dict, Optional
|
|
from functools import cache
|
|
|
|
# ── ProcessManager ─────────────────────────────────────────────────────────────
|
|
|
|
@dataclass
class WorkerProcess:
    """Bookkeeping record for one spawned worker subprocess."""

    process: subprocess.Popen  # handle of the spawned worker process
    port: int  # port number passed to the worker on its command line
    status: str  # "running", "stopped", "error"
    one_shot: bool = False  # whether to end the process after a request (for the P&ID worker)
|
|
|
|
|
|
class ProcessManager:
    """Spawn, track and tear down the worker subprocesses.

    One worker is kept per worker type ("rag", "nl2sql", "pid"), each on the
    fixed port listed in ``_worker_ports``.  Workers are started lazily by
    :meth:`get_worker` and terminated by :meth:`_cleanup`, which is hooked to
    interpreter exit and to SIGTERM / SIGINT.
    """

    def __init__(self):
        self.workers: Dict[str, WorkerProcess] = {}
        self._locks: Dict[str, asyncio.Lock] = {}
        self._pid_locks: Dict[str, asyncio.Lock] = {}  # finer-grained per-file/ID locks
        self._worker_ports = {"rag": 5002, "nl2sql": 5003, "pid": 5004}

        # Register the cleanup hooks.
        atexit.register(self._cleanup)
        for signum in (signal.SIGTERM, signal.SIGINT):
            try:
                previous = signal.getsignal(signum)
                signal.signal(signum, self._make_signal_handler(previous))
            except ValueError:
                # signal.signal() is only allowed in the main thread; the
                # atexit hook above still covers normal interpreter exit.
                logging.debug("signal %s handler not installed (not main thread)", signum)

    def _make_signal_handler(self, previous):
        """Build a handler that cleans up workers, then defers to *previous*.

        A handler that only cleaned up would swallow the signal and leave the
        server impossible to stop with Ctrl-C / SIGTERM, so after cleanup the
        previously installed behaviour is honoured:

        * a Python-level handler is called,
        * ``SIG_IGN`` stays ignored,
        * otherwise the default action is restored and the signal re-sent.
        """

        def _handler(signum, frame):
            self._cleanup()
            if callable(previous):
                previous(signum, frame)
            elif previous == signal.SIG_IGN:
                return
            else:
                signal.signal(signum, signal.SIG_DFL)
                os.kill(os.getpid(), signum)

        return _handler

    def _get_available_port(self, worker_type: str) -> int:
        """Return the port configured for *worker_type* (5002 if unknown)."""
        return self._worker_ports.get(worker_type, 5002)

    def _classify_tool(self, tool_name: str) -> str:
        """Map a tool name to its worker type.

        Returns "rag", "nl2sql" or "pid"; any unrecognised tool name maps to
        "default".
        """
        rag_tools = {"search_codebase", "search_r530_docs", "ask_iiot_llm", "rag_query"}
        nl2sql_tools = {"run_sql", "query_pv_history", "get_tag_metadata", "list_drawings", "query_with_nl"}
        pid_tools = {
            "extract_pid_tags", "match_pid_tags", "parse_pid_dxf", "parse_pid_pdf",
            "parse_pid_drawing", "build_pid_graph_parallel", "analyze_pid_impact",
        }

        if tool_name in rag_tools:
            return "rag"
        if tool_name in nl2sql_tools:
            return "nl2sql"
        if tool_name in pid_tools:
            return "pid"
        return "default"

    async def start_worker(self, worker_type: str, one_shot: bool = False) -> WorkerProcess:
        """Start the subprocess for *worker_type* and wait until it is healthy.

        Args:
            worker_type: Worker type (rag, nl2sql, pid).
            one_shot: Recorded on the returned ``WorkerProcess``; True means
                the process should be ended after a request (P&ID worker).

        Returns:
            The registered ``WorkerProcess`` with status "running".

        Raises:
            RuntimeError: The worker exited right after start, or did not
                answer ``/health`` within 15 seconds.
        """
        port = self._get_available_port(worker_type)

        # Resolve both the worker script and the log directory relative to
        # this file, so that start-up does not depend on the caller's cwd.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        cmd = [
            sys.executable,
            os.path.join(base_dir, "worker", f"{worker_type}_worker.py"),
            str(port),
        ]

        log_dir = os.path.join(base_dir, "logs")
        os.makedirs(log_dir, exist_ok=True)
        log_path = os.path.join(log_dir, f"{worker_type}_worker.log")

        # The child inherits the descriptor, so the parent's handle can be
        # closed as soon as Popen returns — and is closed even if Popen raises.
        with open(log_path, "a") as log_file:
            proc = subprocess.Popen(cmd, stdout=log_file, stderr=log_file)

        # Health-check loop: 30 polls x 0.5 s = at most 15 s.
        health_url = f"http://localhost:{port}/health"
        async with httpx.AsyncClient(timeout=1) as client:
            for _ in range(30):
                await asyncio.sleep(0.5)
                if proc.poll() is not None:
                    raise RuntimeError(f"{worker_type} 워커가 시작 직후 종료됨")
                try:
                    await client.get(health_url)
                except httpx.HTTPError:
                    continue  # not listening yet
                break  # health check succeeded
            else:
                proc.kill()
                proc.wait()  # reap the killed child so it does not linger as a zombie
                raise RuntimeError(f"{worker_type} 워커 시작 타임아웃")

        worker = WorkerProcess(
            process=proc,
            port=port,
            status="running",
            one_shot=one_shot,
        )
        self.workers[worker_type] = worker
        return worker

    async def stop_worker(self, worker_type: str):
        """Terminate the worker of *worker_type*, killing it if still alive after 0.5 s."""
        worker = self.workers.get(worker_type)
        if worker is None:
            return
        proc = worker.process
        proc.terminate()
        await asyncio.sleep(0.5)
        if proc.poll() is None:
            proc.kill()
        # pop() rather than del: the entry may have been removed while we slept.
        self.workers.pop(worker_type, None)

    async def get_worker(self, tool_name: str, one_shot: bool = False) -> WorkerProcess:
        """Return the worker serving *tool_name*, starting it when necessary.

        Args:
            tool_name: Tool name, classified via :meth:`_classify_tool`.
            one_shot: Passed to :meth:`start_worker` when a new process has to
                be started; an already running worker is returned unchanged.

        Raises:
            RuntimeError: Propagated from :meth:`start_worker`.
        """
        worker_type = self._classify_tool(tool_name)

        if worker_type not in self._locks:
            self._locks[worker_type] = asyncio.Lock()

        # Serialise start-up per worker type so concurrent calls spawn one process.
        async with self._locks[worker_type]:
            existing = self.workers.get(worker_type)
            if existing is not None and existing.process.poll() is None:
                return existing
            # Never started, or the previous process has exited: (re)start it.
            self.workers.pop(worker_type, None)
            return await self.start_worker(worker_type, one_shot)

    def _cleanup(self):
        """Send terminate to every tracked worker and forget them (best effort)."""
        for wtype, worker in list(self.workers.items()):
            try:
                worker.process.terminate()
            except Exception:
                # Shutdown path: never let one worker block the others.
                logging.debug("failed to terminate %s worker", wtype, exc_info=True)
        self.workers.clear()
|
|
|
|
|
|
# Global ProcessManager instance (constructing it registers the atexit and
# signal cleanup hooks).
process_manager = ProcessManager()
|
|
|
|
# ── 임베딩 (Ollama) ───────────────────────────────────────────────────────────
|
|
|
|
async def _embed(text: str) -> list[float]:
    """Return the Ollama nomic-embed-text (768-dim) embedding vector of *text*.

    The blocking HTTP request is executed on a worker thread.

    Raises:
        httpx.HTTPStatusError: Ollama answered with an error status.
    """
    import asyncio

    request_body = {"model": EMBED_MODEL, "prompt": text}

    def _request_embedding():
        with httpx.Client(timeout=30) as http:
            reply = http.post(f"{OLLAMA_URL}/api/embeddings", json=request_body)
            reply.raise_for_status()
            return reply.json()["embedding"]

    embedding = await asyncio.to_thread(_request_embedding)
    return embedding
|
|
|
|
# ── LLM (vLLM / Qwen3.6-27B-FP8) ─────────────────────────────────────
|
|
|
|
@lru_cache(maxsize=1)
def _llm():
    """Return the OpenAI-compatible client for the vLLM endpoint.

    ``lru_cache(maxsize=1)`` makes this a lazily created singleton; the
    ``openai`` package is imported on first use only.
    """
    from openai import OpenAI

    client = OpenAI(base_url=VLLM_BASE_URL, api_key="dummy")
    return client
|
|
|
|
|
|
# ── PaddleOCR 싱글톤 (PDF fallback용) ──────────────────────────────────────────
|
|
|
|
@lru_cache(maxsize=1)
def _ocr():
    """Return the cached PaddleOCR instance (Korean model, angle classifier on).

    GPU use is controlled by the ``PADDLE_USE_GPU`` environment variable
    (default "true").  If construction fails while GPU was requested, the
    variable is set to "false" and construction is retried once on CPU; a
    failure with GPU already disabled is re-raised.
    """
    import os

    from paddleocr import PaddleOCR

    gpu_requested = os.environ.get("PADDLE_USE_GPU", "true").lower() == "true"
    try:
        return PaddleOCR(
            use_angle_cls=True,
            lang="korean",
            use_gpu=gpu_requested,
            show_log=False,
        )
    except Exception:
        if not gpu_requested:
            raise
        # GPU initialisation failed: fall back to CPU.
        os.environ["PADDLE_USE_GPU"] = "false"
        return _ocr()
|
|
|
|
|
|
# ── DXF/PDF 텍스트 추출 헬퍼 ───────────────────────────────────────────────────
|
|
|
|
async def _extract_text_from_dxf(filepath: str) -> str:
    """Extract the text of all TEXT and MTEXT entities in a DXF modelspace.

    MTEXT inline formatting codes are stripped with ``plain_mtext``.  The
    blocking ezdxf work is executed on a worker thread.

    Args:
        filepath: Path of the DXF file.

    Returns:
        The entity texts joined with newlines ("" when there are none).
    """
    import asyncio

    import ezdxf
    from ezdxf.tools.text import plain_mtext

    def _extract():
        doc = ezdxf.readfile(filepath)
        texts = []
        for entity in doc.modelspace():
            kind = entity.dxftype()
            if kind == "TEXT":
                texts.append(entity.dxf.text)
            elif kind == "MTEXT":
                # MTEXT content lives in the entity's ``text`` attribute, not
                # in the ``dxf`` namespace; ``entity.dxf.text`` raises for
                # MTEXT, which previously dropped every MTEXT silently.
                try:
                    plain = plain_mtext(entity.text)
                except Exception:
                    logging.warning("skipping unreadable MTEXT in %s", filepath, exc_info=True)
                    continue
                if plain.strip():
                    texts.append(plain)
        return "\n".join(texts)

    return await asyncio.to_thread(_extract)
|
|
|
|
|
|
async def _extract_text_from_pdf(filepath: str) -> str:
    """Extract the embedded text of every page of a PDF with PyMuPDF.

    The blocking work is executed on a worker thread.

    Args:
        filepath: Path of the PDF file.

    Returns:
        The per-page texts joined with newlines.
    """
    import asyncio

    import fitz  # pymupdf

    def _extract():
        # Context manager closes the document (and its file handle) on exit.
        with fitz.open(filepath) as doc:
            return "\n".join(page.get_text() for page in doc)

    return await asyncio.to_thread(_extract)
|
|
|
|
|
|
async def _extract_text_from_pdf_ocr(filepath: str) -> str:
    """Render each PDF page at 300 DPI and OCR it with PaddleOCR.

    The blocking rendering/OCR work is executed on a worker thread.

    Args:
        filepath: Path of the PDF file.

    Returns:
        The recognised text lines of all pages joined with newlines.
    """
    import asyncio
    import io

    import fitz  # pymupdf
    import numpy as np
    from PIL import Image

    def _extract():
        # Matrix(zoom_x, zoom_y) scales the page; the single-argument form
        # Matrix(x) is a rotation by x degrees, not a zoom.
        zoom = 300 / 72  # PDF user space is 72 units per inch
        scale = fitz.Matrix(zoom, zoom)
        all_texts = []

        with fitz.open(filepath) as doc:
            for page in doc:
                # Render the page to an image.
                pix = page.get_pixmap(matrix=scale)
                img = Image.open(io.BytesIO(pix.tobytes("png")))

                # Run OCR; a page without text yields an empty/None entry.
                result = _ocr().ocr(np.array(img), cls=True)
                if not result or not result[0]:
                    continue
                all_texts.extend(line[1][0] for line in result[0])

        return "\n".join(all_texts)

    return await asyncio.to_thread(_extract)
|
|
|
|
|
|
async def _convert_dwg_to_dxf_dxflib(filepath: str) -> str:
    """Convert a DWG file to DXF by running headless LibreOffice.

    The converted file is written next to the input, with the input's
    extension replaced by ``.dxf``.  The blocking subprocess call is executed
    on a worker thread.

    NOTE(review): despite the function name, the conversion is done by the
    ``libreoffice`` binary — confirm the installed build can import DWG.

    Args:
        filepath: Path of the DWG file.

    Returns:
        Path of the generated DXF file.

    Raises:
        FileNotFoundError: LibreOffice exited successfully but produced no DXF.
        Exception: LibreOffice exited with a non-zero status.
        subprocess.TimeoutExpired: The conversion took longer than 120 s.
    """
    import asyncio
    import os
    import subprocess

    # Swap only the final extension (any letter case).  str.replace() would
    # also rewrite ".dwg" inside directory names and would leave "X.DWG"
    # unchanged, making the input itself look like the conversion result.
    dxf_path = os.path.splitext(filepath)[0] + ".dxf"

    def _convert():
        try:
            subprocess.run(
                [
                    "libreoffice",
                    "--headless",
                    "--convert-to", "dxf:AutoCAD DXF",
                    "--outdir", os.path.dirname(filepath) or ".",
                    filepath,
                ],
                check=True,
                timeout=120,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            raise Exception(f"LibreOffice 변환 실패: {e.stderr}") from e

        if not os.path.exists(dxf_path):
            raise FileNotFoundError("DXF 변환 파일이 생성되지 않았습니다.")
        return dxf_path

    return await asyncio.to_thread(_convert)
|
|
|
|
|
|
# ── Qdrant 검색 헬퍼 ──────────────────────────────────────────────────────────
|
|
|
|
async def _search(collection: str, query: str, top_k: int, threshold: float = 0.25) -> str:
    """Embed *query*, search a Qdrant collection and render the hits as text.

    Args:
        collection: Qdrant collection name.
        query: Free-text query; embedded via ``_embed``.
        top_k: Maximum number of hits requested.
        threshold: Minimum score passed to Qdrant as ``score_threshold``.

    Returns:
        One section per hit (score, location, first 700 characters of the
        chunk in a code fence), separated by ``---`` rules, or a fixed
        "no results" message when nothing matched.
    """
    import asyncio

    vector = await _embed(query)

    search_request = {
        "vector": vector,
        "limit": top_k,
        "with_payload": True,
        "score_threshold": threshold,
    }

    def _post_search():
        # Blocking HTTP call; executed on a worker thread below.
        with httpx.Client(timeout=20) as http:
            reply = http.post(
                f"{QDRANT_URL}/collections/{collection}/points/search",
                json=search_request,
            )
            reply.raise_for_status()
            return reply.json().get("result", [])

    hits = await asyncio.to_thread(_post_search)
    if not hits:
        return "관련 결과 없음."

    sections = []
    for hit in hits:
        payload = hit.get("payload", {})
        # Payload key names differ between collections, hence the fallbacks.
        source = payload.get("filePath", payload.get("path", "unknown"))
        snippet = payload.get("codeChunk", payload.get("content", payload.get("text", "")))
        first_line = payload.get("startLine", "")
        location = f"{source}:{first_line}" if first_line else source
        sections.append(f"[score={hit['score']:.3f}] {location}\n```\n{snippet[:700]}\n```")

    return "\n\n---\n\n".join(sections)
|
|
|
|
# ── DB 헬퍼 ──────────────────────────────────────────────────────────────────
|
|
|
|
async def _get_db_connection():
    """Open a PostgreSQL connection (psycopg) on a worker thread.

    Uses ``DB_CONNECTION_STRING`` with ``DB_TIMEOUT`` as connect timeout.
    The caller is responsible for closing the returned connection.
    """
    import asyncio

    import psycopg

    connection = await asyncio.to_thread(
        psycopg.connect,
        DB_CONNECTION_STRING,
        connect_timeout=DB_TIMEOUT,
    )
    return connection
|
|
|
|
|
|
def _validate_sql(sql: str) -> tuple[bool, str]:
|
|
"""SQL 안전 검증 — SELECT만 허용, 위험 키워드 차단."""
|
|
if len(sql) > 2000:
|
|
return False, "쿼리 길이 2000자를 초과했습니다."
|
|
dangerous = ['EXEC', 'DROP', 'DELETE', 'UPDATE', 'INSERT', 'ALTER', 'CREATE', 'GRANT', 'REVOKE']
|
|
sql_upper = sql.upper()
|
|
for kw in dangerous:
|
|
if kw in sql_upper:
|
|
return False, f"허용되지 않은 키워드 '{kw}'를 사용했습니다."
|
|
if not sql_upper.strip().startswith('SELECT'):
|
|
return False, "단순 SELECT 쿼리만 허용됩니다."
|
|
if '..' in sql or '~' in sql:
|
|
return False, "파일 경로 표현은 허용되지 않습니다."
|
|
return True, ""
|
|
|
|
|
|
# DB schema — used as context when the LLM generates SQL.
# The text itself is part of the prompt and is therefore left untranslated.
_DB_SCHEMA = """
PostgreSQL 시계열 데이터베이스 스키마

테이블: history_table (시계열 이력)
tagname TEXT - 태그명 (모두 소문자, 예: 'ficq-6113.pv') — 대소문자 구분
node_id TEXT - OPC UA 노드 ID
value TEXT - 측정값, 수치 연산 시 ::double precision 캐스트 필요
recorded_at TIMESTAMPTZ - 기록 시각(UTC), 스냅샷 주기 약 60초

테이블: realtime_table (실시간 최신값)
tagname TEXT - 태그명 (모두 소문자)
node_id TEXT - OPC UA 노드 ID
livevalue TEXT - 현재값
timestamp TIMESTAMPTZ - 최종 갱신 시각

N분 간격 집계 공식 (time_bucket 금지, date_trunc 사용):
1분 버킷: date_trunc('minute', recorded_at) AS bucket
2분 버킷: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/120)*120) AS bucket
5분 버킷: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/300)*300) AS bucket
10분 버킷: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/600)*600) AS bucket
N분 버킷: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/(N*60))*(N*60)) AS bucket

예시 (2분 간격, 여러 태그):
SELECT to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/120)*120) AS bucket,
tagname, AVG(value::double precision) AS avg_val
FROM history_table
WHERE tagname IN ('tag1', 'tag2')
AND recorded_at >= NOW() - INTERVAL '3 hours'
GROUP BY bucket, tagname ORDER BY bucket, tagname

규칙:
- SELECT만 허용 (INSERT/UPDATE/DELETE/DROP 등 불가)
- tagname은 모두 소문자로 정확히 입력
- value 컬럼은 TEXT이므로 집계 시 ::double precision 캐스트 필수
- time_bucket 함수 사용 금지 — 위의 to_timestamp/FLOOR/EPOCH 공식 사용
"""
|
|
|
|
# ── RAG 도구 ─────────────────────────────────────────────────────────────────
|
|
|
|
@mcp.tool()
async def search_codebase(query: str, top_k: int = 6) -> str:
    """ExperionCrawler 프로젝트 소스코드 검색 (우리가 개발한 .NET 8 C# 코드).
    Experion HS R530 공식 문서가 아닌, ExperionCrawler 구현 코드를 검색함.

    사용 시점: ExperionCrawler 코드의 구현 방법, 버그, 구조를 알고 싶을 때.
    ⚠️ Experion HS R530 제품 동작/설정/스펙을 알고 싶으면 search_r530_docs 사용.

    Args:
        query: 검색어 (예: "OPC UA 구독 시작", "히스토리 스냅샷", "TextToSql 서비스")
        top_k: 반환 결과 수 (기본 6)
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    # Searches the ExperionCrawler source-code collection.  `_search` is a
    # coroutine function: this tool must be `async` and await it, otherwise
    # the caller receives an un-awaited coroutine object instead of text.
    return await _search(COL_CODEBASE, query, top_k)
|
|
|
|
|
|
@mcp.tool()
async def search_r530_docs(query: str, top_k: int = 5) -> str:
    """Honeywell Experion HS R530 공식 제품 문서 검색.
    ExperionCrawler 코드가 아닌, Honeywell 공식 HTM 문서를 검색함.

    사용 시점: Experion HS R530의 OPC UA 설정, 인증서, 보안 정책, 포인트 주소 형식,
    채널/컨트롤러 속성, 문제해결 등 제품 스펙과 동작을 알고 싶을 때.

    Args:
        query: 검색어 (예: "certificate configuration", "endpoint security policy")
        top_k: 반환 결과 수 (기본 5)
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    # Searches the Experion HS R530 documentation collection.  `_search` is a
    # coroutine function: this tool must be `async` and await it, otherwise
    # the caller receives an un-awaited coroutine object instead of text.
    return await _search(COL_OPC_DOCS, query, top_k)
|
|
|
|
|
|
@mcp.tool()
def ask_iiot_llm(question: str, context: str = "") -> str:
    """Qwen3.6-27B-FP8에게 IIoT/OPC UA 질문 (컨텍스트 없이 LLM 직접 질문).

    사용 시점: search_codebase 또는 search_r530_docs 결과를 context로 넘겨
    종합 분석·답변이 필요할 때. 또는 일반 IIoT/OPC UA 개념 질문.

    Args:
        question: 질문 내용
        context: (선택) search_codebase 또는 search_r530_docs 검색 결과
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    # Sends one chat completion to the vLLM model: a fixed system prompt plus
    # the question, prefixed by the context when one is given.
    system_prompt = (
        "당신은 IIoT(산업용 IoT), OPC UA, Honeywell Experion PKS/HS R530 전문가입니다.\n"
        "컨텍스트가 제공된 경우 컨텍스트를 우선 근거로 삼아 한국어로 답변합니다.\n"
        "컨텍스트 출처가 'Experion HS R530 공식 문서'인지 'ExperionCrawler 코드'인지 명확히 구분하여 설명합니다."
    )

    if context:
        user_prompt = f"컨텍스트:\n{context}\n\n질문: {question}"
    else:
        user_prompt = question

    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = _llm().chat.completions.create(
        model=VLLM_MODEL,
        messages=chat_messages,
        max_tokens=2048,
        temperature=0.1,
    )

    answer = completion.choices[0].message.content
    # Empty/None content is replaced by a fixed placeholder.
    return answer or "(응답 없음)"
|
|
|
|
|
|
@mcp.tool()
async def rag_query(question: str, search_code: bool = False, search_docs: bool = True) -> str:
    """검색 → Qwen3.6-27B-FP8 답변 생성 (통합 RAG).

    기본값: Experion HS R530 공식 문서만 검색 (search_docs=True, search_code=False).
    ExperionCrawler 코드도 함께 보려면 search_code=True 추가.

    Args:
        question: 질문
        search_docs: Experion HS R530 공식 문서 검색 여부 (기본 True)
        search_code: ExperionCrawler 소스코드 검색 여부 (기본 False)
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    import asyncio

    context_parts: list[str] = []

    # `_search` is a coroutine function and has to be awaited; formatting the
    # un-awaited call into the f-string would embed "<coroutine object ...>"
    # in the LLM context instead of the search results.
    if search_docs:
        doc_hits = await _search(COL_OPC_DOCS, question, 4)
        context_parts.append(f"=== Experion HS R530 공식 문서 ===\n{doc_hits}")
    if search_code:
        code_hits = await _search(COL_CODEBASE, question, 3)
        context_parts.append(f"=== ExperionCrawler 구현 코드 ===\n{code_hits}")

    # ask_iiot_llm is synchronous and blocks on the LLM request, so it is run
    # on a worker thread to keep the event loop responsive.
    return await asyncio.to_thread(ask_iiot_llm, question, "\n\n".join(context_parts))
|
|
|
|
|
|
# ── NL2SQL 도구 ───────────────────────────────────────────────────────────────
|
|
|
|
async def _execute_sql_internal(sql: str) -> str:
    """Validate and execute *sql*; shared by ``run_sql`` and ``query_with_nl``.

    The query is first checked with ``_validate_sql``.  Execution happens on
    a worker thread, in a read-only session, so a slow query does not block
    the event loop and a statement that slipped past validation cannot write.

    Args:
        sql: SQL text to run.

    Returns:
        JSON string: ``{success, columns, count, data}`` on success, or
        ``{success: false, error}`` on validation/execution failure.
        Non-JSON-native values are serialised with ``str``.
    """
    import asyncio

    valid, err = _validate_sql(sql)
    if not valid:
        return json.dumps({"success": False, "error": f"SQL 검증 실패: {err}"}, ensure_ascii=False)

    def _run(connection):
        # Defence in depth for the SELECT-only policy.
        connection.read_only = True
        with connection.cursor() as cur:
            cur.execute(sql)
            rows = cur.fetchall()
            columns = [desc[0] for desc in cur.description]
        return columns, rows

    conn = None
    try:
        conn = await _get_db_connection()
        columns, rows = await asyncio.to_thread(_run, conn)
        result_data = [dict(zip(columns, row)) for row in rows]
        return json.dumps({
            "success": True,
            "columns": columns,
            "count": len(result_data),
            "data": result_data,
        }, ensure_ascii=False, default=str)
    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising.
        return json.dumps({"success": False, "error": f"SQL 실행 실패: {e}"}, ensure_ascii=False)
    finally:
        if conn:
            conn.close()
|
|
|
|
@mcp.tool()
async def run_sql(sql: str) -> str:
    """SQL 쿼리 실행 (SELECT만 허용).

    Args:
        sql: 실행할 SELECT SQL 문자열

    Returns:
        JSON: { success, columns, count, data } 또는 { success, error }
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.  Validation and execution are delegated to the shared
    # helper.
    result_json = await _execute_sql_internal(sql)
    return result_json
|
|
|
|
|
|
@mcp.tool()
async def query_pv_history(tag_names: list[str], time_from: str, time_to: str, limit: int = 100) -> str:
    """과거 값(PV) 히스토리 조회.

    Args:
        tag_names: 태그 이름 목록 (예: ["ficq-6113.pv", "ti-6101.pv"])
        time_from: 시작 시간 (ISO 8601, 예: "2026-04-01T00:00:00")
        time_to: 종료 시간 (ISO 8601, 예: "2026-04-02T00:00:00")
        limit: 반환 행 수 제한 (기본 100, 최대 5000)

    Returns:
        JSON: { success, tag_names, time_range, limit, data }
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    import asyncio

    def _fetch(connection, row_limit):
        # Parameterised query; runs on a worker thread.
        with connection.cursor() as cur:
            cur.execute(
                """SELECT tagname, recorded_at, value
                   FROM history_table
                   WHERE tagname = ANY(%s)
                     AND recorded_at >= %s AND recorded_at <= %s
                   ORDER BY recorded_at, tagname
                   LIMIT %s""",
                (tag_names, time_from, time_to, row_limit),
            )
            return cur.fetchall()

    conn = None
    try:
        limit = min(limit, 5000)
        # _get_db_connection is a coroutine function: without `await` the
        # "connection" is a coroutine object and every call fails.
        conn = await _get_db_connection()
        rows = await asyncio.to_thread(_fetch, conn, limit)
        data = [{"tag_name": r[0], "timestamp": r[1].isoformat(), "value": r[2]} for r in rows]
        return json.dumps({
            "success": True,
            "tag_names": tag_names,
            "time_range": f"{time_from} ~ {time_to}",
            "limit": limit,  # effective (capped) limit, as promised by the docstring
            "count": len(data),
            "data": data,
        }, ensure_ascii=False, indent=2)
    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising.
        return json.dumps({"success": False, "error": f"히스토리 쿼리 실패: {e}"}, ensure_ascii=False)
    finally:
        if conn:
            conn.close()
|
|
|
|
|
|
@mcp.tool()
async def get_tag_metadata(query: str, limit: int = 10) -> str:
    """태그 메타데이터 검색 (realtime_table 기반).

    Args:
        query: 태그명 검색어 (패턴 매칭)
        limit: 반환 태그 수 제한 (기본 10)

    Returns:
        JSON: { success, query, count, tags }
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    import asyncio

    def _fetch(connection):
        # Case-insensitive substring match on tagname; runs on a worker thread.
        with connection.cursor() as cur:
            cur.execute(
                """SELECT tagname, livevalue, timestamp, node_id
                   FROM realtime_table
                   WHERE tagname ILIKE %s
                   ORDER BY tagname LIMIT %s""",
                (f"%{query}%", limit),
            )
            return cur.fetchall()

    conn = None
    try:
        # _get_db_connection is a coroutine function: without `await` the
        # "connection" is a coroutine object and every call fails.
        conn = await _get_db_connection()
        rows = await asyncio.to_thread(_fetch, conn)
        tags = [
            {
                "tag_name": r[0],
                "current_value": r[1],
                "last_updated": r[2].isoformat() if r[2] else None,
                "node_id": r[3],
            }
            for r in rows
        ]
        return json.dumps({"success": True, "query": query, "count": len(tags), "tags": tags},
                          ensure_ascii=False, indent=2)
    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising.
        return json.dumps({"success": False, "error": f"태그 메타데이터 검색 실패: {e}"}, ensure_ascii=False)
    finally:
        if conn:
            conn.close()
|
|
|
|
|
|
@mcp.tool()
async def list_drawings(unit_no: str | None = None) -> str:
    """단위별 도면 목록 조회 (node_map_master.name 기반).

    Args:
        unit_no: 단위 번호 접두사 (예: "A", "B"). None이면 전체 목록

    Returns:
        JSON: { success, unit_no, count, names }
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    import asyncio

    def _fetch(connection):
        # At most 100 distinct names, optionally filtered by prefix; runs on
        # a worker thread.
        with connection.cursor() as cur:
            if unit_no:
                cur.execute(
                    "SELECT DISTINCT name FROM node_map_master WHERE name ILIKE %s ORDER BY name LIMIT 100",
                    (f"{unit_no}%",),
                )
            else:
                cur.execute("SELECT DISTINCT name FROM node_map_master ORDER BY name LIMIT 100")
            return cur.fetchall()

    conn = None
    try:
        # _get_db_connection is a coroutine function: without `await` the
        # "connection" is a coroutine object and every call fails.
        conn = await _get_db_connection()
        rows = await asyncio.to_thread(_fetch, conn)
        return json.dumps({"success": True, "unit_no": unit_no,
                           "count": len(rows), "names": [r[0] for r in rows]},
                          ensure_ascii=False, indent=2)
    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising.
        return json.dumps({"success": False, "error": f"도면 목록 조회 실패: {e}"}, ensure_ascii=False)
    finally:
        if conn:
            conn.close()
|
|
|
|
|
|
@mcp.tool()
async def query_with_nl(question: str) -> str:
    """자연어 질문을 LLM이 SQL로 변환하고 시계열 DB를 조회합니다.

    Args:
        question: 자연어 질문 (예: "FICQ-6113.PV 최근 1시간 값을 1분 단위로 표시")

    Returns:
        JSON: { sql, success, columns, count, data } 또는 { sql, success, error }
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    import asyncio

    system_prompt = (
        "You are a PostgreSQL SQL expert.\n"
        "Convert the user's question into a SELECT SQL using the schema below.\n"
        "IMPORTANT rules:\n"
        "- Use ONLY PostgreSQL syntax. No DATE_FORMAT, no INTERVAL N DAY.\n"
        "- Time column is 'recorded_at' (TIMESTAMPTZ). Do NOT use 'timestamp'.\n"
        "- NEVER use time_bucket(). For N-minute buckets use to_timestamp/FLOOR/EPOCH formula.\n"
        "- INTERVAL rule:\n"
        " * If the question specifies an interval (e.g. '2분 간격', '5-minute interval'):\n"
        " use: to_timestamp(FLOOR(EXTRACT(EPOCH FROM recorded_at)/(N*60))*(N*60)) AS bucket\n"
        " with GROUP BY bucket, tagname and AVG(value::double precision) AS avg_val\n"
        " * If NO interval is specified: SELECT recorded_at, tagname, value — NO GROUP BY.\n"
        "- Current year is 2026. '4월 27일' means 2026-04-27.\n"
        "- All times in DB are UTC. Korean input is KST (UTC+9). Convert: KST 12:00 = UTC 03:00.\n"
        "- value column is TEXT; cast with ::double precision only when aggregating.\n"
        "- All tagnames are lowercase (e.g. 'ficq-6113.pv'). Match exactly.\n"
        "- PostgreSQL LIKE: dot has no special meaning, no escaping needed.\n"
        "- Return ONLY the SQL statement. No explanation, no markdown.\n\n"
        f"{_DB_SCHEMA}"
    )

    # Step 1: let the LLM write the SQL (blocking client -> worker thread).
    try:
        def _generate_sql():
            return _llm().chat.completions.create(
                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": question},
                ],
                max_tokens=8192,
                temperature=0.1,
            )

        completion = await asyncio.to_thread(_generate_sql)
        sql = (completion.choices[0].message.content or "").strip()

        # Strip a surrounding markdown code fence, if the model added one.
        if sql.startswith("```"):
            fenced = sql.splitlines()
            if fenced[-1].strip() == "```":
                inner = fenced[1:-1]
            else:
                inner = fenced[1:]
            sql = "\n".join(inner).strip()

        if not sql:
            return json.dumps({"success": False, "sql": "", "error": "LLM이 SQL을 생성하지 못했습니다."}, ensure_ascii=False)
    except Exception as e:
        return json.dumps({"success": False, "sql": "", "error": f"LLM SQL 생성 실패: {e}"}, ensure_ascii=False)

    # Step 2: execute the SQL and attach it to the result.
    result = json.loads(await _execute_sql_internal(sql))
    result["sql"] = sql

    # Step 3: long format -> pivot (one row per time value, one column per
    # tag) whenever the result has a "tagname" column.
    if result.get("success") and "data" in result:
        columns = result.get("columns", [])
        rows = result["data"]
        if "tagname" in columns and rows:
            # First column that is neither the tag nor a value column.
            not_time = ("tagname", "value", "livevalue", "avg_val")
            time_col = next((c for c in columns if c not in not_time), None)
            val_col = next((c for c in ("avg_val", "value") if c in columns), columns[-1])
            if time_col:
                tag_columns = sorted(dict.fromkeys(r["tagname"] for r in rows))
                by_time: dict = {}
                for r in rows:
                    bucket_key = str(r[time_col])
                    if bucket_key not in by_time:
                        by_time[bucket_key] = {time_col: r[time_col]}
                    by_time[bucket_key][r["tagname"]] = r.get(val_col)
                result["data"] = list(by_time.values())
                result["columns"] = [time_col] + tag_columns
                result["count"] = len(result["data"])

    return json.dumps(result, ensure_ascii=False, default=str)
|
|
|
|
|
|
# ── P&ID 추출 도구 ──────────────────────────────────────────────────────────────
|
|
|
|
@mcp.tool()
async def extract_pid_tags(text: str, source_type: str) -> str:
    """P&ID 도면(DXF/PDF)에서 태그 정보를 추출합니다.

    Args:
        text: DXF/PDF에서 추출한 텍스트
        source_type: 'dxf' 또는 'pdf'

    Returns:
        JSON: { success, count, tags: [{tagNo, equipmentName, instrumentType, lineNumber, pidDrawingNo, confidence}] }
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    import asyncio
    import logging
    import re
    import json as json_module

    system = (
        "You are a P&ID (Piping and Instrumentation Diagram) expert.\n"
        "Extract all instrument and equipment tags from the provided text.\n"
        "Return ONLY a valid JSON array. Each element must have exactly these fields:\n"
        '{"tagNo":"FCV-101","equipmentName":null,"instrumentType":"FCV","lineNumber":null,"pidDrawingNo":null,"confidence":0.95}\n'
        "Rules:\n"
        "- tagNo: any token matching [LETTERS]-[DIGITS] or [LETTERS]-[DIGITS]-[SUFFIX]\n"
        " Examples: FCV-101, P-10101, T-10100, VG-6203-15A-F1A-n, BT-6200, DP-10101\n"
        "- instrumentType: leading letters of tagNo (e.g. FCV, P, T, VG, BT, DP, PSV)\n"
        "- equipmentName: descriptive name if present in text near the tag, else null\n"
        "- lineNumber: null unless a line number is explicitly associated\n"
        "- pidDrawingNo: null unless a drawing number is explicitly associated\n"
        "- confidence: 0.95 for clear tags, lower for ambiguous ones\n"
        "- Output ONLY the JSON array, no markdown, no explanation.\n"
        "- If no tags found, return: []\n"
    )

    def _extract_array(s: str) -> str:
        """Return the longest balanced top-level ``[...]`` span of *s* ("[]" if none)."""
        depth = 0
        start = -1
        best = ""
        for i, c in enumerate(s):
            if c == "[":
                if depth == 0:
                    start = i
                depth += 1
            elif c == "]" and depth > 0:
                # Stray closers (depth already 0) are ignored so they cannot
                # push the depth negative and hide every later array.
                depth -= 1
                if depth == 0:
                    cand = s[start:i + 1]
                    if len(cand) > len(best):
                        best = cand
        return best if best else "[]"

    # Bound before the try block: the except handler logs it, and it would
    # otherwise be undefined when the LLM call itself is what failed.
    raw = ""
    try:
        truncated_text = text[:100000] if len(text) > 100000 else text

        def _call_llm():
            return _llm().chat.completions.create(
                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": f"Source: {source_type}\n\nText:\n{truncated_text}"},
                ],
                max_tokens=32768,
                temperature=0.1,
                extra_body={"chat_template_kwargs": {"enable_thinking": False}},
            )

        resp = await asyncio.to_thread(_call_llm)

        raw = (resp.choices[0].message.content or "").strip()
        finish_reason = resp.choices[0].finish_reason

        # Strip a surrounding markdown code fence.
        if raw.startswith("```"):
            lines = raw.splitlines()
            raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip()

        # Output cut off by the token limit: keep everything up to the last
        # complete object and close the array.
        if finish_reason == "length":
            last_close = raw.rfind("}")
            if last_close != -1:
                raw = raw[:last_close + 1] + "]"

        # Pick the longest balanced [...] span as the JSON array.
        raw = _extract_array(raw)

        # Parse; on failure fall back to parsing the flat objects one by one.
        try:
            data = json_module.loads(raw)
        except json_module.JSONDecodeError:
            data = []
            for obj in re.findall(r'\{[^{}]*\}', raw, re.DOTALL):
                try:
                    data.append(json_module.loads(obj))
                except json_module.JSONDecodeError:
                    pass  # skip an individual malformed object
            if not data:
                return json_module.dumps({"success": False, "count": 0, "tags": []}, ensure_ascii=False)

        logging.info(f"[extract_pid_tags] source={source_type} count={len(data) if isinstance(data, list) else 0}")

        return json_module.dumps({
            "success": True,
            "count": len(data),
            "tags": data,
        }, ensure_ascii=False, indent=2)

    except Exception as e:
        # Tool boundary: log and report the failure as JSON.
        logging.error(f"P&ID 태그 추출 실패: {e}")
        logging.error(f"Raw response: {raw[:1000]}")
        return json.dumps({"success": False, "error": f"P&ID 태그 추출 실패: {e}"}, ensure_ascii=False)
|
|
|
|
|
|
@mcp.tool()
async def match_pid_tags(pid_tags: list[str], experion_tags: list[str]) -> str:
    """P&ID 태그를 Experion 태그에 매핑합니다.

    Args:
        pid_tags: P&ID에서 추출한 태그 목록 (예: ["FT-101", "PT-201"])
        experion_tags: Experion 시스템 태그 목록 (예: ["ficq-6113.pv", "pt-201.pv"])

    Returns:
        JSON: { success, count, mappings: [{pidTag, experionTag, confidence}] }
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    import asyncio
    import re
    import json as json_module

    system_prompt = (
        "You are a P&ID to Experion tag matching expert.\n"
        "Match P&ID tags to Experion tags based on similarity.\n"
        "Return ONLY a JSON array of objects with the following structure:\n"
        '[{"pidTag":"FT-101","experionTag":"ft-101.pv","confidence":0.92},...]\n'
        "IMPORTANT rules:\n"
        "- pidTag: The original P&ID tag from input\n"
        "- experionTag: The matched Experion tag (lowercase, with .pv/.sp/.mv suffix)\n"
        "- confidence: 0.0 to 1.0 based on match quality\n"
        "- If no good match found, set confidence < 0.5 and leave experionTag null\n"
        "- Do NOT include any explanation, only the JSON array.\n"
        "- If no matches found, return an empty array: []\n"
        "- temperature=0.1 for deterministic output.\n"
    )

    try:
        pid_block = "\n".join(pid_tags)
        experion_block = "\n".join(experion_tags)
        user_prompt = f"P&ID Tags:\n{pid_block}\n\nExperion Tags:\n{experion_block}"

        def _request_matches():
            # Blocking LLM request; executed on a worker thread below.
            return _llm().chat.completions.create(
                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                max_tokens=16384,
                temperature=0.1,
                extra_body={"chat_template_kwargs": {"enable_thinking": False}},
            )

        completion = await asyncio.to_thread(_request_matches)
        choice = completion.choices[0]
        payload = (choice.message.content or "").strip()

        # Strip a surrounding markdown code fence.
        if payload.startswith("```"):
            fenced = payload.splitlines()
            if fenced and fenced[-1].strip() == "```":
                kept = fenced[1:-1]
            else:
                kept = fenced[1:]
            payload = "\n".join(kept).strip()

        # Output cut off by the token limit: close the array after the last
        # complete object.
        if choice.finish_reason == "length":
            cut = payload.rfind("}")
            if cut != -1:
                payload = payload[:cut + 1] + "]"

        # Greedy match from the first "[" to the last "]".
        array_match = re.search(r'\[.*\]', payload, re.DOTALL)
        payload = array_match.group(0) if array_match else "[]"

        mappings = json_module.loads(payload)
        return json_module.dumps(
            {"success": True, "count": len(mappings), "mappings": mappings},
            ensure_ascii=False,
            indent=2,
        )

    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising.
        return json.dumps({"success": False, "error": f"P&ID 태그 매핑 실패: {e}"}, ensure_ascii=False)
|
|
|
|
|
|
# ── P&ID 파싱 도구 (DXF/PDF/DWG) ───────────────────────────────────────────────
|
|
|
|
|
|
@mcp.tool()
async def parse_pid_dxf(filepath: str) -> str:
    """ezdxf 기반 DXF 파일 파싱. 텍스트 추출 후 LLM으로 태그 자동 추출.

    Args:
        filepath: DXF 파일 경로

    Returns:
        JSON: { success, text, count, tags: [{tagNo, equipmentName, ...}] }
    """
    # The docstring above is published as the MCP tool description, so it is
    # kept verbatim.
    import asyncio
    import json
    import re

    try:
        # _extract_text_from_dxf is a coroutine function (it moves the
        # blocking work to a thread itself) and must be awaited directly;
        # wrapping it in asyncio.to_thread only returns an un-awaited
        # coroutine object.
        text = await _extract_text_from_dxf(filepath)

        if not text.strip():
            return json.dumps({
                "success": True,
                "text": "",
                "count": 0,
                "tags": [],
            }, ensure_ascii=False, indent=2)

        # Extract the tags with the LLM.
        system = (
            "You are a P&ID (Piping and Instrumentation Diagram) expert.\n"
            "Extract instrument and equipment tags from the provided text.\n"
            "Return ONLY a JSON array of objects with the following structure:\n"
            '[{"tagNo":"FIT-10115","equipmentName":"Flow Transmitter","instrumentType":"FT" OR "FIT OR "TIA","lineNumber":"L-101","pidDrawingNo":"P&ID-001","confidence":0.95},...]\n'
            "IMPORTANT rules:\n"
            "- tagNo: Standard tag format with these patterns:\n"
            " * Instrument: [Function][Loop]-[Number] (e.g., FT-101, PT-201, LI-301, FICQ-6113)\n"
            " * Equipment: [Type]-[Number] (e.g., P-10101, T-10100, C-9111, E-10119)\n"
            " * Complex: [Type]-[Number]-[Size]-[Class]-[Material]-[Option] (e.g., VG-6203-15A-F1A-n, CD-10513-40A-S1A-H50)\n"
            " * Real examples from DXF: BT-6200, SARF-#6-PID-002, P-6101, DP-10101, CHS-6630-100A-F-C50\n"
            "- instrumentType: First 2-4 letters of tagNo (FIT, PT, LI, FICQ, TCV, FCV, PCV, PG, TG, etc.)\n"
            "- equipmentName: Descriptive name if available, otherwise null\n"
            "- lineNumber: Line number if available, otherwise null\n"
            "- pidDrawingNo: Drawing number if available, otherwise null\n"
            "- confidence: 0.0 to 1.0 based on how clearly the tag was identified\n"
            "- Do NOT include any explanation, only the JSON array.\n"
            "- If no tags found, return an empty array: []\n"
            "- temperature=0.1 for deterministic output.\n"
        )

        truncated_text = text[:12000] if len(text) > 12000 else text

        def _call_llm():
            # Blocking LLM request; executed on a worker thread below.
            return _llm().chat.completions.create(
                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": f"Source: dxf\n\nText:\n{truncated_text}"},
                ],
                max_tokens=4096,
                temperature=0.1,
            )

        resp = await asyncio.to_thread(_call_llm)
        raw = (resp.choices[0].message.content or "").strip()

        # Strip a surrounding markdown code fence.
        if raw.startswith("```"):
            lines = raw.splitlines()
            raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip()

        # Narrow to the JSON array (greedy: first "[" to last "]").
        match = re.search(r'\[.*\]', raw, re.DOTALL)
        if match:
            raw = match.group(0)

        # Parse with two fallbacks: a stricter array pattern, then the flat
        # objects one by one.  `data` is bound on every path.
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            data = None
            strict = re.search(r'\[\s*\{.*?\}\s*\]', raw, re.DOTALL)
            if strict:
                try:
                    data = json.loads(strict.group(0))
                except json.JSONDecodeError:
                    data = None
            if data is None:
                data = []
                for obj in re.findall(r'\{[^{}]*\}', raw, re.DOTALL):
                    try:
                        data.append(json.loads(obj))
                    except json.JSONDecodeError:
                        pass  # skip an individual malformed object

        if not isinstance(data, list):
            data = []

        return json.dumps({
            "success": True,
            "text": text[:10000],  # capped
            "count": len(data),  # number of tags, consistent with the empty-text case
            "tags": data,
        }, ensure_ascii=False, indent=2)
    except Exception as e:
        # Tool boundary: report the failure as JSON instead of raising.
        return json.dumps({"success": False, "error": f"DXF 파싱 실패: {e}"}, ensure_ascii=False)
|
|
|
|
|
|
def _parse_pdf_llm_tags(raw: str) -> list:
    """Turn the raw LLM reply into a list of tag objects.

    Parsing is attempted in three increasingly lenient steps:
      1. parse the outermost ``[...]`` span (after stripping a Markdown fence),
      2. parse the first ``[{...}]`` span found by a non-greedy pattern,
      3. parse every flat ``{...}`` object individually and keep the valid ones.

    Always returns a list; anything that is not a JSON array yields ``[]``.
    """
    import json
    import re

    raw = raw.strip()

    # Strip a Markdown code fence (```json ... ```), if the model added one.
    if raw.startswith("```"):
        fence_lines = raw.splitlines()
        if fence_lines[-1].strip() == "```":
            inner = fence_lines[1:-1]
        else:
            inner = fence_lines[1:]
        raw = "\n".join(inner).strip()

    # Narrow to the outermost JSON array when one is present.
    array_match = re.search(r'\[.*\]', raw, re.DOTALL)
    if array_match:
        raw = array_match.group(0)

    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        parsed = None

        # Second attempt: a stricter, non-greedy "[ {...} ]" span.
        strict_match = re.search(r'\[\s*\{.*?\}\s*\]', raw, re.DOTALL)
        if strict_match:
            try:
                parsed = json.loads(strict_match.group(0))
            except json.JSONDecodeError:
                parsed = None

        # Last resort: salvage each flat object on its own. This also covers
        # replies with no closing bracket (e.g. cut off by max_tokens), which
        # previously left the result variable unbound.
        if parsed is None:
            parsed = []
            for candidate in re.findall(r'\{[^{}]*\}', raw, re.DOTALL):
                try:
                    parsed.append(json.loads(candidate))
                except json.JSONDecodeError:
                    continue

    return parsed if isinstance(parsed, list) else []


# NOTE(review): this module defines another `parse_pid_pdf` further down that
# forwards to a worker process; the later `def` rebinds the module-level name.
# Confirm which of the two is meant to be registered as the MCP tool.
@mcp.tool()
async def parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str:
    """Parse a P&ID PDF: extract its text, then ask the LLM to extract tags.

    Args:
        filepath: Path to the PDF file.
        use_ocr: When True (default) the OCR-based extractor is used,
            otherwise the plain text extractor.

    Returns:
        JSON string: { success, text, count, tags: [{tagNo, equipmentName, ...}] }.
        ``text`` is capped at 10000 characters and ``count`` is the length of
        the full extracted text. On failure: { success: false, error }.
    """
    import asyncio
    import json

    try:
        def _extract_text():
            if use_ocr:
                return _extract_text_from_pdf_ocr(filepath)
            return _extract_text_from_pdf(filepath)

        # Text extraction is blocking, so run it off the event loop.
        text = await asyncio.to_thread(_extract_text)

        if not text.strip():
            return json.dumps({
                "success": True,
                "text": "",
                "count": 0,
                "tags": []
            }, ensure_ascii=False, indent=2)

        # Prompt for LLM-based tag extraction.
        system = (
            "You are a P&ID (Piping and Instrumentation Diagram) expert.\n"
            "Extract instrument and equipment tags from the provided text.\n"
            "Return ONLY a JSON array of objects with the following structure:\n"
            '[{"tagNo":"FIT-10115","equipmentName":"Flow Transmitter","instrumentType":"FT" OR "FIT OR "TIA","lineNumber":"L-101","pidDrawingNo":"P&ID-001","confidence":0.95},...]\n'
            "IMPORTANT rules:\n"
            "- tagNo: Standard tag format with these patterns:\n"
            " * Instrument: [Function][Loop]-[Number] (e.g., FT-101, PT-201, LI-301, FICQ-6113)\n"
            " * Equipment: [Type]-[Number] (e.g., P-10101, T-10100, C-9111, E-10119)\n"
            " * Complex: [Type]-[Number]-[Size]-[Class]-[Material]-[Option] (e.g., VG-6203-15A-F1A-n, CD-10513-40A-S1A-H50)\n"
            " * Real examples from DXF: BT-6200, SARF-#6-PID-002, P-6101, DP-10101, CHS-6630-100A-F-C50\n"
            "- instrumentType: First 2-4 letters of tagNo (FIT, PT, LI, FICQ, TCV, FCV, PCV, PG, TG, etc.)\n"
            "- equipmentName: Descriptive name if available, otherwise null\n"
            "- lineNumber: Line number if available, otherwise null\n"
            "- pidDrawingNo: Drawing number if available, otherwise null\n"
            "- confidence: 0.0 to 1.0 based on how clearly the tag was identified\n"
            "- Do NOT include any explanation, only the JSON array.\n"
            "- If no tags found, return an empty array: []\n"
            "- temperature=0.1 for deterministic output.\n"
        )

        # Only the first 12000 characters are sent to the model.
        truncated_text = text[:12000]

        def _call_llm():
            return _llm().chat.completions.create(
                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": f"Source: pdf\n\nText:\n{truncated_text}"},
                ],
                max_tokens=4096,
                temperature=0.1,
            )

        # The completion call is synchronous; keep the event loop free.
        resp = await asyncio.to_thread(_call_llm)

        raw = (resp.choices[0].message.content or "").strip()
        data = _parse_pdf_llm_tags(raw)

        return json.dumps({
            "success": True,
            "text": text[:10000],
            "count": len(text),
            "tags": data
        }, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"success": False, "error": f"PDF 파싱 실패: {e}"}, ensure_ascii=False)
|
|
|
|
|
|
# NOTE(review): this module defines another `build_pid_graph_parallel` further
# down that forwards to a worker process; the later `def` rebinds the
# module-level name. Confirm which of the two is meant to be registered.
@mcp.tool()
async def build_pid_graph_parallel(filepath: str) -> str:
    """Build a P&ID graph from a drawing file using concurrent extraction.

    Steps: geometric extraction -> concurrent tag mapping (transmitters,
    valves, equipment) -> final topology build -> save to storage.

    Args:
        filepath: Path to the drawing file to process.

    Returns:
        JSON string. On success:
        { success, data: { graph_id, graph_path, nodes, edges }, message }.
        On failure: { success: false, data: null, error, message }.
    """
    import asyncio
    import json

    try:
        source_name = os.path.basename(filepath)

        # 1. Pre-processing (Phase 1: geometric extraction), run off the loop.
        def _extract_and_save():
            extractor = PidGeometricExtractor(filepath)
            out_path = f"mcp-server/storage/{source_name}_geo.json"
            extractor.extract_and_save(out_path)
            return out_path

        geo_data_path = await asyncio.to_thread(_extract_and_save)

        # The extractor wrote its result to disk; load it back.
        def _load_geo_data():
            with open(geo_data_path, 'r', encoding='utf-8') as f:
                return json.load(f)

        geo_data = await asyncio.to_thread(_load_geo_data)

        # 2. Concurrent extraction (Phase 3: intelligent mapping).
        # System tag names come from the database; a failure here is logged
        # and mapping proceeds with an empty tag list (best effort).
        system_tags = []
        try:
            def _fetch_system_tags():
                conn = _get_db_connection()
                try:
                    with conn.cursor() as cur:
                        cur.execute("SELECT tagname FROM realtime_table")
                        return [r[0] for r in cur.fetchall()]
                finally:
                    conn.close()

            system_tags = await asyncio.to_thread(_fetch_system_tags)
        except Exception as e:
            logging.warning(f"Failed to fetch system tags: {e}")

        # Preliminary graph: the mapper is constructed from builder.G.
        builder = PidTopologyBuilder(geo_data)
        builder.build_graph()

        from openai import AsyncOpenAI
        api_client = AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="dummy")
        mapper = IntelligentMapper(builder.G, system_tags, api_client=api_client)

        # Split nodes by category (simplified filters on node attributes).
        transmitter_codes = ('FIT', 'FT', 'LT', 'PT', 'TE')
        valve_codes = ('FCV', 'LCV', 'TCV', 'PCV', 'XV')
        non_equipment_types = ('TEXT', 'LINE', 'LWPOLYLINE')

        def _value_code(attrs):
            # Tolerate a missing or None 'value' attribute.
            return str(attrs.get('value') or '').upper()

        node_items = list(builder.G.nodes(data=True))
        transmitter_nodes = [n for n, d in node_items if _value_code(d) in transmitter_codes]
        valve_nodes = [n for n, d in node_items if _value_code(d) in valve_codes]
        equipment_nodes = [n for n, d in node_items if d.get('type') not in non_equipment_types]

        # Issue the three extraction calls concurrently.
        extracted_results = await asyncio.gather(
            mapper.extract_transmitters(transmitter_nodes),
            mapper.extract_valves(valve_nodes),
            mapper.extract_equipment(equipment_nodes),
        )

        # Merge results, converting them to the dict shape passed to the
        # final PidTopologyBuilder.
        all_mapped_tags = []
        for res_dict in extracted_results:
            for node_id, mapping in res_dict.items():
                if mapping.resolved_tag == "UNKNOWN":
                    continue
                node_data = builder.G.nodes[node_id]
                bbox = node_data['bbox']
                all_mapped_tags.append({
                    "entity_id": node_id,
                    "tagName": mapping.resolved_tag,
                    "bbox": bbox.bounds if hasattr(bbox, 'bounds') else bbox,
                    "clean_value": mapping.resolved_tag
                })

        # 3. Final topology modelling (Phase 2).
        final_builder = PidTopologyBuilder(geo_data, all_extracted_tags=all_mapped_tags)
        final_builder.build_graph()

        # 4. Save. Derive the id from the file stem so that any extension
        # (including upper-case ".DXF") yields "<stem>_graph.json".
        stem, _ext = os.path.splitext(source_name)
        graph_id = f"{stem}_graph.json"
        graph_path = f"mcp-server/storage/{graph_id}"
        final_builder.save_graph(graph_path)

        return json.dumps({
            "success": True,
            "data": {
                "graph_id": graph_id,
                "graph_path": graph_path,
                "nodes": final_builder.G.number_of_nodes(),
                "edges": final_builder.G.number_of_edges()
            },
            "message": "그래프 생성 완료"
        }, ensure_ascii=False)

    except Exception as e:
        # logging.exception keeps the traceback alongside the message.
        logging.exception(f"build_pid_graph_parallel failed: {e}")
        return json.dumps({"success": False, "data": None, "error": str(e), "message": "그래프 생성 실패"}, ensure_ascii=False)
|
|
|
|
# NOTE(review): this module defines another `analyze_pid_impact` further down
# that forwards to a worker process; the later `def` rebinds the module-level
# name. Confirm which of the two is meant to be registered.
@mcp.tool()
async def analyze_pid_impact(graph_id: str, start_node_id: str) -> str:
    """Run an impact analysis on a previously built P&ID graph.

    Args:
        graph_id: File name of the stored graph inside ``mcp-server/storage``.
            Must be a plain file name (no directory components).
        start_node_id: Node from which the impact analysis starts.

    Returns:
        JSON string with the analyzer's result, or
        { success: false, error } when the analysis fails.
    """
    import asyncio

    try:
        # graph_id is caller-supplied and ends up in a filesystem path:
        # refuse anything that could escape the storage directory.
        if (
            not graph_id
            or graph_id in (".", "..")
            or os.path.basename(graph_id) != graph_id
        ):
            raise ValueError(f"invalid graph_id: {graph_id!r}")

        graph_path = f"mcp-server/storage/{graph_id}"
        mapping_path = graph_path.replace("_graph.json", "_mapping.json")

        def _analyze():
            analyzer = PidAnalysisEngine(graph_path, mapping_path)
            return analyzer.analyze_impact(start_node_id)

        # The analyzer is synchronous; run it off the event loop.
        result = await asyncio.to_thread(_analyze)
        return json.dumps(result, ensure_ascii=False, indent=2)
    except Exception as e:
        return json.dumps({"success": False, "error": f"Impact analysis failed: {e}"}, ensure_ascii=False)
|
|
|
|
# NOTE(review): this module defines another `parse_pid_drawing` further down
# that forwards to a worker process; the later `def` rebinds the module-level
# name. Confirm which of the two is meant to be registered.
@mcp.tool()
async def parse_pid_drawing(filepath: str) -> str:
    """Parse a P&ID drawing, choosing the parser from the file extension.

    Args:
        filepath: Path to a DXF, DWG or PDF file.

    Returns:
        JSON string produced by the DXF or PDF parser, or
        { success: false, error } for DWG and unsupported extensions.
    """
    import os

    _root, suffix = os.path.splitext(filepath)
    suffix = suffix.lower()

    if suffix == ".dxf":
        return await parse_pid_dxf(filepath)

    if suffix == ".pdf":
        return await parse_pid_pdf(filepath)

    if suffix == ".dwg":
        # DWG cannot be parsed directly; the caller must convert it to DXF
        # first (conversion tooling on Linux is limited).
        dwg_error = (
            "DWG 파일은 현재 직접 파싱할 수 없습니다.\n"
            "사전에 DXF로 변환하여 업로드해 주세요.\n"
            "\n변환 방법:\n"
            "1. Windows에서 AutoCAD 또는 ODA File Converter 사용\n"
            "2. 온라인 DWG → DXF 변환기 사용\n"
            "3. LibreOffice Draw (Windows/macOS 전용) 사용"
        )
        return json.dumps({"success": False, "error": dwg_error}, ensure_ascii=False)

    unsupported = {
        "success": False,
        "error": f"Unsupported format: {suffix}. Supported: .dxf, .dwg, .pdf"
    }
    return json.dumps(unsupported, ensure_ascii=False)
|
|
|
|
|
|
# ── 워커 요청 전달 ────────────────────────────────────────────────────────────
|
|
|
|
async def _forward_request(port: int, tool_name: str, params: dict, one_shot: bool = False) -> str:
    """Forward a tool call to a worker process over HTTP and return its reply.

    Args:
        port: Local port the worker listens on.
        tool_name: Name of the tool to execute on the worker.
        params: Tool parameters, sent as JSON.
        one_shot: When True the request goes to ``/execute/one_shot`` instead
            of ``/execute`` (per the original note, the worker is expected to
            exit after such a request; that behaviour is not visible here).

    Returns:
        The raw response body as text.

    Raises:
        httpx.HTTPStatusError: If the worker answers with an error status.
    """
    path = "/execute/one_shot" if one_shot else "/execute"
    url = f"http://localhost:{port}{path}"
    payload = {"tool": tool_name, "params": params}

    # 600-second timeout to accommodate large DXF processing.
    async with httpx.AsyncClient(timeout=600) as client:
        reply = await client.post(url, json=payload)
        reply.raise_for_status()
        return reply.text
|
|
|
|
|
|
# ── 요청 라우팅 (워커 프로세스 사용) ───────────────────────────────────────────
|
|
|
|
@mcp.tool()
async def search_codebase(query: str, top_k: int = 6) -> str:
    """Forward a codebase search request to the worker that handles it.

    Args:
        query: Search query text.
        top_k: Number of results requested (default 6).

    Returns:
        The worker's raw response text.
    """
    tool = "search_codebase"
    handler = await process_manager.get_worker(tool)
    payload = {"query": query, "top_k": top_k}
    return await _forward_request(handler.port, tool, payload)
|
|
|
|
|
|
@mcp.tool()
async def search_r530_docs(query: str, top_k: int = 5) -> str:
    """Forward an R530 documentation search to the worker that handles it.

    Args:
        query: Search query text.
        top_k: Number of results requested (default 5).

    Returns:
        The worker's raw response text.
    """
    tool = "search_r530_docs"
    handler = await process_manager.get_worker(tool)
    payload = {"query": query, "top_k": top_k}
    return await _forward_request(handler.port, tool, payload)
|
|
|
|
|
|
@mcp.tool()
async def ask_iiot_llm(question: str, context: str = "") -> str:
    """Forward an LLM question to the worker that handles it.

    Args:
        question: The question to ask.
        context: Optional context text passed along with the question.

    Returns:
        The worker's raw response text.
    """
    tool = "ask_iiot_llm"
    handler = await process_manager.get_worker(tool)
    payload = {"question": question, "context": context}
    return await _forward_request(handler.port, tool, payload)
|
|
|
|
|
|
@mcp.tool()
async def rag_query(question: str, search_code: bool = False, search_docs: bool = True) -> str:
    """Forward a RAG query to the worker that handles it.

    Args:
        question: The question to answer.
        search_code: Flag forwarded to the worker (default False).
        search_docs: Flag forwarded to the worker (default True).

    Returns:
        The worker's raw response text.
    """
    tool = "rag_query"
    handler = await process_manager.get_worker(tool)
    payload = {
        "question": question,
        "search_code": search_code,
        "search_docs": search_docs,
    }
    return await _forward_request(handler.port, tool, payload)
|
|
|
|
|
|
@mcp.tool()
async def run_sql(sql: str) -> str:
    """Forward a SQL statement to the worker that handles it.

    Args:
        sql: SQL text, passed through unchanged.

    Returns:
        The worker's raw response text.
    """
    tool = "run_sql"
    handler = await process_manager.get_worker(tool)
    payload = {"sql": sql}
    return await _forward_request(handler.port, tool, payload)
|
|
|
|
|
|
@mcp.tool()
async def query_pv_history(tag_names: list[str], time_from: str, time_to: str, limit: int = 100) -> str:
    """Forward a PV history query to the worker that handles it.

    Args:
        tag_names: Tag names to query.
        time_from: Start of the time range (format defined by the worker).
        time_to: End of the time range (format defined by the worker).
        limit: Row limit forwarded to the worker (default 100).

    Returns:
        The worker's raw response text.
    """
    tool = "query_pv_history"
    handler = await process_manager.get_worker(tool)
    payload = {
        "tag_names": tag_names,
        "time_from": time_from,
        "time_to": time_to,
        "limit": limit,
    }
    return await _forward_request(handler.port, tool, payload)
|
|
|
|
|
|
@mcp.tool()
async def get_tag_metadata(query: str, limit: int = 10) -> str:
    """Forward a tag metadata lookup to the worker that handles it.

    Args:
        query: Lookup text.
        limit: Result limit forwarded to the worker (default 10).

    Returns:
        The worker's raw response text.
    """
    tool = "get_tag_metadata"
    handler = await process_manager.get_worker(tool)
    payload = {"query": query, "limit": limit}
    return await _forward_request(handler.port, tool, payload)
|
|
|
|
|
|
@mcp.tool()
async def list_drawings(unit_no: Optional[str] = None) -> str:
    """Forward a drawing-list request to the worker that handles it.

    Args:
        unit_no: Optional unit number; ``None`` (the default) is forwarded
            as JSON null. Annotated as Optional so the declared type matches
            the default value.

    Returns:
        The worker's raw response text.
    """
    worker = await process_manager.get_worker("list_drawings")
    return await _forward_request(worker.port, "list_drawings", {
        "unit_no": unit_no
    })
|
|
|
|
|
|
@mcp.tool()
async def parse_pid_dxf(filepath: str) -> str:
    """Forward a DXF parse request to a one-shot P&ID worker.

    Requests are serialized per file base name: calls for the same file wait
    on one lock, while calls for different files use different locks.

    Args:
        filepath: Path to the DXF file.

    Returns:
        The worker's raw response text.
    """
    tool = "parse_pid_dxf"
    # Fetch, or create on first use, the lock keyed by the file's base name.
    file_lock = process_manager._pid_locks.setdefault(os.path.basename(filepath), asyncio.Lock())

    async with file_lock:
        handler = await process_manager.get_worker(tool, one_shot=True)
        payload = {"filepath": filepath}
        return await _forward_request(handler.port, tool, payload, one_shot=True)
|
|
|
|
|
|
@mcp.tool()
async def parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str:
    """Forward a PDF parse request to a one-shot P&ID worker.

    Requests are serialized per file base name via a lock kept on the
    process manager.

    Args:
        filepath: Path to the PDF file.
        use_ocr: Flag forwarded to the worker (default True).

    Returns:
        The worker's raw response text.
    """
    tool = "parse_pid_pdf"
    # Fetch, or create on first use, the lock keyed by the file's base name.
    file_lock = process_manager._pid_locks.setdefault(os.path.basename(filepath), asyncio.Lock())

    async with file_lock:
        handler = await process_manager.get_worker(tool, one_shot=True)
        payload = {"filepath": filepath, "use_ocr": use_ocr}
        return await _forward_request(handler.port, tool, payload, one_shot=True)
|
|
|
|
|
|
@mcp.tool()
async def parse_pid_drawing(filepath: str) -> str:
    """Forward a drawing parse request to a one-shot P&ID worker.

    Requests are serialized per file base name via a lock kept on the
    process manager.

    Args:
        filepath: Path to the drawing file.

    Returns:
        The worker's raw response text.
    """
    tool = "parse_pid_drawing"
    # Fetch, or create on first use, the lock keyed by the file's base name.
    file_lock = process_manager._pid_locks.setdefault(os.path.basename(filepath), asyncio.Lock())

    async with file_lock:
        handler = await process_manager.get_worker(tool, one_shot=True)
        payload = {"filepath": filepath}
        return await _forward_request(handler.port, tool, payload, one_shot=True)
|
|
|
|
|
|
@mcp.tool()
async def extract_pid_tags(text: str, source_type: str) -> str:
    """Forward a tag-extraction request to a one-shot P&ID worker.

    Args:
        text: Text to extract tags from.
        source_type: Source type label forwarded to the worker.

    Returns:
        The worker's raw response text.
    """
    tool = "extract_pid_tags"
    # Text extraction is not tied to a particular file, so every call shares
    # one fixed lock key. The work could run in parallel, but the single lock
    # is kept to protect worker resources.
    shared_lock = process_manager._pid_locks.setdefault("global_text_processing", asyncio.Lock())

    async with shared_lock:
        handler = await process_manager.get_worker(tool, one_shot=True)
        payload = {"text": text, "source_type": source_type}
        return await _forward_request(handler.port, tool, payload, one_shot=True)
|
|
|
|
|
|
@mcp.tool()
async def match_pid_tags(pid_tags: list[str], experion_tags: list[str]) -> str:
    """Forward a tag-matching request to a one-shot P&ID worker.

    All matching requests share one lock and therefore run one at a time.

    Args:
        pid_tags: Tags taken from the P&ID.
        experion_tags: Experion tags to match against.

    Returns:
        The worker's raw response text.
    """
    tool = "match_pid_tags"
    # Fetch, or create on first use, the single shared matching lock.
    shared_lock = process_manager._pid_locks.setdefault("global_matching", asyncio.Lock())

    async with shared_lock:
        handler = await process_manager.get_worker(tool, one_shot=True)
        payload = {"pid_tags": pid_tags, "experion_tags": experion_tags}
        return await _forward_request(handler.port, tool, payload, one_shot=True)
|
|
|
|
|
|
@mcp.tool()
async def build_pid_graph_parallel(filepath: str) -> str:
    """Forward a graph-build request to a one-shot P&ID worker.

    Requests are serialized per file base name via a lock kept on the
    process manager.

    Args:
        filepath: Path to the drawing file.

    Returns:
        The worker's raw response text.
    """
    tool = "build_pid_graph_parallel"
    # Fetch, or create on first use, the lock keyed by the file's base name.
    file_lock = process_manager._pid_locks.setdefault(os.path.basename(filepath), asyncio.Lock())

    async with file_lock:
        handler = await process_manager.get_worker(tool, one_shot=True)
        payload = {"filepath": filepath}
        return await _forward_request(handler.port, tool, payload, one_shot=True)
|
|
|
|
|
|
@mcp.tool()
async def analyze_pid_impact(graph_id: str, start_node_id: str) -> str:
    """Forward an impact-analysis request to a one-shot P&ID worker.

    Requests are serialized per ``graph_id`` via a lock kept on the process
    manager.

    Args:
        graph_id: Identifier of the stored graph.
        start_node_id: Node from which the analysis starts.

    Returns:
        The worker's raw response text.
    """
    tool = "analyze_pid_impact"
    # Fetch, or create on first use, the lock keyed by graph_id.
    graph_lock = process_manager._pid_locks.setdefault(graph_id, asyncio.Lock())

    async with graph_lock:
        handler = await process_manager.get_worker(tool, one_shot=True)
        payload = {"graph_id": graph_id, "start_node_id": start_node_id}
        return await _forward_request(handler.port, tool, payload, one_shot=True)
|
|
|
|
|
|
@mcp.tool()
def get_worker_status() -> str:
    """Report the state of every worker known to the process manager.

    Returns:
        JSON object keyed by worker name; each value holds the worker's
        ``pid``, ``status``, ``port`` and ``one_shot`` flag.
    """
    snapshot = {
        worker_name: {
            "pid": entry.process.pid,
            "status": entry.status,
            "port": entry.port,
            "one_shot": entry.one_shot,
        }
        for worker_name, entry in process_manager.workers.items()
    }
    return json.dumps(snapshot, ensure_ascii=False, indent=2)
|
|
|
|
|
|
# ── 엔트리포인트 ──────────────────────────────────────────────────────────────
|
|
|
|
def main():
    """Run the server in HTTP mode, for the C# McpClient (localhost:5001)."""
    http_transport = "streamable-http"
    mcp.run(transport=http_transport)
|
|
|
|
|
|
if __name__ == "__main__":
    # With --http: streamable HTTP mode (for the C# McpClient).
    # Without it: stdio mode (for Claude Code / Roo Code MCP).
    selected_transport = "streamable-http" if "--http" in sys.argv else "stdio"
    mcp.run(transport=selected_transport)
|