feat: ExperionCrawler IIoT OPC UA Data Bridge Infrastructure

Major project initialization and feature implementation: **Core Features:** - OPC UA client for Honeywell Experion HS R530 integration - Real-time data streaming and history data retrieval - Text-to-SQL query engine with TimeScaleDB - JSON-based node configuration system - SQLite database with migration support **Architecture:** - Clean architecture with Domain, Application, Infrastructure layers - ASP.NET Core Web API frontend - Web UI with real-time visualization - PKI-based OPC UA authentication (TLS) **Infrastructure Components:** - ExperionOpcClient: OPC UA connection management - ExperionRealtimeService: Real-time data streaming - ExperionHistoryService: Historical data queries - TextToSqlService: Natural language to SQL queries - SqlValidator: SQL injection prevention **Database:** - TimescaleDB integration (recommended) or SQLite fallback - Entity Framework Core with Extenstion methods - OPCTag, KeyValue tables for data storage **Security:** - Certificate-based OPC UA endpoint security - SSL/TLS encryption for database connections - Output param binding injection prevention **Testing:** - Unit tests for TextToSqlService and SqlValidator - Integration tests for Korean time range extraction See REVIEW_REQUEST.md for detailed code review information.
2026-04-26 19:28:56 +09:00
parent e34ec08001
commit 77bdcf1f7f
60 changed files with 10948 additions and 227 deletions
--- a/mcp-server/pycache/server.cpython-312.pyc
+++ b/mcp-server/pycache/server.cpython-312.pyc
--- a/mcp-server/index_opc_docs.py
+++ b/mcp-server/index_opc_docs.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+"""
+Experion OPC UA 문서 인덱싱 스크립트
+- HTM 파일 → 텍스트 추출 → 청킹 → Ollama 임베딩 → Qdrant 업서트
+- 사용 모델: nomic-embed-text (768-dim, MCP 서버와 동일)
+- 컬렉션: experion-opc-docs
+"""
+
+import os
+import sys
+import uuid
+import time
+import textwrap
+from html.parser import HTMLParser
+from pathlib import Path
+import httpx
+
+# ── 설정 ──────────────────────────────────────────────────────────────────────
+DOCS_DIR      = "/home/windpacer/projects/Experion_opcua_documents"
+QDRANT_URL    = "http://localhost:6333"
+OLLAMA_URL    = "http://localhost:11434"
+EMBED_MODEL   = "nomic-embed-text"
+COLLECTION    = "experion-opc-docs"
+CHUNK_SIZE    = 600   # 문자 수
+CHUNK_OVERLAP = 100
+VECTOR_DIM    = 768
+
+# ── HTML → 텍스트 추출 ────────────────────────────────────────────────────────
+
+class _TextExtractor(HTMLParser):
+    SKIP_TAGS = {"script", "style", "head", "nav", "footer"}
+
+    def __init__(self):
+        super().__init__()
+        self._skip  = 0
+        self._parts = []
+
+    def handle_starttag(self, tag, attrs):
+        if tag in self.SKIP_TAGS:
+            self._skip += 1
+
+    def handle_endtag(self, tag):
+        if tag in self.SKIP_TAGS and self._skip:
+            self._skip -= 1
+        if tag in ("p", "h1", "h2", "h3", "h4", "li", "td", "tr", "div"):
+            self._parts.append("\n")
+
+    def handle_data(self, data):
+        if not self._skip:
+            stripped = data.strip()
+            if stripped:
+                self._parts.append(stripped + " ")
+
+    def get_text(self) -> str:
+        raw = "".join(self._parts)
+        lines = [l.strip() for l in raw.splitlines()]
+        lines = [l for l in lines if l]
+        return "\n".join(lines)
+
+
+def extract_text(htm_path: str) -> str:
+    with open(htm_path, encoding="utf-8", errors="replace") as f:
+        html = f.read()
+    p = _TextExtractor()
+    p.feed(html)
+    return p.get_text()
+
+
+# ── 청킹 ─────────────────────────────────────────────────────────────────────
+
+def chunk_text(text: str, size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> list[str]:
+    if len(text) <= size:
+        return [text] if text.strip() else []
+    chunks = []
+    start = 0
+    while start < len(text):
+        end = start + size
+        chunk = text[start:end]
+        if chunk.strip():
+            chunks.append(chunk.strip())
+        start += size - overlap
+    return chunks
+
+
+# ── Ollama 임베딩 ─────────────────────────────────────────────────────────────
+
+def embed(text: str) -> list[float]:
+    with httpx.Client(timeout=30) as client:
+        resp = client.post(
+            f"{OLLAMA_URL}/api/embeddings",
+            json={"model": EMBED_MODEL, "prompt": text},
+        )
+        resp.raise_for_status()
+        return resp.json()["embedding"]
+
+
+# ── Qdrant 컬렉션 생성 ────────────────────────────────────────────────────────
+
+def ensure_collection():
+    with httpx.Client(timeout=15) as client:
+        resp = client.get(f"{QDRANT_URL}/collections/{COLLECTION}")
+        if resp.status_code == 200:
+            info = resp.json()["result"]
+            count = info.get("points_count", 0)
+            print(f"컬렉션 '{COLLECTION}' 이미 존재 (points: {count})")
+            answer = input("기존 컬렉션을 삭제하고 재인덱싱? [y/N]: ").strip().lower()
+            if answer != "y":
+                print("취소")
+                sys.exit(0)
+            client.delete(f"{QDRANT_URL}/collections/{COLLECTION}")
+            print("기존 컬렉션 삭제 완료")
+
+        create_resp = client.put(
+            f"{QDRANT_URL}/collections/{COLLECTION}",
+            json={"vectors": {"size": VECTOR_DIM, "distance": "Cosine"}},
+        )
+        create_resp.raise_for_status()
+        print(f"컬렉션 '{COLLECTION}' 생성 완료")
+
+
+# ── Qdrant 업서트 ─────────────────────────────────────────────────────────────
+
+def upsert_batch(points: list[dict]):
+    with httpx.Client(timeout=30) as client:
+        resp = client.put(
+            f"{QDRANT_URL}/collections/{COLLECTION}/points",
+            json={"points": points},
+        )
+        resp.raise_for_status()
+
+
+# ── 메인 ─────────────────────────────────────────────────────────────────────
+
+def main():
+    htm_files = sorted(Path(DOCS_DIR).rglob("*.htm"))
+    if not htm_files:
+        print(f"HTM 파일 없음: {DOCS_DIR}")
+        sys.exit(1)
+
+    print(f"HTM 파일 수: {len(htm_files)}")
+    ensure_collection()
+
+    total_chunks = 0
+    batch: list[dict] = []
+    BATCH_SIZE = 20
+
+    for i, path in enumerate(htm_files, 1):
+        rel = str(path.relative_to(Path(DOCS_DIR).parent))
+        text = extract_text(str(path))
+        chunks = chunk_text(text)
+
+        for j, chunk in enumerate(chunks):
+            vec = embed(chunk)
+            batch.append({
+                "id":      str(uuid.uuid5(uuid.NAMESPACE_URL, f"{path}#{j}")),
+                "vector":  vec,
+                "payload": {
+                    "filePath":   rel,
+                    "content":    chunk,
+                    "chunkIndex": j,
+                },
+            })
+
+            if len(batch) >= BATCH_SIZE:
+                upsert_batch(batch)
+                total_chunks += len(batch)
+                batch = []
+
+        print(f"[{i:2d}/{len(htm_files)}] {path.name}  ({len(chunks)} chunks)", flush=True)
+
+    if batch:
+        upsert_batch(batch)
+        total_chunks += len(batch)
+
+    print(f"\n완료: {total_chunks}개 청크 → 컬렉션 '{COLLECTION}'")
+
+
+if __name__ == "__main__":
+    main()
--- a/mcp-server/pyproject.toml
+++ b/mcp-server/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "iiot-rag-mcp"
+version = "0.1.0"
+description = "ExperionCrawler RAG MCP Server — Qdrant + GLM-4.7-Flash"
+requires-python = ">=3.10"
+dependencies = [
+    "mcp[cli]>=1.0.0",
+    "qdrant-client>=1.9.0",
+    "sentence-transformers>=3.0.0",
+    "openai>=1.0.0",
+    "httpx>=0.27.0",
+]
+
+[project.scripts]
+iiot-rag-mcp = "server:main"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
--- a/mcp-server/server.py
+++ b/mcp-server/server.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+"""
+ExperionCrawler RAG MCP Server
+- 임베딩: Ollama nomic-embed-text (768-dim) — Roo Code 인덱스와 동일 모델
+- 벡터 DB: Qdrant localhost:6333
+- LLM: vLLM GLM-4.7-Flash localhost:8000/v1
+- 사용처: Claude Code MCP / Roo Code MCP (동일 서버)
+"""
+
+from __future__ import annotations
+import sys
+import logging
+import httpx
+from functools import lru_cache
+from mcp.server.fastmcp import FastMCP
+
+logging.basicConfig(level=logging.WARNING, stream=sys.stderr)
+
+# ── 설정 ──────────────────────────────────────────────────────────────────────
+QDRANT_URL      = "http://localhost:6333"
+OLLAMA_URL      = "http://localhost:11434"
+EMBED_MODEL     = "nomic-embed-text"        # 768-dim, Roo Code 인덱스와 동일
+VLLM_BASE_URL   = "http://localhost:8000/v1"
+VLLM_MODEL      = "glm-4.7-flash"
+
+# Qdrant 컬렉션
+COL_CODEBASE    = "ws-65f457145aee80b2"     # ExperionCrawler 소스코드
+COL_OPC_DOCS    = "experion-opc-docs"       # Experion HS R530 OPC UA 공식 문서 (266 chunks)
+
+mcp = FastMCP("iiot-rag")
+
+# ── 임베딩 (Ollama) ───────────────────────────────────────────────────────────
+
+def _embed(text: str) -> list[float]:
+    """Ollama nomic-embed-text로 768-dim 벡터 생성."""
+    with httpx.Client(timeout=30) as client:
+        resp = client.post(
+            f"{OLLAMA_URL}/api/embeddings",
+            json={"model": EMBED_MODEL, "prompt": text},
+        )
+        resp.raise_for_status()
+        return resp.json()["embedding"]
+
+# ── LLM (vLLM / GLM-4.7-Flash) ───────────────────────────────────────────────
+
+@lru_cache(maxsize=1)
+def _llm():
+    from openai import OpenAI
+    return OpenAI(base_url=VLLM_BASE_URL, api_key="dummy")
+
+# ── Qdrant 검색 헬퍼 ──────────────────────────────────────────────────────────
+
+def _search(collection: str, query: str, top_k: int, threshold: float = 0.25) -> str:
+    vec = _embed(query)
+    with httpx.Client(timeout=20) as client:
+        resp = client.post(
+            f"{QDRANT_URL}/collections/{collection}/points/search",
+            json={
+                "vector": vec,
+                "limit": top_k,
+                "with_payload": True,
+                "score_threshold": threshold,
+            },
+        )
+        resp.raise_for_status()
+        hits = resp.json().get("result", [])
+
+    if not hits:
+        return "관련 결과 없음."
+
+    parts = []
+    for h in hits:
+        p = h.get("payload", {})
+        file_path  = p.get("filePath", p.get("path", "unknown"))
+        chunk      = p.get("codeChunk", p.get("content", p.get("text", "")))
+        start_line = p.get("startLine", "")
+        loc = f"{file_path}:{start_line}" if start_line else file_path
+        parts.append(f"[score={h['score']:.3f}] {loc}\n```\n{chunk[:700]}\n```")
+
+    return "\n\n---\n\n".join(parts)
+
+# ── MCP 도구 ─────────────────────────────────────────────────────────────────
+
+@mcp.tool()
+def search_codebase(query: str, top_k: int = 6) -> str:
+    """ExperionCrawler 프로젝트 소스코드 검색 (우리가 개발한 .NET 8 C# 코드).
+    Experion HS R530 공식 문서가 아닌, ExperionCrawler 구현 코드를 검색함.
+
+    사용 시점: ExperionCrawler 코드의 구현 방법, 버그, 구조를 알고 싶을 때.
+    ⚠️  Experion HS R530 제품 동작/설정/스펙을 알고 싶으면 search_r530_docs 사용.
+
+    Args:
+        query: 검색어 (예: "OPC UA 구독 시작", "히스토리 스냅샷", "TextToSql 서비스")
+        top_k: 반환 결과 수 (기본 6)
+    """
+    return _search(COL_CODEBASE, query, top_k)
+
+
+@mcp.tool()
+def search_r530_docs(query: str, top_k: int = 5) -> str:
+    """Honeywell Experion HS R530 공식 제품 문서 검색.
+    ExperionCrawler 코드가 아닌, Honeywell 공식 HTM 문서를 검색함.
+
+    사용 시점: Experion HS R530의 OPC UA 설정, 인증서, 보안 정책, 포인트 주소 형식,
+    채널/컨트롤러 속성, 문제해결 등 제품 스펙과 동작을 알고 싶을 때.
+    ⚠️  ExperionCrawler 구현 코드를 찾으려면 search_codebase 사용.
+
+    Args:
+        query: 검색어 (예: "certificate configuration", "endpoint security policy", "point address syntax")
+        top_k: 반환 결과 수 (기본 5)
+    """
+    return _search(COL_OPC_DOCS, query, top_k)
+
+
+@mcp.tool()
+def ask_iiot_llm(question: str, context: str = "") -> str:
+    """GLM-4.7-Flash에게 IIoT/OPC UA 질문 (컨텍스트 없이 LLM 직접 질문).
+
+    사용 시점: search_codebase 또는 search_r530_docs 결과를 context로 넘겨
+    종합 분석·답변이 필요할 때. 또는 일반 IIoT/OPC UA 개념 질문.
+
+    Args:
+        question: 질문 내용
+        context:  (선택) search_codebase 또는 search_r530_docs 검색 결과
+    """
+    system = (
+        "당신은 IIoT(산업용 IoT), OPC UA, Honeywell Experion PKS/HS R530 전문가입니다.\n"
+        "컨텍스트가 제공된 경우 컨텍스트를 우선 근거로 삼아 한국어로 답변합니다.\n"
+        "컨텍스트 출처가 'Experion HS R530 공식 문서'인지 'ExperionCrawler 코드'인지 명확히 구분하여 설명합니다."
+    )
+    user_msg = f"컨텍스트:\n{context}\n\n질문: {question}" if context else question
+    resp = _llm().chat.completions.create(
+        model=VLLM_MODEL,
+        messages=[
+            {"role": "system", "content": system},
+            {"role": "user",   "content": user_msg},
+        ],
+        max_tokens=2048,
+        temperature=0.1,
+    )
+    return resp.choices[0].message.content or "(응답 없음)"
+
+
+@mcp.tool()
+def rag_query(question: str, search_code: bool = False, search_docs: bool = True) -> str:
+    """검색 → GLM-4.7-Flash 답변 생성 (통합 RAG).
+
+    기본값: Experion HS R530 공식 문서만 검색 (search_docs=True, search_code=False).
+    ExperionCrawler 코드도 함께 보려면 search_code=True 추가.
+
+    사용 시점: Experion HS R530 제품 질문이나 ExperionCrawler 코드 질문에
+    검색+LLM 답변을 한 번에 얻고 싶을 때.
+
+    Args:
+        question:    질문
+        search_docs: Experion HS R530 공식 문서 검색 여부 (기본 True)
+        search_code: ExperionCrawler 소스코드 검색 여부 (기본 False)
+    """
+    context_parts: list[str] = []
+    if search_docs:
+        context_parts.append(f"=== Experion HS R530 공식 문서 ===\n{_search(COL_OPC_DOCS, question, 4)}")
+    if search_code:
+        context_parts.append(f"=== ExperionCrawler 구현 코드 ===\n{_search(COL_CODEBASE, question, 3)}")
+
+    return ask_iiot_llm(question, "\n\n".join(context_parts))
+
+
+if __name__ == "__main__":
+    mcp.run(transport="stdio")
--- a/mcp-server/uv.lock
+++ b/mcp-server/uv.lock