feat: LLM 모델명 외부 설정 + 포인트 추가 기능

- mcp-server: 하드코딩된 모델명을 llm-model.json 기반 config.py로 외부화
- C#: AppendPointsAsync로 기존 데이터 유지하면서 포인트 추가
- C#: LlmConfigController로 LLM 모델명 조회/저장 API
- Frontend: LLM 설정 UI 카드 + 포인트 빌더에서 추가하기 버튼
2026-05-11 17:55:18 +09:00
parent de728f013a
commit 5cacc5dbb5
15 changed files with 544 additions and 35 deletions
--- a/mcp-server/server.py
+++ b/mcp-server/server.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
 ExperionCrawler Unified MCP Server
- RAG: Qdrant + Ollama nomic-embed-text + vLLM Qwen3.6-27B-FP8
+- RAG: Qdrant + Ollama nomic-embed-text + vLLM (llm-model.json)
 - NL2SQL: 자연어 → LLM SQL 생성 → PostgreSQL 실행
 - 사용처:
    stdio 모드 (기본): Claude Code MCP / Roo Code MCP
@@ -24,7 +24,8 @@ QDRANT_URL      = os.environ.get("QDRANT_URL",    "http://localhost:6333")
 OLLAMA_URL      = os.environ.get("OLLAMA_URL",    "http://localhost:11434")
 EMBED_MODEL     = os.environ.get("EMBED_MODEL",   "nomic-embed-text")
 VLLM_BASE_URL   = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
-VLLM_MODEL      = os.environ.get("VLLM_MODEL",    "Qwen3.6-27B-FP8")
+from config import get_vllm_model
+VLLM_MODEL      = get_vllm_model()

 # Qdrant 컬렉션
 COL_CODEBASE    = "ws-65f457145aee80b2"     # ExperionCrawler 소스코드
@@ -67,7 +68,7 @@ async def _embed(text: str) -> list[float]:
    
    return await asyncio.to_thread(_call_embed)

-# ── LLM (vLLM / Qwen3.6-27B-FP8) ─────────────────────────────────────
+# ── LLM (vLLM) ──────────────────────────────────────────────────────

@lru_cache(maxsize=1)
 def _llm():
@@ -377,7 +378,7 @@ async def search_r530_docs(query: str, top_k: int = 5) -> str:

@mcp.tool()
 def ask_iiot_llm(question: str, context: str = "") -> str:
-    """Qwen3.6-27B-FP8에게 IIoT/OPC UA 질문 (컨텍스트 없이 LLM 직접 질문).
+    """LLM에게 IIoT/OPC UA 질문 (컨텍스트 없이 LLM 직접 질문).

    사용 시점: search_codebase 또는 search_r530_docs 결과를 context로 넘겨
    종합 분석·답변이 필요할 때. 또는 일반 IIoT/OPC UA 개념 질문.
@@ -393,7 +394,7 @@ def ask_iiot_llm(question: str, context: str = "") -> str:
    )
    user_msg = f"컨텍스트:\n{context}\n\n질문: {question}" if context else question
    resp = _llm().chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": system},
            {"role": "user",   "content": user_msg},
@@ -406,7 +407,7 @@ def ask_iiot_llm(question: str, context: str = "") -> str:

@mcp.tool()
 async def rag_query(question: str, search_code: bool = False, search_docs: bool = True) -> str:
-    """검색 → Qwen3.6-27B-FP8 답변 생성 (통합 RAG).
+    """검색 → LLM 답변 생성 (통합 RAG).

    기본값: Experion HS R530 공식 문서만 검색 (search_docs=True, search_code=False).
    ExperionCrawler 코드도 함께 보려면 search_code=True 추가.
@@ -612,7 +613,7 @@ async def query_with_nl(question: str) -> str:
    try:
        def _call_llm():
            return _llm().chat.completions.create(
-                model="Qwen3.6-27B-FP8",
+                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user",   "content": question},
@@ -699,7 +700,7 @@ async def extract_pid_tags(text: str, source_type: str) -> str:

        def _call_llm():
            return _llm().chat.completions.create(
-                model="Qwen3.6-27B-FP8",
+                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": f"Source: {source_type}\n\nText:\n{truncated_text}"},
@@ -805,7 +806,7 @@ async def match_pid_tags(pid_tags: list[str], experion_tags: list[str]) -> str:

        def _call_llm():
            return _llm().chat.completions.create(
-                model="Qwen3.6-27B-FP8",
+                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": f"P&ID Tags:\n{pid_str}\n\nExperion Tags:\n{experion_str}"},
@@ -896,7 +897,7 @@ async def parse_pid_dxf(filepath: str) -> str:
        
        def _call_llm():
            return _llm().chat.completions.create(
-                model="Qwen3.6-27B-FP8",
+                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": f"Source: dxf\n\nText:\n{truncated_text}"},
@@ -1009,7 +1010,7 @@ async def parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str:
        
        def _call_llm():
            return _llm().chat.completions.create(
-                model="Qwen3.6-27B-FP8",
+                model=VLLM_MODEL,
                messages=[
                    {"role": "system", "content": system},
                    {"role": "user", "content": f"Source: pdf\n\nText:\n{truncated_text}"},
@@ -1111,7 +1112,7 @@ async def build_pid_graph_parallel(filepath: str) -> str:
        # Mapper 설정
        from openai import AsyncOpenAI
        api_client = AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="dummy")
-        mapper = IntelligentMapper(builder.G, system_tags, api_client=api_client)
+        mapper = IntelligentMapper(builder.G, system_tags, api_client=api_client, model_name=VLLM_MODEL)
        
        # 분류별 노드 분리
        nodes = list(builder.G.nodes())