feat: LLM 모델명 외부 설정 + 포인트 추가 기능

- mcp-server: 하드코딩된 모델명을 llm-model.json 기반 config.py로 외부화
- C#: AppendPointsAsync로 기존 데이터 유지하면서 포인트 추가
- C#: LlmConfigController로 LLM 모델명 조회/저장 API
- Frontend: LLM 설정 UI 카드 + 포인트 빌더에서 추가하기 버튼
2026-05-11 17:55:18 +09:00
parent de728f013a
commit 5cacc5dbb5
15 changed files with 544 additions and 35 deletions
--- a/mcp-server/worker/nl2sql_worker.py
+++ b/mcp-server/worker/nl2sql_worker.py
@@ -34,7 +34,8 @@ DB_CONNECTION_STRING = os.environ.get("DB_CONNECTION_STRING", "postgresql://post
 DB_TIMEOUT = int(os.environ.get("DB_TIMEOUT", "10"))

 VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
-VLLM_MODEL = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
+from config import get_vllm_model
+VLLM_MODEL = get_vllm_model()

 logging.basicConfig(
    level=logging.INFO,
@@ -150,7 +151,7 @@ async def _generate_sql(natural_language: str) -> str:
    )
    
    response = await client.chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": natural_language},
--- a/mcp-server/worker/pid_extract_template.py
+++ b/mcp-server/worker/pid_extract_template.py
@@ -10,7 +10,7 @@

 환경 변수:
    VLLM_BASE_URL: vLLM 엔드포인트 (기본: http://localhost:8000/v1)
-    VLLM_MODEL: 모델명 (기본: Qwen3.6-27B-FP8)
+    VLLM_MODEL: 모델명 (기본: llm-model.json 참조)
 """

 import argparse
@@ -22,6 +22,9 @@ import sys
 import time
 from typing import List

+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from config import get_vllm_model
+
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s %(message)s",
@@ -84,7 +87,7 @@ def call_llm(system_prompt: str, user_text: str, max_tokens: int = 65536) -> Lis
    from openai import OpenAI
    
    base_url = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
-    model = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
+    model = os.environ.get("VLLM_MODEL") or get_vllm_model()
    
    client = OpenAI(base_url=base_url, api_key="dummy")
    
--- a/mcp-server/worker/pid_worker.py
+++ b/mcp-server/worker/pid_worker.py
@@ -30,7 +30,8 @@ import uvicorn
 # ── 설정 ─────────────────────────────────────────────────────────────────────

 VLLM_BASE_URL        = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
-VLLM_MODEL           = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
+from config import get_vllm_model
+VLLM_MODEL           = get_vllm_model()
 DB_CONNECTION_STRING = os.environ.get("DB_CONNECTION_STRING", "postgresql://postgres:postgres@localhost:5432/iiot_platform")
 DB_TIMEOUT           = int(os.environ.get("DB_TIMEOUT", "10"))

@@ -173,7 +174,7 @@ def _extract_pid_tags(text: str, source_type: str) -> str:
    )
    truncated = text[:100000]
    resp = _llm().chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": f"Source: {source_type}\n\nText:\n{truncated}"},
@@ -202,7 +203,7 @@ def _match_pid_tags(pid_tags: list, experion_tags: list) -> str:
        "- Output ONLY the JSON array.\n"
    )
    resp = _llm().chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": (
@@ -247,7 +248,7 @@ def _parse_pid_dxf(filepath: str) -> str:
                          ensure_ascii=False, indent=2)

    resp = _llm().chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": _TAG_EXTRACT_SYSTEM},
            {"role": "user", "content": f"Source: dxf\n\nText:\n{text[:8000]}"},
@@ -273,7 +274,7 @@ def _parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str:
                          ensure_ascii=False, indent=2)

    resp = _llm().chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": _TAG_EXTRACT_SYSTEM},
            {"role": "user", "content": f"Source: pdf\n\nText:\n{text[:12000]}"},
--- a/mcp-server/worker/pid_worker_test.py
+++ b/mcp-server/worker/pid_worker_test.py
@@ -31,7 +31,8 @@ import uvicorn
 # ── 설정 ─────────────────────────────────────────────────────────────────────

 VLLM_BASE_URL        = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
-VLLM_MODEL           = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
+from config import get_vllm_model
+VLLM_MODEL           = get_vllm_model()
 DB_CONNECTION_STRING = os.environ.get("DB_CONNECTION_STRING", "postgresql://postgres:postgres@localhost:5432/iiot_platform")
 DB_TIMEOUT           = int(os.environ.get("DB_TIMEOUT", "10"))

@@ -174,7 +175,7 @@ def _extract_pid_tags(text: str, source_type: str) -> str:
    )
    truncated = text[:100000]
    resp = _llm().chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": f"Source: {source_type}\n\nText:\n{truncated}"},
@@ -203,7 +204,7 @@ def _match_pid_tags(pid_tags: list, experion_tags: list) -> str:
        "- Output ONLY the JSON array.\n"
    )
    resp = _llm().chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": (
@@ -248,7 +249,7 @@ def _parse_pid_dxf(filepath: str) -> str:
                          ensure_ascii=False, indent=2)

    resp = _llm().chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": _TAG_EXTRACT_SYSTEM},
            {"role": "user", "content": f"Source: dxf\n\nText:\n{text[:8000]}"},
@@ -274,7 +275,7 @@ def _parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str:
                          ensure_ascii=False, indent=2)

    resp = _llm().chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": _TAG_EXTRACT_SYSTEM},
            {"role": "user", "content": f"Source: pdf\n\nText:\n{text[:12000]}"},
--- a/mcp-server/worker/rag_worker.py
+++ b/mcp-server/worker/rag_worker.py
@@ -32,7 +32,8 @@ import httpx
 OLLAMA_URL      = os.environ.get("OLLAMA_URL", "http://localhost:11434")
 QDRANT_URL      = os.environ.get("QDRANT_URL", "http://localhost:6333")
 VLLM_BASE_URL   = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
-VLLM_MODEL      = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
+from config import get_vllm_model
+VLLM_MODEL      = get_vllm_model()
 EMBED_MODEL     = os.environ.get("EMBED_MODEL", "nomic-embed-text")

 COL_CODEBASE    = os.environ.get("COL_CODEBASE", "ws-65f457145aee80b2")
@@ -105,7 +106,7 @@ async def _ask_llm(question: str, context: str = "") -> str:
        prompt = question
    
    response = await client.chat.completions.create(
-        model="Qwen3.6-27B-FP8",
+        model=VLLM_MODEL,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},