feat: LLM 모델명 외부 설정 + 포인트 추가 기능
- mcp-server: 하드코딩된 모델명을 llm-model.json 기반 config.py로 외부화 - C#: AppendPointsAsync로 기존 데이터 유지하면서 포인트 추가 - C#: LlmConfigController로 LLM 모델명 조회/저장 API - Frontend: LLM 설정 UI 카드 + 포인트 빌더에서 추가하기 버튼
This commit is contained in:
21
mcp-server/config.py
Normal file
21
mcp-server/config.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
# Directory containing this module; llm-model.json is expected to sit beside it.
_SERVER_DIR = os.path.dirname(os.path.abspath(__file__))
_MODEL_FILE = os.path.join(_SERVER_DIR, "llm-model.json")

# Fallback used whenever neither the environment nor the JSON file yields a
# usable model name.
_DEFAULT_MODEL = "Qwen3.6-27B-FP8"


def get_vllm_model() -> str:
    """Return the vLLM model name to pass as ``model=`` in completion calls.

    Resolution order:
        1. The ``VLLM_MODEL`` environment variable, if set and non-blank.
        2. The ``"vllm_model"`` key of ``llm-model.json`` next to this
           module, if the file is readable JSON holding a non-blank string.
        3. The built-in default model name.

    Surrounding whitespace is stripped from the chosen value.

    Returns:
        A non-empty model name. A missing, unreadable, malformed or
        wrongly-typed configuration falls back to the default instead of
        raising, so an import-time call cannot break server start-up over
        a bad config file.
    """
    env = (os.environ.get("VLLM_MODEL") or "").strip()
    if env:
        return env

    # EAFP: open directly rather than testing isfile() first, which avoids a
    # check-then-use race and a second filesystem lookup.
    try:
        with open(_MODEL_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing / unreadable / is a directory.
        # ValueError: json.JSONDecodeError and UnicodeDecodeError.
        return _DEFAULT_MODEL

    # The file content is externally editable, so validate its shape: a JSON
    # null, number, list or empty string must not leak out as the model name.
    if not isinstance(data, dict):
        return _DEFAULT_MODEL
    model = data.get("vllm_model")
    if isinstance(model, str) and model.strip():
        return model.strip()
    return _DEFAULT_MODEL
|
||||
3
mcp-server/llm-model.json
Normal file
3
mcp-server/llm-model.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"vllm_model": "Qwen3.6-27B-FP8"
|
||||
}
|
||||
@@ -13,10 +13,11 @@ class MappingResult(BaseModel):
|
||||
confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score from 0 to 1")
|
||||
|
||||
class IntelligentMapper:
|
||||
def __init__(self, graph: nx.Graph, system_tags: List[str], api_client: Optional[AsyncOpenAI] = None):
|
||||
self.graph = graph # Phase 2에서 생성된 NetworkX 그래프
|
||||
self.system_tags = system_tags # Experion 시스템의 전체 태그 리스트
|
||||
def __init__(
    self,
    graph: nx.Graph,
    system_tags: List[str],
    api_client: Optional[AsyncOpenAI] = None,
    model_name: str = "Qwen3.6-27B-FP8",
):
    """Store the inputs the mapper works with.

    Args:
        graph: NetworkX graph of the drawing (produced in Phase 2, per the
            previous revision's comment).
        system_tags: Complete tag list of the Experion system.
        api_client: Optional async OpenAI-compatible client, kept as
            ``self.client``.
        model_name: Model identifier kept as ``self.model_name``.
    """
    self.model_name = model_name
    self.client = api_client
    self.system_tags = system_tags
    self.graph = graph
|
||||
|
||||
def get_node_context(self, node_id: str) -> str:
|
||||
"""노드의 주변 위상 정보를 텍스트로 변환 (확장된 컨텍스트 제공)"""
|
||||
@@ -84,7 +85,7 @@ class IntelligentMapper:
|
||||
|
||||
try:
|
||||
response = await self.client.chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8", # MCP 서버 설정 모델 사용
|
||||
model=model_name,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
response_format={ "type": "json_object" }
|
||||
)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
ExperionCrawler Unified MCP Server
|
||||
- RAG: Qdrant + Ollama nomic-embed-text + vLLM Qwen3.6-27B-FP8
|
||||
- RAG: Qdrant + Ollama nomic-embed-text + vLLM (llm-model.json)
|
||||
- NL2SQL: 자연어 → LLM SQL 생성 → PostgreSQL 실행
|
||||
- 사용처:
|
||||
stdio 모드 (기본): Claude Code MCP / Roo Code MCP
|
||||
@@ -24,7 +24,8 @@ QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
|
||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
EMBED_MODEL = os.environ.get("EMBED_MODEL", "nomic-embed-text")
|
||||
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
|
||||
VLLM_MODEL = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
|
||||
from config import get_vllm_model
|
||||
VLLM_MODEL = get_vllm_model()
|
||||
|
||||
# Qdrant 컬렉션
|
||||
COL_CODEBASE = "ws-65f457145aee80b2" # ExperionCrawler 소스코드
|
||||
@@ -67,7 +68,7 @@ async def _embed(text: str) -> list[float]:
|
||||
|
||||
return await asyncio.to_thread(_call_embed)
|
||||
|
||||
# ── LLM (vLLM / Qwen3.6-27B-FP8) ─────────────────────────────────────
|
||||
# ── LLM (vLLM) ──────────────────────────────────────────────────────
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _llm():
|
||||
@@ -377,7 +378,7 @@ async def search_r530_docs(query: str, top_k: int = 5) -> str:
|
||||
|
||||
@mcp.tool()
|
||||
def ask_iiot_llm(question: str, context: str = "") -> str:
|
||||
"""Qwen3.6-27B-FP8에게 IIoT/OPC UA 질문 (컨텍스트 없이 LLM 직접 질문).
|
||||
"""LLM에게 IIoT/OPC UA 질문 (컨텍스트 없이 LLM 직접 질문).
|
||||
|
||||
사용 시점: search_codebase 또는 search_r530_docs 결과를 context로 넘겨
|
||||
종합 분석·답변이 필요할 때. 또는 일반 IIoT/OPC UA 개념 질문.
|
||||
@@ -393,7 +394,7 @@ def ask_iiot_llm(question: str, context: str = "") -> str:
|
||||
)
|
||||
user_msg = f"컨텍스트:\n{context}\n\n질문: {question}" if context else question
|
||||
resp = _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": user_msg},
|
||||
@@ -406,7 +407,7 @@ def ask_iiot_llm(question: str, context: str = "") -> str:
|
||||
|
||||
@mcp.tool()
|
||||
async def rag_query(question: str, search_code: bool = False, search_docs: bool = True) -> str:
|
||||
"""검색 → Qwen3.6-27B-FP8 답변 생성 (통합 RAG).
|
||||
"""검색 → LLM 답변 생성 (통합 RAG).
|
||||
|
||||
기본값: Experion HS R530 공식 문서만 검색 (search_docs=True, search_code=False).
|
||||
ExperionCrawler 코드도 함께 보려면 search_code=True 추가.
|
||||
@@ -612,7 +613,7 @@ async def query_with_nl(question: str) -> str:
|
||||
try:
|
||||
def _call_llm():
|
||||
return _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": question},
|
||||
@@ -699,7 +700,7 @@ async def extract_pid_tags(text: str, source_type: str) -> str:
|
||||
|
||||
def _call_llm():
|
||||
return _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": f"Source: {source_type}\n\nText:\n{truncated_text}"},
|
||||
@@ -805,7 +806,7 @@ async def match_pid_tags(pid_tags: list[str], experion_tags: list[str]) -> str:
|
||||
|
||||
def _call_llm():
|
||||
return _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": f"P&ID Tags:\n{pid_str}\n\nExperion Tags:\n{experion_str}"},
|
||||
@@ -896,7 +897,7 @@ async def parse_pid_dxf(filepath: str) -> str:
|
||||
|
||||
def _call_llm():
|
||||
return _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": f"Source: dxf\n\nText:\n{truncated_text}"},
|
||||
@@ -1009,7 +1010,7 @@ async def parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str:
|
||||
|
||||
def _call_llm():
|
||||
return _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": f"Source: pdf\n\nText:\n{truncated_text}"},
|
||||
@@ -1111,7 +1112,7 @@ async def build_pid_graph_parallel(filepath: str) -> str:
|
||||
# Mapper 설정
|
||||
from openai import AsyncOpenAI
|
||||
api_client = AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="dummy")
|
||||
mapper = IntelligentMapper(builder.G, system_tags, api_client=api_client)
|
||||
mapper = IntelligentMapper(builder.G, system_tags, api_client=api_client, model_name=VLLM_MODEL)
|
||||
|
||||
# 분류별 노드 분리
|
||||
nodes = list(builder.G.nodes())
|
||||
|
||||
@@ -34,7 +34,8 @@ DB_CONNECTION_STRING = os.environ.get("DB_CONNECTION_STRING", "postgresql://post
|
||||
DB_TIMEOUT = int(os.environ.get("DB_TIMEOUT", "10"))
|
||||
|
||||
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
|
||||
VLLM_MODEL = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
|
||||
from config import get_vllm_model
|
||||
VLLM_MODEL = get_vllm_model()
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
@@ -150,7 +151,7 @@ async def _generate_sql(natural_language: str) -> str:
|
||||
)
|
||||
|
||||
response = await client.chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": natural_language},
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
|
||||
환경 변수:
|
||||
VLLM_BASE_URL: vLLM 엔드포인트 (기본: http://localhost:8000/v1)
|
||||
VLLM_MODEL: 모델명 (기본: Qwen3.6-27B-FP8)
|
||||
VLLM_MODEL: 모델명 (기본: llm-model.json 참조)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -22,6 +22,9 @@ import sys
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from config import get_vllm_model
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(name)s] %(levelname)s %(message)s",
|
||||
@@ -84,7 +87,7 @@ def call_llm(system_prompt: str, user_text: str, max_tokens: int = 65536) -> Lis
|
||||
from openai import OpenAI
|
||||
|
||||
base_url = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
|
||||
model = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
|
||||
model = os.environ.get("VLLM_MODEL") or get_vllm_model()
|
||||
|
||||
client = OpenAI(base_url=base_url, api_key="dummy")
|
||||
|
||||
|
||||
@@ -30,7 +30,8 @@ import uvicorn
|
||||
# ── 설정 ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
|
||||
VLLM_MODEL = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
|
||||
from config import get_vllm_model
|
||||
VLLM_MODEL = get_vllm_model()
|
||||
DB_CONNECTION_STRING = os.environ.get("DB_CONNECTION_STRING", "postgresql://postgres:postgres@localhost:5432/iiot_platform")
|
||||
DB_TIMEOUT = int(os.environ.get("DB_TIMEOUT", "10"))
|
||||
|
||||
@@ -173,7 +174,7 @@ def _extract_pid_tags(text: str, source_type: str) -> str:
|
||||
)
|
||||
truncated = text[:100000]
|
||||
resp = _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": f"Source: {source_type}\n\nText:\n{truncated}"},
|
||||
@@ -202,7 +203,7 @@ def _match_pid_tags(pid_tags: list, experion_tags: list) -> str:
|
||||
"- Output ONLY the JSON array.\n"
|
||||
)
|
||||
resp = _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": (
|
||||
@@ -247,7 +248,7 @@ def _parse_pid_dxf(filepath: str) -> str:
|
||||
ensure_ascii=False, indent=2)
|
||||
|
||||
resp = _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": _TAG_EXTRACT_SYSTEM},
|
||||
{"role": "user", "content": f"Source: dxf\n\nText:\n{text[:8000]}"},
|
||||
@@ -273,7 +274,7 @@ def _parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str:
|
||||
ensure_ascii=False, indent=2)
|
||||
|
||||
resp = _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": _TAG_EXTRACT_SYSTEM},
|
||||
{"role": "user", "content": f"Source: pdf\n\nText:\n{text[:12000]}"},
|
||||
|
||||
@@ -31,7 +31,8 @@ import uvicorn
|
||||
# ── 설정 ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
|
||||
VLLM_MODEL = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
|
||||
from config import get_vllm_model
|
||||
VLLM_MODEL = get_vllm_model()
|
||||
DB_CONNECTION_STRING = os.environ.get("DB_CONNECTION_STRING", "postgresql://postgres:postgres@localhost:5432/iiot_platform")
|
||||
DB_TIMEOUT = int(os.environ.get("DB_TIMEOUT", "10"))
|
||||
|
||||
@@ -174,7 +175,7 @@ def _extract_pid_tags(text: str, source_type: str) -> str:
|
||||
)
|
||||
truncated = text[:100000]
|
||||
resp = _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": f"Source: {source_type}\n\nText:\n{truncated}"},
|
||||
@@ -203,7 +204,7 @@ def _match_pid_tags(pid_tags: list, experion_tags: list) -> str:
|
||||
"- Output ONLY the JSON array.\n"
|
||||
)
|
||||
resp = _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": (
|
||||
@@ -248,7 +249,7 @@ def _parse_pid_dxf(filepath: str) -> str:
|
||||
ensure_ascii=False, indent=2)
|
||||
|
||||
resp = _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": _TAG_EXTRACT_SYSTEM},
|
||||
{"role": "user", "content": f"Source: dxf\n\nText:\n{text[:8000]}"},
|
||||
@@ -274,7 +275,7 @@ def _parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str:
|
||||
ensure_ascii=False, indent=2)
|
||||
|
||||
resp = _llm().chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": _TAG_EXTRACT_SYSTEM},
|
||||
{"role": "user", "content": f"Source: pdf\n\nText:\n{text[:12000]}"},
|
||||
|
||||
@@ -32,7 +32,8 @@ import httpx
|
||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
|
||||
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
|
||||
VLLM_MODEL = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
|
||||
from config import get_vllm_model
|
||||
VLLM_MODEL = get_vllm_model()
|
||||
EMBED_MODEL = os.environ.get("EMBED_MODEL", "nomic-embed-text")
|
||||
|
||||
COL_CODEBASE = os.environ.get("COL_CODEBASE", "ws-65f457145aee80b2")
|
||||
@@ -105,7 +106,7 @@ async def _ask_llm(question: str, context: str = "") -> str:
|
||||
prompt = question
|
||||
|
||||
response = await client.chat.completions.create(
|
||||
model="Qwen3.6-27B-FP8",
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": prompt},
|
||||
|
||||
Reference in New Issue
Block a user