MCP-서버 리팩토링 후 P&ID 추출 테스트전 다른 기능 확인 후 커밋
This commit is contained in:
229
mcp-server/worker/rag_worker.py
Normal file
229
mcp-server/worker/rag_worker.py
Normal file
@@ -0,0 +1,229 @@
|
||||
#!/usr/bin/env python3
|
||||
"""RAG 전용 워커 프로세스
|
||||
|
||||
Usage: python rag_worker.py <port>
|
||||
|
||||
담당 도구:
|
||||
search_codebase, search_r530_docs, ask_iiot_llm, rag_query
|
||||
|
||||
특징:
|
||||
- Ollama Embedding + Qdrant 검색 + vLLM LLM 조합
|
||||
- 메모리: ~200MB (워커 자체, vLLM 외부 서비스 사용 시)
|
||||
- 생명주기: 메인 서버 종료 시까지 유지
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
import os
|
||||
|
||||
# mcp-server 디렉토리를 Python 경로에 추가 (pipeline 패키지 접근)
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
from functools import lru_cache
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
import uvicorn
|
||||
import httpx
|
||||
|
||||
# ── 설정 ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
|
||||
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1")
|
||||
VLLM_MODEL = os.environ.get("VLLM_MODEL", "Qwen3.6-27B-FP8")
|
||||
EMBED_MODEL = os.environ.get("EMBED_MODEL", "nomic-embed-text")
|
||||
|
||||
COL_CODEBASE = os.environ.get("COL_CODEBASE", "ws-65f457145aee80b2")
|
||||
COL_OPC_DOCS = os.environ.get("COL_OPC_DOCS", "experion-opc-docs")
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
stream=sys.stderr,
|
||||
format="%(asctime)s [rag_worker] %(levelname)s %(message)s",
|
||||
)
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
# ── HTTP 클라이언트 싱글톤 ────────────────────────────────────────────────────
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _get_http_client():
|
||||
return httpx.AsyncClient(timeout=30)
|
||||
|
||||
# ── 임베딩 (Ollama) ───────────────────────────────────────────────────────────
|
||||
|
||||
async def _embed(text: str) -> list[float]:
|
||||
"""Ollama nomic-embed-text로 768-dim 벡터 생성."""
|
||||
async with _get_http_client() as client:
|
||||
resp = await client.post(
|
||||
f"{OLLAMA_URL}/api/embeddings",
|
||||
json={"model": EMBED_MODEL, "prompt": text},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json()["embedding"]
|
||||
|
||||
# ── Qdrant 검색 ──────────────────────────────────────────────────────────────
|
||||
|
||||
async def _qdrant_search(collection: str, query_vector: list[float], top_k: int = 6) -> list[dict]:
|
||||
"""Qdrant에서 벡터 유사도 검색."""
|
||||
async with _get_http_client() as client:
|
||||
resp = await client.post(
|
||||
f"{QDRANT_URL}/collections/{collection}/points/search",
|
||||
json={
|
||||
"vector": query_vector,
|
||||
"limit": top_k,
|
||||
"with_payload": True,
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("result", [])
|
||||
|
||||
# ── LLM (vLLM) ───────────────────────────────────────────────────────────────
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _llm_client():
|
||||
from openai import AsyncOpenAI
|
||||
return AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="dummy")
|
||||
|
||||
async def _ask_llm(question: str, context: str = "") -> str:
|
||||
"""vLLM LLM으로 질문 응답."""
|
||||
client = _llm_client()
|
||||
|
||||
if context:
|
||||
prompt = f"""주어진 컨텍스트를 바탕으로 질문에 답변하세요.
|
||||
|
||||
컨텍스트:
|
||||
{context}
|
||||
|
||||
질문:
|
||||
{question}
|
||||
|
||||
답변:"""
|
||||
else:
|
||||
prompt = question
|
||||
|
||||
response = await client.chat.completions.create(
|
||||
model=VLLM_MODEL,
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a helpful assistant."},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
max_tokens=4096,
|
||||
temperature=0.1,
|
||||
)
|
||||
return response.choices[0].message.content
|
||||
|
||||
# ── RAG 도구 구현 ─────────────────────────────────────────────────────────────
|
||||
|
||||
@app.get("/health")
|
||||
async def health():
|
||||
"""워커 헬스체크."""
|
||||
return {"status": "ok"}
|
||||
|
||||
@app.post("/execute")
|
||||
async def execute(request: Request):
|
||||
"""HTTP 요청을 MCP 도구 호출로 변환."""
|
||||
body = await request.json()
|
||||
tool = body["tool"]
|
||||
params = body["params"]
|
||||
|
||||
try:
|
||||
if tool == "search_codebase":
|
||||
result = await _search_codebase(**params)
|
||||
elif tool == "search_r530_docs":
|
||||
result = await _search_r530_docs(**params)
|
||||
elif tool == "ask_iiot_llm":
|
||||
result = await _ask_iiot_llm(**params)
|
||||
elif tool == "rag_query":
|
||||
result = await _rag_query(**params)
|
||||
else:
|
||||
return {"success": False, "error": f"Unknown tool: {tool}"}
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
logging.error(f"Error executing {tool}: {e}")
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
async def _search_codebase(query: str, top_k: int = 6) -> str:
|
||||
"""소스코드 검색."""
|
||||
query_vector = await _embed(query)
|
||||
results = await _qdrant_search(COL_CODEBASE, query_vector, top_k)
|
||||
|
||||
items = []
|
||||
for hit in results:
|
||||
payload = hit.get("payload", {})
|
||||
items.append({
|
||||
"score": hit.get("score", 0),
|
||||
"file": payload.get("file", "unknown"),
|
||||
"content": payload.get("content", "")[:500],
|
||||
})
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"count": len(items),
|
||||
"items": items,
|
||||
}
|
||||
|
||||
async def _search_r530_docs(query: str, top_k: int = 5) -> str:
|
||||
"""Experion HS R530 공식 문서 검색."""
|
||||
query_vector = await _embed(query)
|
||||
results = await _qdrant_search(COL_OPC_DOCS, query_vector, top_k)
|
||||
|
||||
items = []
|
||||
for hit in results:
|
||||
payload = hit.get("payload", {})
|
||||
items.append({
|
||||
"score": hit.get("score", 0),
|
||||
"title": payload.get("title", "unknown"),
|
||||
"content": payload.get("content", "")[:500],
|
||||
})
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"count": len(items),
|
||||
"items": items,
|
||||
}
|
||||
|
||||
async def _ask_iiot_llm(question: str, context: str = "") -> str:
|
||||
"""IIoT/OPC UA 질문 응답."""
|
||||
answer = await _ask_llm(question, context)
|
||||
return {
|
||||
"success": True,
|
||||
"question": question,
|
||||
"answer": answer,
|
||||
}
|
||||
|
||||
async def _rag_query(question: str, search_code: bool = False, search_docs: bool = True) -> str:
|
||||
"""통합 RAG 검색."""
|
||||
contexts = []
|
||||
|
||||
if search_code:
|
||||
query_vector = await _embed(question)
|
||||
code_results = await _qdrant_search(COL_CODEBASE, query_vector, 3)
|
||||
for hit in code_results:
|
||||
contexts.append(hit.get("payload", {}).get("content", ""))
|
||||
|
||||
if search_docs:
|
||||
query_vector = await _embed(question)
|
||||
doc_results = await _qdrant_search(COL_OPC_DOCS, query_vector, 3)
|
||||
for hit in doc_results:
|
||||
contexts.append(hit.get("payload", {}).get("content", ""))
|
||||
|
||||
context = "\n\n".join(contexts[:5])
|
||||
answer = await _ask_llm(question, context)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"question": question,
|
||||
"context_count": len(contexts),
|
||||
"answer": answer,
|
||||
}
|
||||
|
||||
# ── 메인 ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
|
||||
port = int(sys.argv[1]) if len(sys.argv) > 1 else 5002
|
||||
logging.info(f"Starting RAG worker on port {port}")
|
||||
uvicorn.run(app, host="0.0.0.0", port=port)
|
||||
Reference in New Issue
Block a user