feat: Knowledge Base RAG 시스템 + 채팅 LLM 개선 (Phase 0~5 완료)

- KB RAG 전체 파이프라인: 업로드, 파싱(xlsx/pdf/docx/text), 임베딩, Qdrant 인덱싱
- KB 관리 UI(14번 탭): 로그인, 문서 목록, 업로드, 삭제, 재인덱스
- OllamaController: 한글 시스템 프롬프트, plant_context.md 외부 파일화, SSE tool_start/tool_result 이벤트
- 프론트: 툴 실행 카드, KB 인용 링크, 표 자동 렌더, 추천 질문 칩
- nl2sql_worker: history_table.recorded_at 사용, tag_metadata 응답 개선
- DB: KB 테이블 5개 DDL + 시드, pgcrypto 확장
2026-05-13 20:22:27 +09:00
parent 35136ba91e
commit 908bfe151f
32 changed files with 3202 additions and 91 deletions
--- a/mcp-server/parsers/__init__.py
+++ b/mcp-server/parsers/__init__.py
@@ -0,0 +1,8 @@
+"""KB 문서 파서 모음.
+
+각 모듈은 `parse(path: str) -> list[dict]` 인터페이스를 제공한다.
+반환 청크는 다음 키를 가진다:
+  text:       str   임베딩 대상 본문 (보통 200~1500자)
+  chunk_kind: str   row | sheet | section | table | page | paragraph | heading
+  locator:    str   사람 가독 위치 문자열 (예: "sheet=Pump-A; row=12")
+"""
--- a/mcp-server/parsers/docx_parser.py
+++ b/mcp-server/parsers/docx_parser.py
@@ -0,0 +1,41 @@
+"""docx 청킹 — 헤딩 경로 별 청크."""
+from __future__ import annotations
+
+
+def parse(path: str) -> list[dict]:
+    """Split a .docx file into chunks, one per heading section.
+
+    Non-empty paragraph text is accumulated under the current heading path
+    (for example "Chapter / Section"); text that appears before the first
+    heading is filed under "preface". A heading with no body text below it
+    produces no chunk. Only ``doc.paragraphs`` is read.
+
+    Args:
+        path: Path of the .docx file to read.
+
+    Returns:
+        A list of dicts with keys ``text``, ``chunk_kind`` (always
+        "heading") and ``locator`` ("heading=<path>").
+    """
+    from docx import Document
+
+    document = Document(path)
+    sections: list[dict] = []
+
+    heading_stack: list[str] = []
+    pending: list[str] = []
+
+    def emit(lines: list[str], stack: list[str]) -> None:
+        # Nothing collected since the last heading: no chunk.
+        if not lines:
+            return
+        label = " / ".join(stack) if stack else "preface"
+        sections.append({
+            "text": "\n".join(lines).strip(),
+            "chunk_kind": "heading",
+            "locator": f"heading={label}",
+        })
+
+    for para in document.paragraphs:
+        body = (para.text or "").strip()
+        if not body:
+            continue
+
+        style = (para.style.name or "").lower() if para.style else ""
+        if not style.startswith("heading"):
+            pending.append(body)
+            continue
+
+        # A new heading closes the section collected so far.
+        emit(pending, heading_stack)
+        pending = []
+        try:
+            # Style names look like "heading 2"; the trailing token is the level.
+            depth = int(style.split()[-1])
+        except (ValueError, IndexError):
+            depth = 1
+        # Keep the ancestors above this level and replace everything below.
+        heading_stack = heading_stack[: max(0, depth - 1)] + [body]
+
+    emit(pending, heading_stack)
+    return sections
--- a/mcp-server/parsers/pdf_parser.py
+++ b/mcp-server/parsers/pdf_parser.py
@@ -0,0 +1,34 @@
+"""pdf 청킹 — pdfplumber로 페이지/표 추출, 헤딩 분리 실패 시 페이지 단위 fallback."""
+from __future__ import annotations
+
+
+def parse(path: str) -> list[dict]:
+    """Extract page-text and table chunks from a PDF with pdfplumber.
+
+    Each page with extractable text yields one "page" chunk (text capped at
+    5000 characters). Each table found on a page yields one "table" chunk
+    whose rows are joined with " | " (capped at 200 rows).
+
+    Args:
+        path: Path of the PDF file to read.
+
+    Returns:
+        A list of dicts with keys ``text``, ``chunk_kind`` ("page" or
+        "table") and ``locator`` ("page=N" or "page=N; table=M").
+    """
+    import pdfplumber
+
+    def _tables_of(page) -> list:
+        # Table extraction is best-effort: any failure means "no tables".
+        try:
+            return page.extract_tables() or []
+        except Exception:
+            return []
+
+    out: list[dict] = []
+    with pdfplumber.open(path) as pdf:
+        for page_no, page in enumerate(pdf.pages, start=1):
+            page_text = (page.extract_text() or "").strip()
+            if page_text:
+                out.append({
+                    "text": page_text[:5000],
+                    "chunk_kind": "page",
+                    "locator": f"page={page_no}",
+                })
+
+            for table_no, table in enumerate(_tables_of(page), start=1):
+                # Drop empty rows and normalise None cells to "".
+                cleaned = [[(cell or "").strip() for cell in line] for line in table if line]
+                if cleaned:
+                    out.append({
+                        "text": "\n".join(" | ".join(line) for line in cleaned[:200]),
+                        "chunk_kind": "table",
+                        "locator": f"page={page_no}; table={table_no}",
+                    })
+
+    return out
--- a/mcp-server/parsers/text_parser.py
+++ b/mcp-server/parsers/text_parser.py
@@ -0,0 +1,56 @@
+"""md / txt 청킹 — md는 # 헤딩 단위, txt는 빈 줄 두 개 단위."""
+from __future__ import annotations
+import os
+
+
+def parse(path: str) -> list[dict]:
+    """Read a markdown or plain-text file and split it into chunks.
+
+    Files ending in ``.md`` or ``.markdown`` are chunked per heading by
+    ``_parse_md``; every other extension is chunked per paragraph by
+    ``_parse_txt``.
+
+    Args:
+        path: Path of the text file to read.
+
+    Returns:
+        A list of dicts with keys ``text``, ``chunk_kind`` and ``locator``.
+    """
+    ext = os.path.splitext(path)[1].lower()
+    # utf-8-sig drops a leading BOM, which would otherwise hide a heading on
+    # the first line ("\ufeff# Title" does not start with "#").
+    with open(path, "r", encoding="utf-8-sig", errors="ignore") as f:
+        content = f.read()
+
+    if ext in (".md", ".markdown"):
+        return _parse_md(content)
+    return _parse_txt(content)
+
+
+def _parse_md(text: str) -> list[dict]:
+    """Split markdown text into one chunk per heading section.
+
+    A line whose first non-blank character is "#" starts a new section,
+    except inside a fenced code block (``` or ~~~), where "#" lines are
+    ordinary content such as shell or Python comments. Text before the
+    first heading is filed under "preface". Sections with no body text
+    produce no chunk.
+
+    Args:
+        text: Full markdown document.
+
+    Returns:
+        A list of dicts with keys ``text``, ``chunk_kind`` (always
+        "heading") and ``locator`` ("heading=<heading text>").
+    """
+    chunks: list[dict] = []
+    cur_heading = "preface"
+    buf: list[str] = []
+    # "```" or "~~~" while inside a fenced code block, "" otherwise.
+    open_fence = ""
+
+    def flush() -> None:
+        body = "\n".join(buf).strip()
+        if body:
+            chunks.append({
+                "text": body,
+                "chunk_kind": "heading",
+                "locator": f"heading={cur_heading}",
+            })
+        buf.clear()
+
+    for ln in text.split("\n"):
+        s = ln.lstrip()
+        marker = s[:3]
+        if open_fence:
+            # Simplified fence matching: the same three-character marker closes it.
+            if marker == open_fence:
+                open_fence = ""
+            buf.append(ln)
+        elif marker in ("```", "~~~"):
+            open_fence = marker
+            buf.append(ln)
+        elif s.startswith("#"):
+            # flush() runs before the rebind, so it labels the finished
+            # section with the previous heading.
+            flush()
+            cur_heading = s.lstrip("#").strip() or "section"
+        else:
+            buf.append(ln)
+    flush()
+    return chunks
+
+
+def _parse_txt(text: str) -> list[dict]:
+    """Split plain text into paragraph chunks.
+
+    The text is cut at every "\\n\\n"; pieces that are empty after
+    stripping are dropped, and the survivors are numbered from 1.
+
+    Args:
+        text: Full plain-text document.
+
+    Returns:
+        A list of dicts with keys ``text``, ``chunk_kind`` (always
+        "paragraph") and ``locator`` ("paragraph=N").
+    """
+    stripped = (piece.strip() for piece in text.split("\n\n"))
+    return [
+        {
+            "text": body,
+            "chunk_kind": "paragraph",
+            "locator": f"paragraph={number}",
+        }
+        for number, body in enumerate(filter(None, stripped), start=1)
+    ]
--- a/mcp-server/parsers/xlsx_parser.py
+++ b/mcp-server/parsers/xlsx_parser.py
@@ -0,0 +1,49 @@
+"""xlsx 청킹 — 시트 단위(markdown) + 행 단위 둘 다 생성."""
+from __future__ import annotations
+
+
+def parse(path: str) -> list[dict]:
+    """Chunk a workbook into one chunk per sheet plus one chunk per data row.
+
+    The first row of each sheet is used as the header. The sheet chunk is a
+    markdown table of the first 1000 data rows; each row chunk lists the
+    non-empty cells of one row as "col=val" pairs. Empty sheets are skipped.
+
+    Args:
+        path: Path of the workbook file to read.
+
+    Returns:
+        A list of dicts with keys ``text``, ``chunk_kind`` ("sheet" or
+        "row") and ``locator`` ("sheet=<name>" or "sheet=<name>; row=N").
+    """
+    from openpyxl import load_workbook
+
+    wb = load_workbook(path, read_only=True, data_only=True)
+    chunks: list[dict] = []
+
+    try:
+        for sheet in wb.worksheets:
+            rows = list(sheet.iter_rows(values_only=True))
+            if not rows:
+                continue
+
+            header = [str(c) if c is not None else "" for c in rows[0]]
+            sheet_name = sheet.title
+
+            # 1) Sheet chunk: markdown table, capped at the first 1000 data rows.
+            body_rows = rows[1:1001]
+            md_lines = ["| " + " | ".join(header) + " |",
+                        "| " + " | ".join(["---"] * len(header)) + " |"]
+            for r in body_rows:
+                cells = [str(c) if c is not None else "" for c in r]
+                # Pad short rows and trim long ones to the header width.
+                cells += [""] * (len(header) - len(cells))
+                md_lines.append("| " + " | ".join(cells[: len(header)]) + " |")
+            chunks.append({
+                "text": "\n".join(md_lines),
+                "chunk_kind": "sheet",
+                "locator": f"sheet={sheet_name}",
+            })
+
+            # 2) Row chunks: each row as a single line of 'col=val' pairs.
+            #    Numbering starts at 2 because row 1 is the header.
+            for i, r in enumerate(rows[1:], start=2):
+                parts = []
+                for j, val in enumerate(r):
+                    if val is None or val == "":
+                        continue
+                    col = header[j] if j < len(header) and header[j] else f"col{j+1}"
+                    parts.append(f"{col}={val}")
+                if not parts:
+                    continue
+                chunks.append({
+                    "text": f"{sheet_name}: " + ", ".join(parts),
+                    "chunk_kind": "row",
+                    "locator": f"sheet={sheet_name}; row={i}",
+                })
+    finally:
+        # A read-only workbook keeps the source file open until it is closed
+        # explicitly; release it even when parsing fails part-way.
+        wb.close()
+
+    return chunks
--- a/mcp-server/pyproject.toml
+++ b/mcp-server/pyproject.toml
@@ -24,6 +24,10 @@ dependencies = [
    "scikit-learn>=1.3.0",
    "numpy>=1.24.0",
    "Pillow>=10.0.0",
+    # KB 문서 파싱
+    "openpyxl>=3.1.0",
+    "python-docx>=1.1.0",
+    "pdfplumber>=0.11.0",
 ]

 [project.scripts]
--- a/mcp-server/server.py
+++ b/mcp-server/server.py
@@ -31,6 +31,15 @@ VLLM_MODEL      = get_vllm_model()
 COL_CODEBASE    = "ws-65f457145aee80b2"     # ExperionCrawler 소스코드
 COL_OPC_DOCS    = "experion-opc-docs"       # Experion HS R530 OPC UA 공식 문서 (266 chunks)

+# User KB collections: logical collection key -> Qdrant collection name
+# (matches the five kb_collections seed entries).
+KB_COLLECTIONS  = {
+    "system_instrument": "kb_system_instrument",
+    "plant_operation":   "kb_plant_operation",
+    "procedure":         "kb_procedure",
+    "report":            "kb_report",
+    "vendor_doc":        "kb_vendor_doc",
+}
+
 # PostgreSQL 연결
 DB_CONNECTION_STRING = os.environ.get("DB_CONNECTION_STRING", "postgresql://postgres:postgres@localhost:5432/iiot_platform")
 DB_TIMEOUT = int(os.environ.get("DB_TIMEOUT", "10"))
@@ -248,6 +257,60 @@ async def _search(collection: str, query: str, top_k: int, threshold: float = 0.

    return "\n\n---\n\n".join(parts)

+
+async def _search_kb_collection(
+    qdrant_name: str,
+    vec: list[float],
+    top_k: int,
+    tags: list[str] | None = None,
+) -> list[dict]:
+    """Run a vector search against a single KB collection in Qdrant.
+
+    Args:
+        qdrant_name: Qdrant collection name to query.
+        vec:         Query embedding.
+        top_k:       Maximum number of points to request.
+        tags:        Optional tag filter; a point matches if its "tags"
+                     payload contains any of these values.
+
+    Returns:
+        The raw "result" list from Qdrant. An empty list is returned when the
+        collection does not exist (HTTP 404) or when the request fails; the
+        failure is logged as a warning.
+    """
+    payload: dict = {
+        "vector": vec,
+        "limit": top_k,
+        "with_payload": True,
+        "score_threshold": 0.20,
+    }
+    if tags:
+        payload["filter"] = {"must": [{"key": "tags", "match": {"any": tags}}]}
+
+    def _post_search():
+        url = f"{QDRANT_URL}/collections/{qdrant_name}/points/search"
+        with httpx.Client(timeout=20) as http:
+            reply = http.post(url, json=payload)
+            # A missing collection is treated as "no hits", not as an error.
+            if reply.status_code == 404:
+                return []
+            reply.raise_for_status()
+            return reply.json().get("result", [])
+
+    try:
+        # The HTTP client is synchronous, so run it off the event loop.
+        return await asyncio.to_thread(_post_search)
+    except Exception as e:
+        logging.warning(f"[search_kb] {qdrant_name} 검색 실패: {e}")
+        return []
+
+
+def _recency_factor(uploaded_at_iso: str | None) -> float:
+    """Return a score multiplier that favours recently uploaded documents.
+
+    Uploads younger than 7 days get 1.10, younger than 30 days 1.05,
+    younger than 90 days 1.02, anything else 1.0. A missing or unparseable
+    timestamp also yields 1.0. Timestamps without a timezone are treated
+    as UTC.
+
+    Args:
+        uploaded_at_iso: ISO-8601 upload time (a trailing "Z" is accepted),
+                         or None.
+    """
+    if not uploaded_at_iso:
+        return 1.0
+    try:
+        from datetime import datetime, timezone
+        stamp = datetime.fromisoformat(uploaded_at_iso.replace("Z", "+00:00"))
+        if stamp.tzinfo is None:
+            stamp = stamp.replace(tzinfo=timezone.utc)
+        age_days = (datetime.now(timezone.utc) - stamp).total_seconds() / 86400.0
+    except Exception:
+        return 1.0
+
+    # (age limit in days, multiplier), checked from newest to oldest.
+    for limit_days, factor in ((7, 1.10), (30, 1.05), (90, 1.02)):
+        if age_days < limit_days:
+            return factor
+    return 1.0
+
+
 # ── DB 헬퍼 ──────────────────────────────────────────────────────────────────

 async def _get_db_connection():
@@ -406,25 +469,161 @@ def ask_iiot_llm(question: str, context: str = "") -> str:


@mcp.tool()
-async def rag_query(question: str, search_code: bool = False, search_docs: bool = True) -> str:
+async def rag_query(
+    question: str,
+    search_code: bool = False,
+    search_docs: bool = True,
+    search_kb: bool = False,
+    kb_collections: list[str] | None = None,
+) -> str:
    """검색 → LLM 답변 생성 (통합 RAG).

-    기본값: Experion HS R530 공식 문서만 검색 (search_docs=True, search_code=False).
-    ExperionCrawler 코드도 함께 보려면 search_code=True 추가.
+    기본값: Experion HS R530 공식 문서만 검색.
+    사용자 KB 검색을 포함하려면 search_kb=True. 코드 검색은 search_code=True.

    Args:
-        question:    질문
-        search_docs: Experion HS R530 공식 문서 검색 여부 (기본 True)
-        search_code: ExperionCrawler 소스코드 검색 여부 (기본 False)
+        question:       질문
+        search_docs:    Experion HS R530 공식 문서 검색 여부 (기본 True)
+        search_code:    ExperionCrawler 소스코드 검색 여부 (기본 False)
+        search_kb:      사용자 KB 검색 여부 (기본 False)
+        kb_collections: 검색 대상 KB 컬렉션 키 목록. None이면 전체.
+                        예: ["plant_operation", "procedure"]
    """
+    # Each enabled source adds one labelled section to the LLM context.
    context_parts: list[str] = []
    if search_docs:
        context_parts.append(f"=== Experion HS R530 공식 문서 ===\n{await _search(COL_OPC_DOCS, question, 4)}")
    if search_code:
        context_parts.append(f"=== ExperionCrawler 구현 코드 ===\n{await _search(COL_CODEBASE, question, 3)}")
+    if search_kb:
+        # Up to 6 KB hits, already serialized as citation text by _format_kb_results.
+        kb_text = await _format_kb_results(question, kb_collections, top_k=6)
+        context_parts.append(f"=== 사용자 지식 베이스 ===\n{kb_text}")
    return ask_iiot_llm(question, "\n\n".join(context_parts))


+async def _format_kb_results(
+    query: str,
+    collection_keys: list[str] | None,
+    top_k: int,
+    tags: list[str] | None = None,
+    since: str | None = None,
+    boost_recent: bool = True,
+) -> str:
+    """Search the KB collections and serialize the hits as citation text.
+
+    All arguments are forwarded unchanged to ``_search_kb_raw``. Each hit
+    becomes a block with a "[score=…] title > locator" header followed by
+    at most 700 characters of the chunk text; blocks are separated by
+    "---" lines.
+
+    Returns:
+        The formatted text, or a fixed "no results" message when the
+        search returns nothing.
+    """
+    found = await _search_kb_raw(query, collection_keys, top_k, tags, since, boost_recent)
+    if not found:
+        return "관련 KB 결과 없음."
+
+    def _render(hit: dict) -> str:
+        # Citation header: "[score=0.812] <title> > <locator>"; the locator
+        # part is omitted when the hit carries none.
+        heading = hit.get("title") or "(제목없음)"
+        where = hit.get("locator") or ""
+        if where:
+            heading = f"{heading} > {where}"
+        relevance = hit.get("score", 0.0)
+        snippet = (hit.get("text") or "").strip()[:700]
+        return f"[score={relevance:.3f}] {heading}\n{snippet}"
+
+    return "\n\n---\n\n".join(_render(hit) for hit in found)
+
+
+def _parse_iso_utc(value):
+    """Parse an ISO-8601 string into a timezone-aware datetime, or return None.
+
+    A trailing "Z" is accepted, and a timestamp without a timezone is
+    treated as UTC (the same convention ``_recency_factor`` uses).
+    """
+    from datetime import datetime, timezone
+
+    if not isinstance(value, str):
+        return None
+    try:
+        ts = datetime.fromisoformat(value.replace("Z", "+00:00"))
+    except ValueError:
+        return None
+    if ts.tzinfo is None:
+        ts = ts.replace(tzinfo=timezone.utc)
+    return ts
+
+
+async def _search_kb_raw(
+    query: str,
+    collection_keys: list[str] | None,
+    top_k: int,
+    tags: list[str] | None,
+    since: str | None,
+    boost_recent: bool,
+) -> list[dict]:
+    """Core KB search: multi-collection vector search, recency boost, post-filter.
+
+    Args:
+        query:           Search text; embedded once and reused per collection.
+        collection_keys: Keys of ``KB_COLLECTIONS`` to search; None or empty
+                         means all. Unknown keys are ignored.
+        top_k:           Maximum number of hits to return; values <= 0
+                         return an empty list.
+        tags:            Optional tag filter passed to the collection search.
+        since:           Optional ISO timestamp; hits uploaded before it are
+                         dropped. Hits without ``uploaded_at`` are kept.
+        boost_recent:    When True, scores are multiplied by the recency
+                         factor of ``uploaded_at``.
+
+    Returns:
+        Normalized hit dicts sorted by boosted score (descending), with at
+        most one hit per (doc_id, locator) pair.
+    """
+    if top_k <= 0:
+        return []
+
+    targets = collection_keys or list(KB_COLLECTIONS.keys())
+    # dict.fromkeys drops repeated keys while keeping the caller's order, so
+    # the same collection is never queried twice.
+    qdrant_names = list(dict.fromkeys(
+        KB_COLLECTIONS[k] for k in targets if k in KB_COLLECTIONS
+    ))
+    if not qdrant_names:
+        return []
+
+    vec = await _embed(query)
+    # Over-fetch per collection so dedup and filtering still leave top_k hits.
+    per_coll_k = max(top_k, 8)
+    since_ts = _parse_iso_utc(since) if since else None
+
+    results: list[dict] = []
+    for qname in qdrant_names:
+        hits = await _search_kb_collection(qname, vec, per_coll_k, tags=tags)
+        for h in hits:
+            # "payload" may be present but null.
+            p = h.get("payload") or {}
+            uploaded_at = p.get("uploaded_at")
+
+            if since and uploaded_at:
+                uploaded_ts = _parse_iso_utc(uploaded_at)
+                if uploaded_ts is not None and since_ts is not None:
+                    # Compare instants, not strings: "…Z" and "…+09:00" do
+                    # not order correctly as text.
+                    too_old = uploaded_ts < since_ts
+                else:
+                    # Fall back to the plain comparison when either value is
+                    # not parseable; incomparable values keep the hit.
+                    try:
+                        too_old = uploaded_at < since
+                    except TypeError:
+                        too_old = False
+                if too_old:
+                    continue
+
+            base_score = h.get("score", 0.0)
+            recency = _recency_factor(uploaded_at) if boost_recent else 1.0
+            results.append({
+                "score": base_score * recency,
+                "raw_score": base_score,
+                "doc_id": p.get("doc_id"),
+                "collection_key": p.get("collection_key"),
+                "title": p.get("title"),
+                "text": p.get("text", ""),
+                "chunk_kind": p.get("chunk_kind"),
+                "locator": p.get("locator"),
+                "uploaded_at": uploaded_at,
+                "tags": p.get("tags") or [],
+            })
+
+    # Sort by score (descending) and keep only the best hit for each
+    # (doc_id, locator) pair.
+    results.sort(key=lambda r: r["score"], reverse=True)
+    seen: set[str] = set()
+    unique: list[dict] = []
+    for r in results:
+        key = f'{r.get("doc_id")}::{r.get("locator")}'
+        if key in seen:
+            continue
+        seen.add(key)
+        unique.append(r)
+        if len(unique) >= top_k:
+            break
+    return unique
+
+
+@mcp.tool()
+async def search_kb(
+    query: str,
+    collection_keys: list[str] | None = None,
+    top_k: int = 8,
+    tags: list[str] | None = None,
+    since: str | None = None,
+    boost_recent: bool = True,
+) -> str:
+    """사용자 지식 베이스(KB) 다중 컬렉션 의미 검색.
+
+    관리탭에서 업로드/인덱싱한 문서에서 질의와 의미적으로 가까운 청크를 찾는다.
+
+    Args:
+        query:           검색어 또는 자연어 질문
+        collection_keys: 대상 컬렉션 키 목록. None이면 전체.
+                         가능한 값: system_instrument, plant_operation,
+                                 procedure, report, vendor_doc
+        top_k:           반환 결과 수 (기본 8)
+        tags:            태그 필터 (any 매칭). 예: ["unit-a", "P-6201"]
+        since:           이 ISO 시각 이후 업로드된 문서만. 예: "2026-04-01T00:00:00Z"
+        boost_recent:    True이면 uploaded_at 기준 최신 가중치 적용 (기본 True)
+
+    Returns:
+        JSON 문자열: { success, count, hits: [{ doc_id, collection_key, title,
+                       text, chunk_kind, locator, score, uploaded_at, tags }, ...] }
+    """
+    # The docstring above is kept verbatim: it is attached to the registered
+    # tool, so it is runtime text rather than a plain comment.
+    try:
+        found = await _search_kb_raw(query, collection_keys, top_k, tags, since, boost_recent)
+        envelope = {"success": True, "count": len(found), "hits": found}
+        # default=str stringifies any payload value json cannot encode natively.
+        return json.dumps(envelope, ensure_ascii=False, default=str)
+    except Exception as exc:
+        failure = {"success": False, "error": f"search_kb 실패: {exc}"}
+        return json.dumps(failure, ensure_ascii=False)
+
+
 # ── NL2SQL 도구 ───────────────────────────────────────────────────────────────

 async def _execute_sql_internal(sql: str) -> str:
@@ -1224,6 +1423,63 @@ async def parse_pid_drawing(filepath: str) -> str:



+# ── KB ingest 파서 ────────────────────────────────────────────────────────────
+
+@mcp.tool()
+async def parse_document(
+    doc_id: str,
+    title: str,
+    file_path: str,
+    mime_type: str = "",
+    collection_key: str = "",
+    chunking_policy: str = "",
+) -> str:
+    """KB ingest 파서. 파일 확장자에 따라 적절한 청킹을 수행한다.
+
+    Args:
+        doc_id: 문서 ID (UUID 문자열)
+        title: 제목 (오류 메시지에만 사용)
+        file_path: 절대 경로
+        mime_type: 정보용 (옵션)
+        collection_key: 정보용 (옵션)
+        chunking_policy: JSON 문자열, 향후 정책 분기에 사용
+
+    Returns:
+        JSON 문자열: {"success": true, "chunks": [{"text", "chunk_kind", "locator"}, ...]}
+                  or {"success": false, "error": "..."}
+    """
+    # The docstring above is kept verbatim: it is attached to the registered
+    # tool, so it is runtime text rather than a plain comment.
+    import os
+
+    def _fail(message: str) -> str:
+        return json.dumps({"success": False, "error": message}, ensure_ascii=False)
+
+    def _select_parser(suffix: str):
+        # Imports stay lazy so a missing parser dependency only breaks its own format.
+        if suffix in (".xlsx", ".xlsm"):
+            from parsers import xlsx_parser
+            return xlsx_parser.parse
+        if suffix == ".pdf":
+            from parsers import pdf_parser
+            return pdf_parser.parse
+        if suffix == ".docx":
+            from parsers import docx_parser
+            return docx_parser.parse
+        if suffix in (".md", ".txt", ".markdown"):
+            from parsers import text_parser
+            return text_parser.parse
+        return None
+
+    if not os.path.isfile(file_path):
+        return _fail(f"file not found: {file_path}")
+
+    ext = os.path.splitext(file_path)[1].lower()
+    try:
+        parser = _select_parser(ext)
+        if parser is None:
+            return _fail(f"unsupported extension: {ext}")
+
+        # Parsers are synchronous and file-bound, so run them off the event loop.
+        chunks = await asyncio.to_thread(parser, file_path)
+        return json.dumps(
+            {"success": True, "doc_id": doc_id, "chunks": chunks, "count": len(chunks)},
+            ensure_ascii=False
+        )
+    except Exception as e:
+        return _fail(f"parse failed: {e}")
+
+
 # ── 엔트리포인트 ──────────────────────────────────────────────────────────────

 def main():
--- a/mcp-server/uv.lock
+++ b/mcp-server/uv.lock
@@ -1226,11 +1226,14 @@ dependencies = [
    { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
    { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
    { name = "openai" },
+    { name = "openpyxl" },
    { name = "paddleocr" },
    { name = "paddlepaddle" },
+    { name = "pdfplumber" },
    { name = "pillow" },
    { name = "psycopg", extra = ["binary"] },
    { name = "pymupdf" },
+    { name = "python-docx" },
    { name = "qdrant-client" },
    { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
    { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -1246,11 +1249,14 @@ requires-dist = [
    { name = "mcp", extras = ["cli"], specifier = ">=1.0.0" },
    { name = "numpy", specifier = ">=1.24.0" },
    { name = "openai", specifier = ">=1.0.0" },
+    { name = "openpyxl", specifier = ">=3.1.0" },
    { name = "paddleocr", specifier = ">=2.6.0,<2.7.0" },
    { name = "paddlepaddle", specifier = ">=2.6.0,<3.0.0" },
+    { name = "pdfplumber", specifier = ">=0.11.0" },
    { name = "pillow", specifier = ">=10.0.0" },
    { name = "psycopg", extras = ["binary"], specifier = ">=3.1.0" },
    { name = "pymupdf", specifier = ">=1.24.0" },
+    { name = "python-docx", specifier = ">=1.1.0" },
    { name = "qdrant-client", specifier = ">=1.9.0" },
    { name = "scikit-learn", specifier = ">=1.3.0" },
    { name = "sentence-transformers", specifier = ">=3.0.0" },
@@ -2597,6 +2603,33 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/cb/2b/f8434233fab2bd66a02ec014febe4e5adced20e2693e0e90a07d118ed30e/pandas-3.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:5371b72c2d4d415d08765f32d689217a43227484e81b2305b52076e328f6f482", size = 9455341, upload-time = "2026-03-31T06:48:28.418Z" },
 ]

+[[package]]
+name = "pdfminer-six"
+version = "20251230"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "charset-normalizer" },
+    { name = "cryptography" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/46/9a/d79d8fa6d47a0338846bb558b39b9963b8eb2dfedec61867c138c1b17eeb/pdfminer_six-20251230.tar.gz", hash = "sha256:e8f68a14c57e00c2d7276d26519ea64be1b48f91db1cdc776faa80528ca06c1e", size = 8511285, upload-time = "2025-12-30T15:49:13.104Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/65/d7/b288ea32deb752a09aab73c75e1e7572ab2a2b56c3124a5d1eb24c62ceb3/pdfminer_six-20251230-py3-none-any.whl", hash = "sha256:9ff2e3466a7dfc6de6fd779478850b6b7c2d9e9405aa2a5869376a822771f485", size = 6591909, upload-time = "2025-12-30T15:49:10.76Z" },
+]
+
+[[package]]
+name = "pdfplumber"
+version = "0.11.9"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pdfminer-six" },
+    { name = "pillow" },
+    { name = "pypdfium2" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/38/37/9ca3519e92a8434eb93be570b131476cc0a4e840bb39c62ddb7813a39d53/pdfplumber-0.11.9.tar.gz", hash = "sha256:481224b678b2bbdbf376e2c39bf914144eef7c3d301b4a28eebf0f7f6109d6dc", size = 102768, upload-time = "2026-01-05T08:10:29.072Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8b/c8/cdbc975f5b634e249cfa6597e37c50f3078412474f21c015e508bfbfe3c3/pdfplumber-0.11.9-py3-none-any.whl", hash = "sha256:33ec5580959ba524e9100138746e090879504c42955df1b8a997604dd326c443", size = 60045, upload-time = "2026-01-05T08:10:27.512Z" },
+]
+
 [[package]]
 name = "pillow"
 version = "12.2.0"
@@ -3156,6 +3189,35 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" },
 ]

+[[package]]
+name = "pypdfium2"
+version = "5.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/6d/3d/dc934d3b606c51c3ecc95b6731d84b7dd7ab8e513a50b0e98a4da6c8a719/pypdfium2-5.8.0.tar.gz", hash = "sha256:049397c647e50f83115ee951c49394dab9e9ba52ebdd5a11ab1109390eb3d34e", size = 271934, upload-time = "2026-05-04T17:39:43.794Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6f/8c/6b75b923cb81368fa3ea7c48a0616b839620a3aeff899885bd930449b89e/pypdfium2-5.8.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:f67b6c74b716d9ac725ad1af49ae786ad813ac20823d45606d59f1fc06caa8af", size = 3374554, upload-time = "2026-05-04T17:39:05.552Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/61/a885c7f36efba89ec98e3d1fe95c83b48c2d6dea321e9194ac6460e7a834/pypdfium2-5.8.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:53e82bf3e6a2da170b1bda83f93b7eec57cb6efe3cacd05cba78823879a85203", size = 2831667, upload-time = "2026-05-04T17:39:08.028Z" },
+    { url = "https://files.pythonhosted.org/packages/86/1f/04b5627f6dba312d3e707e5b019c9f24d8b03b5aa366866a9e02ec00f8d4/pypdfium2-5.8.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:085e633dcc89b65ff4035a4787e98ce7ae636836eb39c83dd0db26113d9774bc", size = 3450815, upload-time = "2026-05-04T17:39:09.551Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/77/8e3a2aba2bc4aef5abe1b1306d05b00588dc0bf7f5c850d1adf6164c786b/pypdfium2-5.8.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:bc84b7c6efede88fcfb9467f81daf416f26b973a54fc1cf4d3410d622fda6d7a", size = 3634395, upload-time = "2026-05-04T17:39:11.225Z" },
+    { url = "https://files.pythonhosted.org/packages/93/11/6f2b1847d9fa457b3b7251afc2bba2706d104a0c6f01431dfae5d679a839/pypdfium2-5.8.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63bf09b2e13ba8545c930d243f0650c664a1b51314daa3b5f38df6d1a17b4bc", size = 3617413, upload-time = "2026-05-04T17:39:13.139Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/fd/99ce639de5ca06d21743c740dd988cd209dda623bc763ae10b8a162022e1/pypdfium2-5.8.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:937881c1698456749ed203a58db1895baa5eb7178cdb837ef84867790638da28", size = 3347639, upload-time = "2026-05-04T17:39:15.086Z" },
+    { url = "https://files.pythonhosted.org/packages/fa/47/82864cc6e26dd8969d5594c168635acb16458d35cf5fed65d6b2e32abb42/pypdfium2-5.8.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6be9dc2b84a8694ad7e626bab133244e8241014d5ed1930d865a9bdf90df1e24", size = 3746404, upload-time = "2026-05-04T17:39:17.094Z" },
+    { url = "https://files.pythonhosted.org/packages/82/58/e41e49bba951f61921bac7289e67fe02af5ac57192d0bbfb5f459dc3691d/pypdfium2-5.8.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f27bd82891ae302dd02d736b14809661f6d1220ee1e96dbed9b23e2811922a3", size = 4177893, upload-time = "2026-05-04T17:39:18.729Z" },
+    { url = "https://files.pythonhosted.org/packages/b4/15/fa7031010d5cf6853dadb4864680a0bfb7782c5bb6a1a401e0c25c4fca87/pypdfium2-5.8.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26c1089cdbbdc7fe1248f6d17fe3f30214be4f287dd0196b31aaee18a1564240", size = 3665152, upload-time = "2026-05-04T17:39:20.207Z" },
+    { url = "https://files.pythonhosted.org/packages/de/6a/5a3520a8b0cfa8d7fdc3f03a07ad9d6146c28ffd519330706f64fd8939a8/pypdfium2-5.8.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1c038a9290864aaa4862dd32e591993d82551ca4d152b4e8ce6d43ba37dc04a8", size = 3095365, upload-time = "2026-05-04T17:39:22.054Z" },
+    { url = "https://files.pythonhosted.org/packages/32/d3/845bae4de3cfa36865959046156edb5bf9baea400ccdecdd84fdd911b0f5/pypdfium2-5.8.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f104bc1a6d8bfc1ff088aa50db13b9729cfdb3722b44975c3c457e9a7b9c7318", size = 2961801, upload-time = "2026-05-04T17:39:23.817Z" },
+    { url = "https://files.pythonhosted.org/packages/99/76/cf54eabee4a172241dfcfe63533bd1e11e2162114a983453a5a40bfec114/pypdfium2-5.8.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:04ca7c57a553facf8d46c6ea8ba6fa557e698670cfa4a58e0e01fdae2f6be87d", size = 4133067, upload-time = "2026-05-04T17:39:25.619Z" },
+    { url = "https://files.pythonhosted.org/packages/77/66/dcf871d19187ca04ea184a99801a6e7e556d8347aa49540fee33cda6dfc5/pypdfium2-5.8.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ad42b9c22477b32dbedcbc8232833f385d92fd0cf92822547b02383cf9a476d7", size = 3749100, upload-time = "2026-05-04T17:39:27.203Z" },
+    { url = "https://files.pythonhosted.org/packages/32/67/0d456c79660959ca45ad307b4d67161d29f9ed4083ee1e8fe8c6925b7c82/pypdfium2-5.8.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:388e3119cf5ca0979b7d5f6d40b7fcd5ab49e17ed4e6de6af89ba116061acfda", size = 4339212, upload-time = "2026-05-04T17:39:29.277Z" },
+    { url = "https://files.pythonhosted.org/packages/76/89/e5b0e0f7936be341c91c0f45cd70d693878894ed62aed93a6ee32e9c43c4/pypdfium2-5.8.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:aa05bbfa485ce7916217aa78d856c9f9cd86b08b20846c650392a67975ee72e9", size = 4383943, upload-time = "2026-05-04T17:39:31.287Z" },
+    { url = "https://files.pythonhosted.org/packages/82/21/4502ed255f082f579cd3537c2971cf1a57778d43703a08bcd1a92253189f/pypdfium2-5.8.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:f0813a16bb39d5ebd173ea5484430bb67a89b4b181db0a636c73b64ad063c3ea", size = 3925680, upload-time = "2026-05-04T17:39:33.241Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/4f/2e59723e7a07779439bd885c1b4960079c9710603308888d29ac926ae69a/pypdfium2-5.8.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:a3c78f7d20dd821bec6c072efdb21a1370b9efe10fdeeb68c969e67608e25385", size = 4269560, upload-time = "2026-05-04T17:39:34.926Z" },
+    { url = "https://files.pythonhosted.org/packages/34/4e/7b6b1bde3788c8b880d4b8131d95d9d339cebafb3ad9102d82e234bb65be/pypdfium2-5.8.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:86d302e207c138c827b885a72784f7b306d840646ebeae07e8efdbc39321c629", size = 4182434, upload-time = "2026-05-04T17:39:36.624Z" },
+    { url = "https://files.pythonhosted.org/packages/11/7b/6ed4782e0d7a5278330598ce8c4b2df7255f4585a0b3d04520fa580d6507/pypdfium2-5.8.0-py3-none-win32.whl", hash = "sha256:3f25fd436920a907291462b41bdc0ab9f8235c3944b4c9c15398da595ffd1fed", size = 3636680, upload-time = "2026-05-04T17:39:38.49Z" },
+    { url = "https://files.pythonhosted.org/packages/19/55/da7223d4202b2461f4f889b0baf10dddec3db7f88e6fd8c52db4a516eecd/pypdfium2-5.8.0-py3-none-win_amd64.whl", hash = "sha256:55592af0bddd2d62bed18e0053c546c9b72041430c5115e54870f7f6163125b0", size = 3754962, upload-time = "2026-05-04T17:39:40.13Z" },
+    { url = "https://files.pythonhosted.org/packages/fc/7a/f3dcefe6ee7389aad3ca1488c177e8fbf978206de21c7a99ccf487ea38ab/pypdfium2-5.8.0-py3-none-win_arm64.whl", hash = "sha256:3f17ed97ae8a5a1705301ca93af256a5b02f9009dee4e99c5e175831d46ebd7c", size = 3548362, upload-time = "2026-05-04T17:39:42.304Z" },
+]
+
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"
@@ -3168,6 +3230,19 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
 ]

+[[package]]
+name = "python-docx"
+version = "1.2.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "lxml" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" },
+]
+
 [[package]]
 name = "python-dotenv"
 version = "1.2.2"
--- a/mcp-server/worker/nl2sql_worker.py
+++ b/mcp-server/worker/nl2sql_worker.py
@@ -238,15 +238,14 @@ async def _query_pv_history(tag_names: list[str], time_from: str, time_to: str,
    conn = _get_db_connection()
    try:
        with conn.cursor() as cur:
-            # TimescaleDB의 time_bucket 함수 사용
            cur.execute(
                """
-                SELECT time_bucket('1 min', ts) AS time, tag_name, value
-                FROM realtime_table
-                WHERE tag_name = ANY(%s)
-                  AND ts >= %s
-                  AND ts <= %s
-                ORDER BY time DESC
+                SELECT recorded_at AS time, tagname AS tag_name, value
+                FROM history_table
+                WHERE tagname = ANY(%s)
+                  AND recorded_at >= %s
+                  AND recorded_at <= %s
+                ORDER BY recorded_at DESC, tagname
                LIMIT %s
                """,
                (tag_names, time_from, time_to, limit),
@@ -272,17 +271,25 @@ async def _get_tag_metadata(query: str, limit: int = 10) -> str:
        with conn.cursor() as cur:
            cur.execute(
                """
-                SELECT DISTINCT tag_name, unit, description
+                SELECT tagname, livevalue, timestamp, node_id
                FROM realtime_table
-                WHERE tag_name ILIKE %s
-                ORDER BY tag_name
+                WHERE tagname ILIKE %s
+                ORDER BY tagname
                LIMIT %s
                """,
                (f"%{query}%", limit),
            )
-            columns = ["tag_name", "unit", "description"]
+            columns = ["tag_name", "current_value", "last_updated", "node_id"]
            rows = cur.fetchall()
-            data = [dict(zip(columns, row)) for row in rows]
+            data = [
+                {
+                    "tag_name": r[0],
+                    "current_value": r[1],
+                    "last_updated": r[2].isoformat() if r[2] else None,
+                    "node_id": r[3],
+                }
+                for r in rows
+            ]
            return {
                "success": True,
                "query": query,