feat: Knowledge Base RAG 시스템 + 채팅 LLM 개선 (Phase 0~5 완료)
- KB RAG 전체 파이프라인: 업로드, 파싱(xlsx/pdf/docx/text), 임베딩, Qdrant 인덱싱 - KB 관리 UI(14번 탭): 로그인, 문서 목록, 업로드, 삭제, 재인덱스 - OllamaController: 한글 시스템 프롬프트, plant_context.md 외부 파일화, SSE tool_start/tool_result 이벤트 - 프론트: 툴 실행 카드, KB 인용 링크, 표 자동 렌더, 추천 질문 칩 - nl2sql_worker: history_table.recorded_at 사용, tag_metadata 응답 개선 - DB: KB 테이블 5개 DDL + 시드, pgcrypto 확장
This commit is contained in:
8
mcp-server/parsers/__init__.py
Normal file
8
mcp-server/parsers/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""KB 문서 파서 모음.
|
||||
|
||||
각 모듈은 `parse(path: str) -> list[dict]` 인터페이스를 제공한다.
|
||||
반환 청크는 다음 키를 가진다:
|
||||
text: str 임베딩 대상 본문 (보통 200~1500자)
|
||||
chunk_kind: str row | sheet | section | table | page | paragraph | heading
|
||||
locator: str 사람 가독 위치 문자열 (예: "sheet=Pump-A; row=12")
|
||||
"""
|
||||
41
mcp-server/parsers/docx_parser.py
Normal file
41
mcp-server/parsers/docx_parser.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""docx 청킹 — 헤딩 경로 별 청크."""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def parse(path: str) -> list[dict]:
    """Split a .docx file into heading-scoped text chunks plus table chunks.

    Body paragraphs are grouped under the heading path that precedes them
    (for example ``Chapter / Section``); text that appears before the first
    heading is filed under ``preface``. Every table in the document is
    emitted as one additional chunk, because ``Document.paragraphs`` does not
    include text that lives inside tables.

    Args:
        path: Location of the .docx file; handed directly to python-docx.

    Returns:
        A list of dicts with the keys ``text``, ``chunk_kind`` and
        ``locator``. Paragraph chunks use ``chunk_kind="heading"`` and
        ``locator="heading=<path>"``. Table chunks use ``chunk_kind="table"``
        and ``locator="table=<n>"`` (1-based, in ``doc.tables`` order).
    """
    from docx import Document

    doc = Document(path)
    chunks: list[dict] = []

    heading_path: list[str] = []
    pending: list[str] = []

    def flush() -> None:
        """Emit the buffered paragraphs as one chunk and reset the buffer."""
        if not pending:
            return
        heading = " / ".join(heading_path) if heading_path else "preface"
        chunks.append({
            "text": "\n".join(pending).strip(),
            "chunk_kind": "heading",
            "locator": f"heading={heading}",
        })
        pending.clear()

    for p in doc.paragraphs:
        text = (p.text or "").strip()
        if not text:
            continue

        style_name = (p.style.name or "").lower() if p.style else ""
        if not style_name.startswith("heading"):
            pending.append(text)
            continue

        # A heading closes the section collected so far.
        flush()
        try:
            # Style names look like "heading 2"; the trailing token is the level.
            level = int(style_name.split()[-1])
        except (ValueError, IndexError):
            level = 1
        # Keep the ancestors above this level and replace everything below.
        heading_path = heading_path[: max(0, level - 1)] + [text]

    flush()

    # Tables are not part of doc.paragraphs, so collect them separately.
    max_rows = 200  # same row cap the PDF parser applies to its table chunks
    for table_no, table in enumerate(doc.tables, start=1):
        lines: list[str] = []
        for row in table.rows:
            # Collapse in-cell line breaks so each table row stays on one line.
            cells = [" ".join((cell.text or "").split()) for cell in row.cells]
            if any(cells):
                lines.append(" | ".join(cells))
            if len(lines) >= max_rows:
                break
        if lines:
            chunks.append({
                "text": "\n".join(lines),
                "chunk_kind": "table",
                "locator": f"table={table_no}",
            })

    return chunks
|
||||
34
mcp-server/parsers/pdf_parser.py
Normal file
34
mcp-server/parsers/pdf_parser.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""pdf 청킹 — pdfplumber로 페이지/표 추출, 헤딩 분리 실패 시 페이지 단위 fallback."""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def parse(path: str) -> list[dict]:
    """Extract per-page text chunks and per-table chunks from a PDF.

    For each page the extracted text (capped at 5000 characters) becomes a
    ``page`` chunk, and every table pdfplumber detects on that page becomes a
    ``table`` chunk whose rows are joined with `` | `` (capped at 200 rows).

    Args:
        path: Location of the PDF file; handed directly to ``pdfplumber.open``.

    Returns:
        A list of dicts with the keys ``text``, ``chunk_kind`` and
        ``locator``. Locators are ``page=<n>`` for page chunks and
        ``page=<n>; table=<m>`` for table chunks (both 1-based).
    """
    import logging

    import pdfplumber

    log = logging.getLogger(__name__)
    chunks: list[dict] = []

    with pdfplumber.open(path) as pdf:
        for pno, page in enumerate(pdf.pages, start=1):
            txt = (page.extract_text() or "").strip()
            if txt:
                chunks.append({
                    "text": txt[:5000],
                    "chunk_kind": "page",
                    "locator": f"page={pno}",
                })

            try:
                tables = page.extract_tables() or []
            except Exception as exc:
                # Best effort: a broken table must not abort the whole
                # document, but the failure should be visible in the logs.
                log.warning(
                    "pdf table extraction failed (page=%d, file=%s): %s",
                    pno, path, exc,
                )
                tables = []

            for ti, table in enumerate(tables, start=1):
                rows = [[(c or "").strip() for c in row] for row in table if row]
                # Rows made only of blank cells would add nothing but
                # separator characters to the chunk, so drop them.
                rows = [r for r in rows if any(r)]
                if not rows:
                    continue
                md = "\n".join(" | ".join(r) for r in rows[:200])
                chunks.append({
                    "text": md,
                    "chunk_kind": "table",
                    "locator": f"page={pno}; table={ti}",
                })

    return chunks
|
||||
56
mcp-server/parsers/text_parser.py
Normal file
56
mcp-server/parsers/text_parser.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""md / txt chunking — md is split per `#` heading, txt is split into paragraphs separated by a blank line."""
|
||||
from __future__ import annotations
|
||||
import os
|
||||
|
||||
|
||||
def parse(path: str) -> list[dict]:
    """Chunk a Markdown or plain-text file.

    Files ending in ``.md`` or ``.markdown`` are split per heading; any other
    extension is treated as plain text and split into paragraphs.

    Args:
        path: Location of the file. It is read as UTF-8 and undecodable bytes
            are dropped rather than raising.

    Returns:
        A list of dicts with the keys ``text``, ``chunk_kind`` and ``locator``.
    """
    ext = os.path.splitext(path)[1].lower()
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        content = f.read()

    # ".markdown" must be handled like ".md": the ingest tool routes both
    # extensions to this parser.
    if ext in (".md", ".markdown"):
        return _parse_md(content)
    return _parse_txt(content)


def _parse_md(text: str) -> list[dict]:
    """Split Markdown text into one chunk per heading section.

    A line whose first non-blank character is ``#`` starts a new section,
    unless it sits inside a fenced code block (``` or ~~~), where ``#`` is
    ordinary content such as a shell or Python comment. Text before the first
    heading is filed under ``preface``. Sections with no body text produce no
    chunk.

    Returns:
        Dicts with ``chunk_kind="heading"`` and ``locator="heading=<title>"``;
        ``text`` holds the section body without the heading line.
    """
    chunks: list[dict] = []
    cur_heading = "preface"
    buf: list[str] = []
    open_fence = ""  # the marker ("```" or "~~~") of the fence we are inside

    def flush() -> None:
        """Emit the buffered body under the current heading, then reset."""
        body = "\n".join(buf).strip()
        if body:
            chunks.append({
                "text": body,
                "chunk_kind": "heading",
                "locator": f"heading={cur_heading}",
            })
        buf.clear()

    for ln in text.split("\n"):
        s = ln.lstrip()

        if s.startswith(("```", "~~~")):
            marker = s[:3]
            if not open_fence:
                open_fence = marker
            elif marker == open_fence:
                open_fence = ""
            buf.append(ln)
            continue

        if not open_fence and s.startswith("#"):
            flush()
            cur_heading = s.lstrip("#").strip() or "section"
        else:
            buf.append(ln)

    flush()
    return chunks


def _parse_txt(text: str) -> list[dict]:
    """Split plain text into paragraph chunks.

    Paragraphs are separated by one or more blank lines; a line that contains
    only whitespace counts as blank.

    Returns:
        Dicts with ``chunk_kind="paragraph"`` and ``locator="paragraph=<n>"``
        (1-based, counting only non-empty paragraphs).
    """
    import re

    parts = [p.strip() for p in re.split(r"\n\s*\n", text) if p.strip()]
    return [
        {
            "text": p,
            "chunk_kind": "paragraph",
            "locator": f"paragraph={i}",
        }
        for i, p in enumerate(parts, start=1)
    ]
|
||||
49
mcp-server/parsers/xlsx_parser.py
Normal file
49
mcp-server/parsers/xlsx_parser.py
Normal file
@@ -0,0 +1,49 @@
|
||||
"""xlsx 청킹 — 시트 단위(markdown) + 행 단위 둘 다 생성."""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def parse(path: str) -> list[dict]:
    """Chunk an Excel workbook into one sheet chunk plus one chunk per row.

    The first row of every sheet is treated as the header. For each non-empty
    sheet two kinds of chunks are produced:

    * ``sheet`` — a Markdown table with the header and up to the first 1000
      data rows.
    * ``row`` — one ``"<sheet>: col=val, col=val"`` line per data row, with
      empty cells skipped. Rows that are entirely empty produce no chunk.

    Args:
        path: Location of the workbook; handed directly to
            ``openpyxl.load_workbook`` (read-only, cached values instead of
            formulas).

    Returns:
        A list of dicts with the keys ``text``, ``chunk_kind`` and
        ``locator``. Row locators use the spreadsheet row number, so the
        first data row is ``row=2``.
    """
    from openpyxl import load_workbook

    wb = load_workbook(path, read_only=True, data_only=True)
    chunks: list[dict] = []

    try:
        for sheet in wb.worksheets:
            rows = list(sheet.iter_rows(values_only=True))
            if not rows:
                continue

            header = [str(c) if c is not None else "" for c in rows[0]]
            sheet_name = sheet.title

            # 1) Sheet chunk — Markdown table, limited to the first 1000 data rows.
            body_rows = rows[1:1001]
            md_lines = [
                "| " + " | ".join(header) + " |",
                "| " + " | ".join(["---"] * len(header)) + " |",
            ]
            for r in body_rows:
                cells = [str(c) if c is not None else "" for c in r]
                # Pad short rows; longer rows are cut to the header width below.
                cells += [""] * (len(header) - len(cells))
                md_lines.append("| " + " | ".join(cells[: len(header)]) + " |")
            chunks.append({
                "text": "\n".join(md_lines),
                "chunk_kind": "sheet",
                "locator": f"sheet={sheet_name}",
            })

            # 2) Row chunks — each data row as a single 'col=val' line.
            for i, r in enumerate(rows[1:], start=2):
                parts = []
                for j, val in enumerate(r):
                    if val is None or val == "":
                        continue
                    # Fall back to a positional name when the header is missing.
                    col = header[j] if j < len(header) and header[j] else f"col{j+1}"
                    parts.append(f"{col}={val}")
                if not parts:
                    continue
                chunks.append({
                    "text": f"{sheet_name}: " + ", ".join(parts),
                    "chunk_kind": "row",
                    "locator": f"sheet={sheet_name}; row={i}",
                })
    finally:
        # A read-only workbook keeps the underlying file open until it is
        # closed explicitly; release it even when parsing fails.
        wb.close()

    return chunks
|
||||
@@ -24,6 +24,10 @@ dependencies = [
|
||||
"scikit-learn>=1.3.0",
|
||||
"numpy>=1.24.0",
|
||||
"Pillow>=10.0.0",
|
||||
# KB 문서 파싱
|
||||
"openpyxl>=3.1.0",
|
||||
"python-docx>=1.1.0",
|
||||
"pdfplumber>=0.11.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
@@ -31,6 +31,15 @@ VLLM_MODEL = get_vllm_model()
|
||||
COL_CODEBASE = "ws-65f457145aee80b2" # ExperionCrawler 소스코드
|
||||
COL_OPC_DOCS = "experion-opc-docs" # Experion HS R530 OPC UA 공식 문서 (266 chunks)
|
||||
|
||||
# 사용자 KB 컬렉션 (kb_collections 시드 5종과 일치)
|
||||
KB_COLLECTIONS = {
|
||||
"system_instrument": "kb_system_instrument",
|
||||
"plant_operation": "kb_plant_operation",
|
||||
"procedure": "kb_procedure",
|
||||
"report": "kb_report",
|
||||
"vendor_doc": "kb_vendor_doc",
|
||||
}
|
||||
|
||||
# PostgreSQL 연결
|
||||
DB_CONNECTION_STRING = os.environ.get("DB_CONNECTION_STRING", "postgresql://postgres:postgres@localhost:5432/iiot_platform")
|
||||
DB_TIMEOUT = int(os.environ.get("DB_TIMEOUT", "10"))
|
||||
@@ -248,6 +257,60 @@ async def _search(collection: str, query: str, top_k: int, threshold: float = 0.
|
||||
|
||||
return "\n\n---\n\n".join(parts)
|
||||
|
||||
|
||||
async def _search_kb_collection(
    qdrant_name: str,
    vec: list[float],
    top_k: int,
    tags: list[str] | None = None,
    score_threshold: float = 0.20,
) -> list[dict]:
    """Run a vector search against a single KB collection in Qdrant.

    Args:
        qdrant_name: Name of the Qdrant collection to query.
        vec: Query embedding.
        top_k: Maximum number of points requested from Qdrant.
        tags: Optional tag filter; a point matches when its ``tags`` payload
            field contains any of the given values.
        score_threshold: Minimum similarity score passed to Qdrant
            (defaults to the previously hard-coded 0.20).

    Returns:
        The ``result`` list of the Qdrant search response (points with their
        payload). An empty list is returned when the collection does not
        exist (HTTP 404) or when the request fails for any reason; failures
        are logged as warnings instead of being raised.
    """
    body: dict = {
        "vector": vec,
        "limit": top_k,
        "with_payload": True,
        "score_threshold": score_threshold,
    }
    if tags:
        body["filter"] = {"must": [{"key": "tags", "match": {"any": tags}}]}

    def _call() -> list[dict]:
        # Blocking HTTP call; executed in a worker thread below.
        with httpx.Client(timeout=20) as client:
            resp = client.post(
                f"{QDRANT_URL}/collections/{qdrant_name}/points/search",
                json=body,
            )
            if resp.status_code == 404:
                # The collection has not been created yet: treat as "no hits".
                return []
            resp.raise_for_status()
            return resp.json().get("result", [])

    try:
        return await asyncio.to_thread(_call)
    except Exception as e:
        # Deliberate best effort: one unavailable collection must not break
        # a multi-collection search.
        logging.warning("[search_kb] %s 검색 실패: %s", qdrant_name, e)
        return []
|
||||
|
||||
|
||||
def _recency_factor(uploaded_at_iso: str | None) -> float:
|
||||
"""uploaded_at 기준 최신 가중치. 최근 7일 +10%, 30일 +5%, 90일 +2%, 그 외 1.0."""
|
||||
if not uploaded_at_iso:
|
||||
return 1.0
|
||||
try:
|
||||
from datetime import datetime, timezone
|
||||
ts = datetime.fromisoformat(uploaded_at_iso.replace("Z", "+00:00"))
|
||||
if ts.tzinfo is None:
|
||||
ts = ts.replace(tzinfo=timezone.utc)
|
||||
age = (datetime.now(timezone.utc) - ts).total_seconds() / 86400.0
|
||||
if age < 7: return 1.10
|
||||
if age < 30: return 1.05
|
||||
if age < 90: return 1.02
|
||||
return 1.0
|
||||
except Exception:
|
||||
return 1.0
|
||||
|
||||
|
||||
# ── DB 헬퍼 ──────────────────────────────────────────────────────────────────
|
||||
|
||||
async def _get_db_connection():
|
||||
@@ -406,25 +469,161 @@ def ask_iiot_llm(question: str, context: str = "") -> str:
|
||||
|
||||
|
||||
# Retrieval-augmented answer: gather context from the enabled sources, then
# ask the LLM.
#   question       - the user's question
#   search_docs    - include the Experion HS R530 official docs (default True)
#   search_code    - include the ExperionCrawler source code (default False)
#   search_kb      - include the user knowledge base (default False)
#   kb_collections - KB collection keys to search; None means all of them
# Returns whatever ask_iiot_llm() returns for the question and the joined
# context.
# NOTE(review): the Korean docstring below is kept verbatim because MCP tool
# docstrings are presumably published to clients as the tool description —
# confirm before rewording it.
@mcp.tool()
async def rag_query(
    question: str,
    search_code: bool = False,
    search_docs: bool = True,
    search_kb: bool = False,
    kb_collections: list[str] | None = None,
) -> str:
    """검색 → LLM 답변 생성 (통합 RAG).

    기본값: Experion HS R530 공식 문서만 검색.
    사용자 KB 검색을 포함하려면 search_kb=True. 코드 검색은 search_code=True.

    Args:
        question:       질문
        search_docs:    Experion HS R530 공식 문서 검색 여부 (기본 True)
        search_code:    ExperionCrawler 소스코드 검색 여부 (기본 False)
        search_kb:      사용자 KB 검색 여부 (기본 False)
        kb_collections: 검색 대상 KB 컬렉션 키 목록. None이면 전체.
                        예: ["plant_operation", "procedure"]
    """
    context_parts: list[str] = []

    if search_docs:
        docs_text = await _search(COL_OPC_DOCS, question, 4)
        context_parts.append(f"=== Experion HS R530 공식 문서 ===\n{docs_text}")

    if search_code:
        code_text = await _search(COL_CODEBASE, question, 3)
        context_parts.append(f"=== ExperionCrawler 구현 코드 ===\n{code_text}")

    if search_kb:
        kb_text = await _format_kb_results(question, kb_collections, top_k=6)
        context_parts.append(f"=== 사용자 지식 베이스 ===\n{kb_text}")

    # ask_iiot_llm is a synchronous function; run it in a worker thread so
    # the event loop is not blocked while the answer is generated.
    return await asyncio.to_thread(
        ask_iiot_llm, question, "\n\n".join(context_parts)
    )
|
||||
|
||||
|
||||
async def _format_kb_results(
    query: str,
    collection_keys: list[str] | None,
    top_k: int,
    tags: list[str] | None = None,
    since: str | None = None,
    boost_recent: bool = True,
) -> str:
    """Search the KB and serialise the hits as citation-style text.

    All arguments are forwarded unchanged to ``_search_kb_raw``.

    Returns:
        One block per hit, joined by a ``---`` separator. Each block starts
        with a header line ``[score=<s>] <title> > <locator>`` (the locator
        part is omitted when empty) followed by at most the first 700
        characters of the chunk text. A fixed "no results" message is
        returned when the search yields nothing.
    """
    found = await _search_kb_raw(
        query, collection_keys, top_k, tags, since, boost_recent
    )
    if not found:
        return "관련 KB 결과 없음."

    def render(h: dict) -> str:
        """Format a single hit as header line plus text excerpt."""
        text = (h.get("text") or "").strip()
        score = h.get("score", 0.0)
        loc = h.get("locator") or ""
        title = h.get("title") or "(제목없음)"
        # Only add the " > locator" suffix when a locator is present.
        loc_str = f" > {loc}" if loc else ""
        return f"[score={score:.3f}] {title}{loc_str}\n{text[:700]}"

    return "\n\n---\n\n".join(render(h) for h in found)
|
||||
|
||||
|
||||
async def _search_kb_raw(
    query: str,
    collection_keys: list[str] | None,
    top_k: int,
    tags: list[str] | None,
    since: str | None,
    boost_recent: bool,
) -> list[dict]:
    """Core KB search: multi-collection vector search, filtering and ranking.

    Args:
        query: Search text; embedded once and reused for every collection.
        collection_keys: Keys of ``KB_COLLECTIONS`` to search. ``None`` or an
            empty list means all collections; unknown keys are ignored.
        top_k: Maximum number of hits to return. Values <= 0 yield no hits.
        tags: Optional tag filter forwarded to the per-collection search.
        since: Optional ISO-8601 timestamp; hits uploaded before it are
            dropped. Hits without an ``uploaded_at`` value are kept.
        boost_recent: When true, scores are multiplied by the recency factor.

    Returns:
        Up to ``top_k`` hit dicts sorted by descending boosted score, with at
        most one entry per ``(doc_id, locator)`` pair. Each dict carries
        ``score``, ``raw_score``, ``doc_id``, ``collection_key``, ``title``,
        ``text``, ``chunk_kind``, ``locator``, ``uploaded_at`` and ``tags``.
    """
    from datetime import datetime, timezone

    if top_k <= 0:
        # Without this guard the dedup loop below would still emit one hit.
        return []

    targets = collection_keys or list(KB_COLLECTIONS.keys())
    # dict.fromkeys keeps order while removing repeated keys, so the same
    # collection is not queried twice.
    qdrant_names = list(dict.fromkeys(
        KB_COLLECTIONS[k] for k in targets if k in KB_COLLECTIONS
    ))
    if not qdrant_names:
        return []

    def as_utc(value):
        """Parse an ISO-8601 string into an aware UTC datetime, else None."""
        if not isinstance(value, str):
            return None
        try:
            ts = datetime.fromisoformat(value.strip().replace("Z", "+00:00"))
            if ts.tzinfo is None:
                ts = ts.replace(tzinfo=timezone.utc)
            return ts.astimezone(timezone.utc)
        except (ValueError, OverflowError):
            return None

    since_ts = as_utc(since) if since else None

    def uploaded_before_since(uploaded_at) -> bool:
        """True when the hit is older than the ``since`` cut-off."""
        if not since or not uploaded_at:
            return False
        uploaded_ts = as_utc(uploaded_at)
        if since_ts is not None and uploaded_ts is not None:
            # Compare instants, not text: "Z", "+00:00" and other offsets
            # do not order correctly as plain strings.
            return uploaded_ts < since_ts
        try:
            # Fall back to the plain string comparison when parsing fails.
            return uploaded_at < since
        except TypeError:
            return False

    vec = await _embed(query)
    # Over-fetch per collection so that dedup still leaves enough hits.
    per_coll_k = max(top_k, 8)

    # The per-collection searches are independent, so run them concurrently.
    per_collection = await asyncio.gather(*(
        _search_kb_collection(qname, vec, per_coll_k, tags=tags)
        for qname in qdrant_names
    ))

    results: list[dict] = []
    for hits in per_collection:
        for h in hits:
            p = h.get("payload") or {}
            uploaded_at = p.get("uploaded_at")

            if uploaded_before_since(uploaded_at):
                continue

            base_score = h.get("score", 0.0)
            recency = _recency_factor(uploaded_at) if boost_recent else 1.0
            results.append({
                "score": base_score * recency,
                "raw_score": base_score,
                "doc_id": p.get("doc_id"),
                "collection_key": p.get("collection_key"),
                "title": p.get("title"),
                "text": p.get("text", ""),
                "chunk_kind": p.get("chunk_kind"),
                "locator": p.get("locator"),
                "uploaded_at": uploaded_at,
                "tags": p.get("tags") or [],
            })

    # Highest score first; keep only the best hit per (doc_id, locator).
    results.sort(key=lambda r: r["score"], reverse=True)
    seen: set[tuple] = set()
    unique: list[dict] = []
    for r in results:
        key = (r.get("doc_id"), r.get("locator"))
        if key in seen:
            continue
        seen.add(key)
        unique.append(r)
        if len(unique) >= top_k:
            break
    return unique
|
||||
|
||||
|
||||
# MCP tool: semantic search over the user knowledge base.
# The arguments are forwarded unchanged to _search_kb_raw(); the result is a
# JSON string. On success it has the shape {success, count, hits}; if
# anything raises, it is {success: false, error: "..."} instead of an
# exception.
# NOTE(review): the Korean docstring below is kept verbatim because MCP tool
# docstrings are presumably published to clients as the tool description —
# confirm before rewording it.
@mcp.tool()
async def search_kb(
    query: str,
    collection_keys: list[str] | None = None,
    top_k: int = 8,
    tags: list[str] | None = None,
    since: str | None = None,
    boost_recent: bool = True,
) -> str:
    """사용자 지식 베이스(KB) 다중 컬렉션 의미 검색.

    관리탭에서 업로드/인덱싱한 문서에서 질의와 의미적으로 가까운 청크를 찾는다.

    Args:
        query:           검색어 또는 자연어 질문
        collection_keys: 대상 컬렉션 키 목록. None이면 전체.
                         가능한 값: system_instrument, plant_operation,
                                   procedure, report, vendor_doc
        top_k:           반환 결과 수 (기본 8)
        tags:            태그 필터 (any 매칭). 예: ["unit-a", "P-6201"]
        since:           이 ISO 시각 이후 업로드된 문서만. 예: "2026-04-01T00:00:00Z"
        boost_recent:    True이면 uploaded_at 기준 최신 가중치 적용 (기본 True)

    Returns:
        JSON 문자열: { success, count, hits: [{ doc_id, collection_key, title,
                     text, chunk_kind, locator, score, uploaded_at, tags }, ...] }
    """
    try:
        found = await _search_kb_raw(
            query, collection_keys, top_k, tags, since, boost_recent
        )
        envelope = {"success": True, "count": len(found), "hits": found}
        # default=str stringifies any payload value json cannot encode.
        return json.dumps(envelope, ensure_ascii=False, default=str)
    except Exception as e:
        failure = {"success": False, "error": f"search_kb 실패: {e}"}
        return json.dumps(failure, ensure_ascii=False)
|
||||
|
||||
|
||||
# ── NL2SQL 도구 ───────────────────────────────────────────────────────────────
|
||||
|
||||
async def _execute_sql_internal(sql: str) -> str:
|
||||
@@ -1224,6 +1423,63 @@ async def parse_pid_drawing(filepath: str) -> str:
|
||||
|
||||
|
||||
|
||||
# ── KB ingest 파서 ────────────────────────────────────────────────────────────
|
||||
|
||||
# MCP tool: KB ingest parser. Picks a chunker from the file extension
# (.xlsx/.xlsm, .pdf, .docx, .md/.txt/.markdown), runs it in a worker thread
# and returns the chunks as a JSON string. doc_id is echoed back in the
# result; title, mime_type, collection_key and chunking_policy are accepted
# but not used by the code below.
# NOTE(review): the docstring below is kept verbatim because MCP tool
# docstrings are presumably published to clients as the tool description —
# confirm before rewording it.
@mcp.tool()
async def parse_document(
    doc_id: str,
    title: str,
    file_path: str,
    mime_type: str = "",
    collection_key: str = "",
    chunking_policy: str = "",
) -> str:
    """KB ingest 파서. 파일 확장자에 따라 적절한 청킹을 수행한다.

    Args:
        doc_id:          문서 ID (UUID 문자열)
        title:           제목 (오류 메시지에만 사용)
        file_path:       절대 경로
        mime_type:       정보용 (옵션)
        collection_key:  정보용 (옵션)
        chunking_policy: JSON 문자열, 향후 정책 분기에 사용

    Returns:
        JSON 문자열: {"success": true, "chunks": [{"text", "chunk_kind", "locator"}, ...]}
                     or {"success": false, "error": "..."}
    """
    import os

    def failure(message: str) -> str:
        """Serialise an error result in the tool's JSON envelope."""
        return json.dumps({"success": False, "error": message}, ensure_ascii=False)

    if not os.path.isfile(file_path):
        return failure(f"file not found: {file_path}")

    ext = os.path.splitext(file_path)[1].lower()
    try:
        # The parser modules are imported lazily so that a missing optional
        # dependency is reported as a parse failure for that file type only.
        if ext in (".xlsx", ".xlsm"):
            from parsers import xlsx_parser as parser_module
        elif ext == ".pdf":
            from parsers import pdf_parser as parser_module
        elif ext == ".docx":
            from parsers import docx_parser as parser_module
        elif ext in (".md", ".txt", ".markdown"):
            from parsers import text_parser as parser_module
        else:
            return failure(f"unsupported extension: {ext}")

        # The parsers are synchronous; keep the event loop free while they run.
        parsed = await asyncio.to_thread(parser_module.parse, file_path)
        result = {
            "success": True,
            "doc_id": doc_id,
            "chunks": parsed,
            "count": len(parsed),
        }
        return json.dumps(result, ensure_ascii=False)
    except Exception as e:
        return failure(f"parse failed: {e}")
|
||||
|
||||
|
||||
# ── 엔트리포인트 ──────────────────────────────────────────────────────────────
|
||||
|
||||
def main():
|
||||
|
||||
75
mcp-server/uv.lock
generated
75
mcp-server/uv.lock
generated
@@ -1226,11 +1226,14 @@ dependencies = [
|
||||
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
|
||||
{ name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
|
||||
{ name = "openai" },
|
||||
{ name = "openpyxl" },
|
||||
{ name = "paddleocr" },
|
||||
{ name = "paddlepaddle" },
|
||||
{ name = "pdfplumber" },
|
||||
{ name = "pillow" },
|
||||
{ name = "psycopg", extra = ["binary"] },
|
||||
{ name = "pymupdf" },
|
||||
{ name = "python-docx" },
|
||||
{ name = "qdrant-client" },
|
||||
{ name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
|
||||
{ name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
|
||||
@@ -1246,11 +1249,14 @@ requires-dist = [
|
||||
{ name = "mcp", extras = ["cli"], specifier = ">=1.0.0" },
|
||||
{ name = "numpy", specifier = ">=1.24.0" },
|
||||
{ name = "openai", specifier = ">=1.0.0" },
|
||||
{ name = "openpyxl", specifier = ">=3.1.0" },
|
||||
{ name = "paddleocr", specifier = ">=2.6.0,<2.7.0" },
|
||||
{ name = "paddlepaddle", specifier = ">=2.6.0,<3.0.0" },
|
||||
{ name = "pdfplumber", specifier = ">=0.11.0" },
|
||||
{ name = "pillow", specifier = ">=10.0.0" },
|
||||
{ name = "psycopg", extras = ["binary"], specifier = ">=3.1.0" },
|
||||
{ name = "pymupdf", specifier = ">=1.24.0" },
|
||||
{ name = "python-docx", specifier = ">=1.1.0" },
|
||||
{ name = "qdrant-client", specifier = ">=1.9.0" },
|
||||
{ name = "scikit-learn", specifier = ">=1.3.0" },
|
||||
{ name = "sentence-transformers", specifier = ">=3.0.0" },
|
||||
@@ -2597,6 +2603,33 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/2b/f8434233fab2bd66a02ec014febe4e5adced20e2693e0e90a07d118ed30e/pandas-3.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:5371b72c2d4d415d08765f32d689217a43227484e81b2305b52076e328f6f482", size = 9455341, upload-time = "2026-03-31T06:48:28.418Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pdfminer-six"
|
||||
version = "20251230"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "charset-normalizer" },
|
||||
{ name = "cryptography" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/46/9a/d79d8fa6d47a0338846bb558b39b9963b8eb2dfedec61867c138c1b17eeb/pdfminer_six-20251230.tar.gz", hash = "sha256:e8f68a14c57e00c2d7276d26519ea64be1b48f91db1cdc776faa80528ca06c1e", size = 8511285, upload-time = "2025-12-30T15:49:13.104Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/65/d7/b288ea32deb752a09aab73c75e1e7572ab2a2b56c3124a5d1eb24c62ceb3/pdfminer_six-20251230-py3-none-any.whl", hash = "sha256:9ff2e3466a7dfc6de6fd779478850b6b7c2d9e9405aa2a5869376a822771f485", size = 6591909, upload-time = "2025-12-30T15:49:10.76Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pdfplumber"
|
||||
version = "0.11.9"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pdfminer-six" },
|
||||
{ name = "pillow" },
|
||||
{ name = "pypdfium2" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/38/37/9ca3519e92a8434eb93be570b131476cc0a4e840bb39c62ddb7813a39d53/pdfplumber-0.11.9.tar.gz", hash = "sha256:481224b678b2bbdbf376e2c39bf914144eef7c3d301b4a28eebf0f7f6109d6dc", size = 102768, upload-time = "2026-01-05T08:10:29.072Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/8b/c8/cdbc975f5b634e249cfa6597e37c50f3078412474f21c015e508bfbfe3c3/pdfplumber-0.11.9-py3-none-any.whl", hash = "sha256:33ec5580959ba524e9100138746e090879504c42955df1b8a997604dd326c443", size = 60045, upload-time = "2026-01-05T08:10:27.512Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pillow"
|
||||
version = "12.2.0"
|
||||
@@ -3156,6 +3189,35 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pypdfium2"
|
||||
version = "5.8.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/6d/3d/dc934d3b606c51c3ecc95b6731d84b7dd7ab8e513a50b0e98a4da6c8a719/pypdfium2-5.8.0.tar.gz", hash = "sha256:049397c647e50f83115ee951c49394dab9e9ba52ebdd5a11ab1109390eb3d34e", size = 271934, upload-time = "2026-05-04T17:39:43.794Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/8c/6b75b923cb81368fa3ea7c48a0616b839620a3aeff899885bd930449b89e/pypdfium2-5.8.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:f67b6c74b716d9ac725ad1af49ae786ad813ac20823d45606d59f1fc06caa8af", size = 3374554, upload-time = "2026-05-04T17:39:05.552Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/61/a885c7f36efba89ec98e3d1fe95c83b48c2d6dea321e9194ac6460e7a834/pypdfium2-5.8.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:53e82bf3e6a2da170b1bda83f93b7eec57cb6efe3cacd05cba78823879a85203", size = 2831667, upload-time = "2026-05-04T17:39:08.028Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/86/1f/04b5627f6dba312d3e707e5b019c9f24d8b03b5aa366866a9e02ec00f8d4/pypdfium2-5.8.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:085e633dcc89b65ff4035a4787e98ce7ae636836eb39c83dd0db26113d9774bc", size = 3450815, upload-time = "2026-05-04T17:39:09.551Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/77/8e3a2aba2bc4aef5abe1b1306d05b00588dc0bf7f5c850d1adf6164c786b/pypdfium2-5.8.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:bc84b7c6efede88fcfb9467f81daf416f26b973a54fc1cf4d3410d622fda6d7a", size = 3634395, upload-time = "2026-05-04T17:39:11.225Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/93/11/6f2b1847d9fa457b3b7251afc2bba2706d104a0c6f01431dfae5d679a839/pypdfium2-5.8.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63bf09b2e13ba8545c930d243f0650c664a1b51314daa3b5f38df6d1a17b4bc", size = 3617413, upload-time = "2026-05-04T17:39:13.139Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ed/fd/99ce639de5ca06d21743c740dd988cd209dda623bc763ae10b8a162022e1/pypdfium2-5.8.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:937881c1698456749ed203a58db1895baa5eb7178cdb837ef84867790638da28", size = 3347639, upload-time = "2026-05-04T17:39:15.086Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fa/47/82864cc6e26dd8969d5594c168635acb16458d35cf5fed65d6b2e32abb42/pypdfium2-5.8.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6be9dc2b84a8694ad7e626bab133244e8241014d5ed1930d865a9bdf90df1e24", size = 3746404, upload-time = "2026-05-04T17:39:17.094Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/82/58/e41e49bba951f61921bac7289e67fe02af5ac57192d0bbfb5f459dc3691d/pypdfium2-5.8.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f27bd82891ae302dd02d736b14809661f6d1220ee1e96dbed9b23e2811922a3", size = 4177893, upload-time = "2026-05-04T17:39:18.729Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b4/15/fa7031010d5cf6853dadb4864680a0bfb7782c5bb6a1a401e0c25c4fca87/pypdfium2-5.8.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26c1089cdbbdc7fe1248f6d17fe3f30214be4f287dd0196b31aaee18a1564240", size = 3665152, upload-time = "2026-05-04T17:39:20.207Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/de/6a/5a3520a8b0cfa8d7fdc3f03a07ad9d6146c28ffd519330706f64fd8939a8/pypdfium2-5.8.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1c038a9290864aaa4862dd32e591993d82551ca4d152b4e8ce6d43ba37dc04a8", size = 3095365, upload-time = "2026-05-04T17:39:22.054Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/d3/845bae4de3cfa36865959046156edb5bf9baea400ccdecdd84fdd911b0f5/pypdfium2-5.8.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f104bc1a6d8bfc1ff088aa50db13b9729cfdb3722b44975c3c457e9a7b9c7318", size = 2961801, upload-time = "2026-05-04T17:39:23.817Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/99/76/cf54eabee4a172241dfcfe63533bd1e11e2162114a983453a5a40bfec114/pypdfium2-5.8.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:04ca7c57a553facf8d46c6ea8ba6fa557e698670cfa4a58e0e01fdae2f6be87d", size = 4133067, upload-time = "2026-05-04T17:39:25.619Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/77/66/dcf871d19187ca04ea184a99801a6e7e556d8347aa49540fee33cda6dfc5/pypdfium2-5.8.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ad42b9c22477b32dbedcbc8232833f385d92fd0cf92822547b02383cf9a476d7", size = 3749100, upload-time = "2026-05-04T17:39:27.203Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/67/0d456c79660959ca45ad307b4d67161d29f9ed4083ee1e8fe8c6925b7c82/pypdfium2-5.8.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:388e3119cf5ca0979b7d5f6d40b7fcd5ab49e17ed4e6de6af89ba116061acfda", size = 4339212, upload-time = "2026-05-04T17:39:29.277Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/76/89/e5b0e0f7936be341c91c0f45cd70d693878894ed62aed93a6ee32e9c43c4/pypdfium2-5.8.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:aa05bbfa485ce7916217aa78d856c9f9cd86b08b20846c650392a67975ee72e9", size = 4383943, upload-time = "2026-05-04T17:39:31.287Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/82/21/4502ed255f082f579cd3537c2971cf1a57778d43703a08bcd1a92253189f/pypdfium2-5.8.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:f0813a16bb39d5ebd173ea5484430bb67a89b4b181db0a636c73b64ad063c3ea", size = 3925680, upload-time = "2026-05-04T17:39:33.241Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7d/4f/2e59723e7a07779439bd885c1b4960079c9710603308888d29ac926ae69a/pypdfium2-5.8.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:a3c78f7d20dd821bec6c072efdb21a1370b9efe10fdeeb68c969e67608e25385", size = 4269560, upload-time = "2026-05-04T17:39:34.926Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/34/4e/7b6b1bde3788c8b880d4b8131d95d9d339cebafb3ad9102d82e234bb65be/pypdfium2-5.8.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:86d302e207c138c827b885a72784f7b306d840646ebeae07e8efdbc39321c629", size = 4182434, upload-time = "2026-05-04T17:39:36.624Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/11/7b/6ed4782e0d7a5278330598ce8c4b2df7255f4585a0b3d04520fa580d6507/pypdfium2-5.8.0-py3-none-win32.whl", hash = "sha256:3f25fd436920a907291462b41bdc0ab9f8235c3944b4c9c15398da595ffd1fed", size = 3636680, upload-time = "2026-05-04T17:39:38.49Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/19/55/da7223d4202b2461f4f889b0baf10dddec3db7f88e6fd8c52db4a516eecd/pypdfium2-5.8.0-py3-none-win_amd64.whl", hash = "sha256:55592af0bddd2d62bed18e0053c546c9b72041430c5115e54870f7f6163125b0", size = 3754962, upload-time = "2026-05-04T17:39:40.13Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/fc/7a/f3dcefe6ee7389aad3ca1488c177e8fbf978206de21c7a99ccf487ea38ab/pypdfium2-5.8.0-py3-none-win_arm64.whl", hash = "sha256:3f17ed97ae8a5a1705301ca93af256a5b02f9009dee4e99c5e175831d46ebd7c", size = 3548362, upload-time = "2026-05-04T17:39:42.304Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.9.0.post0"
|
||||
@@ -3168,6 +3230,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-docx"
|
||||
version = "1.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "lxml" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-dotenv"
|
||||
version = "1.2.2"
|
||||
|
||||
@@ -238,15 +238,14 @@ async def _query_pv_history(tag_names: list[str], time_from: str, time_to: str,
|
||||
conn = _get_db_connection()
|
||||
try:
|
||||
with conn.cursor() as cur:
|
||||
# TimescaleDB의 time_bucket 함수 사용
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT time_bucket('1 min', ts) AS time, tag_name, value
|
||||
FROM realtime_table
|
||||
WHERE tag_name = ANY(%s)
|
||||
AND ts >= %s
|
||||
AND ts <= %s
|
||||
ORDER BY time DESC
|
||||
SELECT recorded_at AS time, tagname AS tag_name, value
|
||||
FROM history_table
|
||||
WHERE tagname = ANY(%s)
|
||||
AND recorded_at >= %s
|
||||
AND recorded_at <= %s
|
||||
ORDER BY recorded_at DESC, tagname
|
||||
LIMIT %s
|
||||
""",
|
||||
(tag_names, time_from, time_to, limit),
|
||||
@@ -272,17 +271,25 @@ async def _get_tag_metadata(query: str, limit: int = 10) -> str:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT DISTINCT tag_name, unit, description
|
||||
SELECT tagname, livevalue, timestamp, node_id
|
||||
FROM realtime_table
|
||||
WHERE tag_name ILIKE %s
|
||||
ORDER BY tag_name
|
||||
WHERE tagname ILIKE %s
|
||||
ORDER BY tagname
|
||||
LIMIT %s
|
||||
""",
|
||||
(f"%{query}%", limit),
|
||||
)
|
||||
columns = ["tag_name", "unit", "description"]
|
||||
columns = ["tag_name", "current_value", "last_updated", "node_id"]
|
||||
rows = cur.fetchall()
|
||||
data = [dict(zip(columns, row)) for row in rows]
|
||||
data = [
|
||||
{
|
||||
"tag_name": r[0],
|
||||
"current_value": r[1],
|
||||
"last_updated": r[2].isoformat() if r[2] else None,
|
||||
"node_id": r[3],
|
||||
}
|
||||
for r in rows
|
||||
]
|
||||
return {
|
||||
"success": True,
|
||||
"query": query,
|
||||
|
||||
Reference in New Issue
Block a user