From 908bfe151fde371e731089bca07adb8dfc4b1fba Mon Sep 17 00:00:00 2001 From: windpacer Date: Wed, 13 May 2026 20:22:27 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20Knowledge=20Base=20RAG=20=EC=8B=9C?= =?UTF-8?q?=EC=8A=A4=ED=85=9C=20+=20=EC=B1=84=ED=8C=85=20LLM=20=EA=B0=9C?= =?UTF-8?q?=EC=84=A0=20(Phase=200~5=20=EC=99=84=EB=A3=8C)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - KB RAG 전체 파이프라인: 업로드, 파싱(xlsx/pdf/docx/text), 임베딩, Qdrant 인덱싱 - KB 관리 UI(14번 탭): 로그인, 문서 목록, 업로드, 삭제, 재인덱스 - OllamaController: 한글 시스템 프롬프트, plant_context.md 외부 파일화, SSE tool_start/tool_result 이벤트 - 프론트: 툴 실행 카드, KB 인용 링크, 표 자동 렌더, 추천 질문 칩 - nl2sql_worker: history_table.recorded_at 사용, tag_metadata 응답 개선 - DB: KB 테이블 5개 DDL + 시드, pgcrypto 확장 --- CLAUDE.md | 142 +++++ mcp-server/parsers/__init__.py | 8 + mcp-server/parsers/docx_parser.py | 41 ++ mcp-server/parsers/pdf_parser.py | 34 ++ mcp-server/parsers/text_parser.py | 56 ++ mcp-server/parsers/xlsx_parser.py | 49 ++ mcp-server/pyproject.toml | 4 + mcp-server/server.py | 268 ++++++++- mcp-server/uv.lock | 75 +++ mcp-server/worker/nl2sql_worker.py | 31 +- ...LM채팅+지식증강-phase5-사용자체크리스트.md | 142 +++++ plans/LLM채팅+지식증강플랜.md | 101 ++-- plans/phase5-사용자점검리스트.md | 0 prompts/plant_context.md | 27 + src/Core/Domain/Entities/ExperionEntities.cs | 78 +++ .../Database/ExperionDbContext.cs | 146 +++++ src/Infrastructure/Kb/KbAuthService.cs | 149 +++++ src/Infrastructure/Kb/KbEmbeddingClient.cs | 49 ++ src/Infrastructure/Kb/KbIngestWorker.cs | 228 ++++++++ src/Infrastructure/Kb/KbQdrantClient.cs | 86 +++ src/Infrastructure/Kb/KbStartupService.cs | 48 ++ src/Infrastructure/Kb/KbStorageService.cs | 73 +++ src/Infrastructure/Kb/PasswordHasher.cs | 55 ++ src/Web/Controllers/KbAuthController.cs | 68 +++ src/Web/Controllers/KbController.cs | 337 +++++++++++ src/Web/Controllers/OllamaController.cs | 130 ++++- src/Web/ExperionCrawler.csproj | 2 + src/Web/Program.cs | 20 + src/Web/appsettings.json | 9 + 
src/Web/wwwroot/css/style.css | 171 ++++++ src/Web/wwwroot/index.html | 136 +++++ src/Web/wwwroot/js/app.js | 530 +++++++++++++++++- 32 files changed, 3202 insertions(+), 91 deletions(-) create mode 100644 mcp-server/parsers/__init__.py create mode 100644 mcp-server/parsers/docx_parser.py create mode 100644 mcp-server/parsers/pdf_parser.py create mode 100644 mcp-server/parsers/text_parser.py create mode 100644 mcp-server/parsers/xlsx_parser.py create mode 100644 plans/LLM채팅+지식증강-phase5-사용자체크리스트.md create mode 100644 plans/phase5-사용자점검리스트.md create mode 100644 prompts/plant_context.md create mode 100644 src/Infrastructure/Kb/KbAuthService.cs create mode 100644 src/Infrastructure/Kb/KbEmbeddingClient.cs create mode 100644 src/Infrastructure/Kb/KbIngestWorker.cs create mode 100644 src/Infrastructure/Kb/KbQdrantClient.cs create mode 100644 src/Infrastructure/Kb/KbStartupService.cs create mode 100644 src/Infrastructure/Kb/KbStorageService.cs create mode 100644 src/Infrastructure/Kb/PasswordHasher.cs create mode 100644 src/Web/Controllers/KbAuthController.cs create mode 100644 src/Web/Controllers/KbController.cs diff --git a/CLAUDE.md b/CLAUDE.md index 815a5c5..a7c750c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -7,6 +7,148 @@ ## 완료된 작업 +### 기능 추가 — 로컬 LLM 채팅 + 지식 베이스(RAG) Phase 0~5 (2026-05-13) + +#### 배경 +운전원이 채팅 UI에서 자연어로 공장 상황·계기 상태·정비 이력 등을 질문하면, (a) PostgreSQL 시계열/이벤트 데이터와 (b) 관리자가 업로드한 KB 문서(Qdrant 벡터 검색)를 합성해 답하도록 통합. 별도 14번 탭 "RAG 관리"에서 관리자 비번 인증 후 문서 업로드/인덱싱/관리. 
+ +설계서: `plans/LLM채팅+지식증강플랜.md` + +#### 아키텍처 +``` +[채팅 #13] ── /api/ollama/vllm/chat/stream ──► [OllamaController] + │ SSE: message, tool_start, tool_result + │ tool_calls 루프(최대 10라운드) + ▼ + [McpClient → Python MCP] + │ + ┌───────────────────────────────┴───────────┐ + ▼ ▼ + PostgreSQL Qdrant 컬렉션 7개 + (history/realtime/event/ ├── ws-65f457145aee80b2 (코드) + tag_metadata/kb_*) ├── experion-opc-docs (R530) + └── kb_* 5개 (사용자 KB) + +[RAG 관리 #14] ── /api/kb/{auth,upload,documents,jobs,download,...} ──► + [KbAuthController + KbController + KbIngestWorker(BackgroundService)] + │ Argon2id 비번 + 60분 세션 토큰 + │ 업로드 → storage/kb/{yyyy-MM}/{uuid}.ext (SHA256) + │ 큐 폴링 2초 → parse(MCP) → embed(Ollama 768-dim) → upsert(Qdrant) + │ attempts ≥3 = failed +``` + +#### KB 데이터 모델 (PostgreSQL, 자동 마이그레이션) +- `kb_collections` (5종 시드: system_instrument / plant_operation / procedure / report / vendor_doc) +- `kb_documents` (UUID PK + collection_key FK + status: pending/parsing/embedding/indexed/failed/disabled) +- `kb_ingest_jobs` (stage: parse, attempts, finished_at) +- `kb_admin_credential` (단일 행, Argon2id 해시) +- `kb_admin_sessions` (60분 만료) + +#### Phase별 구현 + +**Phase 0 — 사전 정비** +| 파일 | 수정 내용 | +|------|----------| +| `mcp-server/worker/nl2sql_worker.py` | `time_bucket('1 min', ts)` → history_table.recorded_at 직접 SELECT, `_get_tag_metadata`도 server.py 형식과 일치 | +| `mcp-server/llm-model.json` | 실제 서빙 모델(`Qwen3.6-27B-FP8`)과 이미 동기화 — 변경 없음 | +| `prompts/plant_context.md` | 신규 (빈 골격) — 단위/계기 약어/태그 규칙/예시 자유 작성 영역 | +| `src/Web/Controllers/OllamaController.cs` | `ComposeSystemPrompt(userPrompt, toolsEnabled)` 추가 — `BaseSystemPromptKo` + plant_context.md + `ToolGuideKo` + 사용자 입력 순서로 합성 | +| `src/Web/wwwroot/js/app.js` | 영문 하드코딩된 tool description 제거 (서버에서 합성) | +| `src/Web/appsettings.json` | `PromptsDirectory: "../../prompts"` 추가 | + +**Phase 1 — 데이터 모델 & 인증** +| 파일 | 수정 내용 | +|------|----------| +| `src/Core/Domain/Entities/ExperionEntities.cs` | `KbCollection / KbDocument / KbIngestJob / 
KbAdminCredential / KbAdminSession` 5개 엔티티 추가 | +| `src/Infrastructure/Database/ExperionDbContext.cs` | DbSet 5개 + OnModelCreating 인덱스 + InitializeAsync에 DDL/시드 (pgcrypto 활성화 포함) | +| `src/Infrastructure/Kb/KbQdrantClient.cs` | 신규 — `EnsureCollectionAsync`, `DeleteByDocAsync`, `UpsertAsync` | +| `src/Infrastructure/Kb/KbStartupService.cs` | 신규 IHostedService — 부팅 시 활성 컬렉션 5개 Qdrant ensure | +| `src/Infrastructure/Kb/PasswordHasher.cs` | 신규 — Argon2id (4 thread, 64MB, 3 iter) + `NewSessionToken` | +| `src/Infrastructure/Kb/KbAuthService.cs` | 신규 — `EnsureCredentialAsync` (env or 자동생성), Login/Validate/Logout/ChangePassword | +| `src/Web/Controllers/KbAuthController.cs` | 신규 — `/api/kb/auth/{login\|logout\|status\|change-password}`, `X-Kb-Token` 헤더 | +| `src/Web/ExperionCrawler.csproj` | `Konscious.Security.Cryptography.Argon2 v1.3.1` 추가 | +| `src/Web/Program.cs` | KB 서비스 등록 + 부팅 시 `EnsureCredentialAsync` 호출 | + +**Phase 2 — 업로드 & 비동기 워커** +| 파일 | 수정 내용 | +|------|----------| +| `src/Infrastructure/Kb/KbStorageService.cs` | 신규 — `storage/kb/{yyyy-MM}/{uuid}.{ext}`, SHA256 스트림 계산 | +| `src/Infrastructure/Kb/KbEmbeddingClient.cs` | 신규 — Ollama nomic-embed-text(`/api/embeddings`) 768-dim | +| `src/Infrastructure/Kb/KbIngestWorker.cs` | 신규 BackgroundService — 2초 폴링, parse→embed→index 단일 패스, attempts ≥3=failed | +| `src/Web/Controllers/KbController.cs` | 신규 — upload(multipart, RequestSizeLimit 500MB), documents 페이지네이션, jobs 조회, download(Content-Disposition), delete(Qdrant+storage 동시정리), reindex, disable, bulk-disable, purge-disabled | +| `mcp-server/parsers/` | 신규 디렉터리 — `xlsx_parser`(시트+행), `pdf_parser`(페이지+표), `docx_parser`(헤딩 path), `text_parser`(md/txt) | +| `mcp-server/server.py` | `@mcp.tool() parse_document(doc_id, title, file_path, mime_type, collection_key, chunking_policy)` 추가 | +| `mcp-server/pyproject.toml` | `openpyxl / python-docx / pdfplumber` 의존성 추가 | + +**Phase 3 — 관리 탭 #14** +| 파일 | 수정 내용 | +|------|----------| +| `src/Web/wwwroot/index.html` | 사이드바 14번 탭 
+ `pane-kbadmin` 섹션 (로그인 카드, 필터, 문서 테이블, 업로드/비번변경 모달) | +| `src/Web/wwwroot/js/app.js` | `kbLogin / kbLogout / kbLoadCollections / kbRefresh / kbRenderDocs / kbUpload* / kbDelete / kbReindex / kbDisable / kbBulkDisable / kbPurgeDisabled / kbChangePw*` + 1.5초 진행률 폴링 + sessionStorage 토큰 | +| `src/Web/wwwroot/css/style.css` | `.kb-login-card / .kb-main / .kb-doc-tbl / .kb-status (pending/parsing/embedding/indexed/failed/disabled 색상) / .kb-modal` | + +**Phase 4 — 다운로드 & 검색** +| 파일 | 수정 내용 | +|------|----------| +| `mcp-server/server.py` | `KB_COLLECTIONS` 상수, `_search_kb_collection` (Qdrant 단일 + tags filter), `_recency_factor` (7d+10% / 30d+5% / 90d+2%), `_search_kb_raw` (다중 컬렉션 검색→가중치→since 후필터→dedup→top_k), `@mcp.tool() search_kb`, `rag_query` 확장 (`search_kb`, `kb_collections` 인자) | + +**Phase 5 — 채팅 통합** +| 파일 | 수정 내용 | +|------|----------| +| `src/Web/Controllers/OllamaController.cs` | `EmitToolStart(id, name, argsJson)` / `EmitToolResult(id, name, ok, payload)` 헬퍼. `VllmChatStreamWithTools`의 공식 tool_calls 경로 + JSON-텍스트 폴백 경로 모두 SSE 이벤트 발행 | +| `src/Web/wwwroot/js/app.js` | SSE 파서 버그 수정 (`event:` 라인 추적), `llmAppendToolCard / llmUpdateToolCard / llmRenderToolPayload / llmRenderTable / llmRenderKbHits` 추가, `llmKbDocMap` + `llmLinkKbCitations` (제목→다운로드 링크 치환), `LLM_STARTER_CHIPS` 7종 + `llmUseChip` | +| `src/Web/wwwroot/css/style.css` | `.llm-tool-cards / .llm-tool-card (spin 애니, ok/err 색상) / .llm-tool-tbl (sticky header) / .llm-kb-hits / .kb-cite-link / .llm-chip` | + +#### 주요 설계 결정 + +| 항목 | 결정 | +|------|------| +| 관리자 권한 | 비번 인증 (Argon2id), 세션 토큰 60분, `X-Kb-Token` 헤더 | +| 초기 비번 | 환경변수 `KB_ADMIN_INITIAL_PASSWORD` 우선, 없으면 부팅 시 콘솔에 16자 랜덤 출력 | +| 컬렉션 구조 | doc_type별 5개 분리 컬렉션 (마스터 시드) + 자유 태그 | +| 임베딩 모델 | 기존 `nomic-embed-text` (768-dim) — Phase 0~5는 그대로, BGE-M3 마이그레이션은 보류 | +| 청킹 정책 | xlsx 시트+행 둘 다, pdf 페이지+표 별도, docx 헤딩 path, md/txt 단순 | +| 재인덱스/삭제 | Qdrant(`doc_id` filter)와 storage 파일 동시 정리 | +| Worker 큐 처리 | parse→embed→index 단일 패스(한 잡으로 끝까지), attempts ≥3 = failed, 
2초 폴링 | +| 시스템 프롬프트 | 서버에서 합성 (한글 base + plant_context.md + tool guide + 사용자 입력) | +| SSE 이벤트 | `message` / `tool_start` / `tool_result` / `done` / `error` — 클라이언트 파서가 event-type 추적 | +| KB 인용 | search_kb 결과 title↔doc_id 매핑 누적, 본문에 등장 시 다운로드 링크로 자동 치환 | +| 자동 표 렌더 | `{success, columns, data}` 또는 `data:[{...}]` 형태 감지 시 최대 50행 HTML 테이블 | + +#### API 엔드포인트 (신규) +- `POST /api/kb/auth/login` / `logout` / `change-password`, `GET /api/kb/auth/status` +- `GET /api/kb/collections` — 활성 컬렉션 + 문서/청크 카운트 +- `POST /api/kb/upload` (multipart, admin) — 즉시 doc_id 반환, 큐 적재 +- `GET /api/kb/documents?collection=&status=&q=&page=&pageSize=` +- `GET /api/kb/documents/{id}` / `DELETE /api/kb/documents/{id}` (admin) +- `POST /api/kb/documents/{id}/reindex` / `/disable` (admin) +- `POST /api/kb/documents/bulk-disable` / `/purge-disabled` (admin) +- `GET /api/kb/jobs?docId=&stage=&pendingOnly=` +- `GET /api/kb/download/{docId}` — Content-Disposition 원본 스트림 (인증 X) + +#### MCP 도구 (신규) +- `parse_document(doc_id, title, file_path, mime_type, collection_key, chunking_policy)` — KbIngestWorker 전용 +- `search_kb(query, collection_keys?, top_k=8, tags?, since?, boost_recent=True)` — 채팅 노출 +- `rag_query` 확장 — `search_kb=False`, `kb_collections=None` 옵션 + +#### 빌드 결과 +- `dotnet build` — 경고 0건, 에러 0건 +- `mcp-server` Python 6개 파일 (server.py, nl2sql_worker.py, parsers/*) syntax OK + +#### 런타임 셋업 (코드 외) +- `cd mcp-server && uv pip install -e .` — Phase 2에서 추가된 `openpyxl/python-docx/pdfplumber` 설치 +- `mcp-server` 재시작 — `parse_document`, `search_kb` 새 도구 인식 +- 앱 첫 기동 후 콘솔의 `[Kb] 관리자 초기 비밀번호 자동 생성: XXXX` 로그 → 14번 탭에서 즉시 변경 +- Qdrant 5개 컬렉션 생성 확인 — `curl http://localhost:6333/collections` + +#### 잔여 작업 +- Phase 6 (보강 도구): `query_events`, `summarize_events`, `active_alarms`, `find_tags`, `generate_status_report`, `run_sql` LIMIT/timeout +- Phase 7 (옵션): NL2SQL 의도 라우터, 대화 요약, 에이전트 모드, KB 청크 미리보기 UI +- Phase 5 후순위: 시계열 미니 스파클라인, 툴 카드 메시지 영구 보존 +- 결정 보류: 현장 재고 데이터 출처, 임베딩 모델 BGE-M3 마이그레이션 + +--- + ### 기능 추가 — 
def parse(path: str) -> list[dict]:
    """Split a .docx file into one chunk per heading path.

    Body paragraphs are accumulated under the most recent heading trail
    (e.g. "Chapter / Section / Subsection"). Text that appears before the
    first heading is filed under "preface". Empty paragraphs are skipped and
    a heading with no body text produces no chunk.

    Args:
        path: Path of the .docx file to read.

    Returns:
        A list of dicts with the keys ``text``, ``chunk_kind`` (always
        ``"heading"``) and ``locator`` (``"heading=<trail>"``).
    """
    from docx import Document

    result: list[dict] = []
    trail: list[str] = []      # heading texts from the outermost level down
    pending: list[str] = []    # body paragraphs collected under `trail`

    def emit_pending() -> None:
        # Nothing is emitted for a heading that had no body text.
        if not pending:
            return
        label = " / ".join(trail) if trail else "preface"
        result.append({
            "text": "\n".join(pending).strip(),
            "chunk_kind": "heading",
            "locator": f"heading={label}",
        })

    for para in Document(path).paragraphs:
        body = (para.text or "").strip()
        if not body:
            continue

        style = para.style
        style_key = (style.name or "").lower() if style else ""
        if not style_key.startswith("heading"):
            pending.append(body)
            continue

        # A heading closes the section that was being collected.
        emit_pending()
        pending = []
        try:
            # Style names look like "Heading 2"; the trailing token is the level.
            depth = int(style_key.split()[-1])
        except (ValueError, IndexError):
            depth = 1
        # Keep the ancestors above this level and replace everything below.
        trail = trail[: max(0, depth - 1)] + [body]

    emit_pending()
    return result
def parse(path: str) -> list[dict]:
    """Chunk a Markdown or plain-text file for KB ingestion.

    Files ending in ``.md`` or ``.markdown`` are split per heading; anything
    else is split per blank-line-separated paragraph.

    Args:
        path: Path of the file to read.

    Returns:
        A list of dicts with the keys ``text``, ``chunk_kind`` and ``locator``.
    """
    ext = os.path.splitext(path)[1].lower()
    # utf-8-sig drops a leading byte-order mark. With plain utf-8 the BOM stays
    # glued to the first line, so a file starting with "# Title" would not have
    # that line recognised as a heading.
    with open(path, "r", encoding="utf-8-sig", errors="ignore") as f:
        content = f.read()

    # parse_document in server.py routes ".markdown" here as well, so it must
    # get the same heading-based chunking as ".md".
    if ext in (".md", ".markdown"):
        return _parse_md(content)
    return _parse_txt(content)


def _parse_md(text: str) -> list[dict]:
    """Split Markdown text into one chunk per ``#`` heading section.

    The heading line itself is not part of the chunk text; it is carried in
    the locator. Text before the first heading is filed under "preface".
    Lines inside a fenced code block (``` or ~~~) are never treated as
    headings, so shell or Python comments in code samples stay in the body.

    Args:
        text: Full Markdown document.

    Returns:
        A list of dicts with ``chunk_kind`` set to ``"heading"``.
    """
    chunks: list[dict] = []
    heading = "preface"
    body_lines: list[str] = []
    fence: str | None = None  # opening fence marker while inside a code block

    def flush() -> None:
        body = "\n".join(body_lines).strip()
        if body:
            chunks.append({
                "text": body,
                "chunk_kind": "heading",
                "locator": f"heading={heading}",
            })

    for ln in text.split("\n"):
        s = ln.lstrip()
        marker = s[:3]
        if marker in ("```", "~~~"):
            if fence is None:
                fence = marker
            elif fence == marker:
                fence = None
            body_lines.append(ln)
            continue

        if fence is None and s.startswith("#"):
            flush()
            body_lines = []
            heading = s.lstrip("#").strip() or "section"
        else:
            body_lines.append(ln)

    flush()
    return chunks


def _parse_txt(text: str) -> list[dict]:
    """Split plain text into paragraphs separated by blank lines.

    A line that contains only whitespace counts as blank, so paragraphs
    separated by such a line are still split apart.

    Args:
        text: Full text document.

    Returns:
        A list of dicts with ``chunk_kind`` set to ``"paragraph"`` and a
        1-based ``paragraph=<n>`` locator.
    """
    # Collapse whitespace-only lines to empty ones before splitting on "\n\n".
    normalized = "\n".join(ln if ln.strip() else "" for ln in text.split("\n"))
    paragraphs = [p.strip() for p in normalized.split("\n\n") if p.strip()]
    return [
        {
            "text": p,
            "chunk_kind": "paragraph",
            "locator": f"paragraph={i}",
        }
        for i, p in enumerate(paragraphs, start=1)
    ]
def parse(path: str) -> list[dict]:
    """Chunk an .xlsx workbook into per-sheet and per-row chunks.

    For every non-empty worksheet two kinds of chunks are produced:

    * one ``sheet`` chunk: a Markdown table built from the header row and at
      most the first 1000 data rows;
    * one ``row`` chunk per data row that has at least one non-empty cell,
      rendered as ``"<sheet>: col=val, col=val"``. Columns without a header
      name are labelled ``col<N>`` (1-based).

    The first row of each sheet is treated as the header.

    Args:
        path: Path of the workbook to read.

    Returns:
        A list of dicts with the keys ``text``, ``chunk_kind`` and ``locator``.
    """
    from openpyxl import load_workbook

    def as_text(value) -> str:
        return "" if value is None else str(value)

    wb = load_workbook(path, read_only=True, data_only=True)
    chunks: list[dict] = []

    # A read-only workbook keeps the source file open until close() is called,
    # so release it even when a sheet fails to parse.
    try:
        for sheet in wb.worksheets:
            rows = list(sheet.iter_rows(values_only=True))
            if not rows:
                continue

            header = [as_text(c) for c in rows[0]]
            width = len(header)
            sheet_name = sheet.title

            # 1) Sheet chunk: Markdown table, capped at the first 1000 data rows.
            md_lines = [
                "| " + " | ".join(header) + " |",
                "| " + " | ".join(["---"] * width) + " |",
            ]
            for r in rows[1:1001]:
                cells = [as_text(c) for c in r]
                cells += [""] * (width - len(cells))  # pad short rows
                md_lines.append("| " + " | ".join(cells[:width]) + " |")
            chunks.append({
                "text": "\n".join(md_lines),
                "chunk_kind": "sheet",
                "locator": f"sheet={sheet_name}",
            })

            # 2) Row chunks: one 'col=val' line per data row. Numbering starts
            #    at 2 so the locator matches the spreadsheet's own row number.
            for i, r in enumerate(rows[1:], start=2):
                parts = []
                for j, val in enumerate(r):
                    if val is None or val == "":
                        continue
                    col = header[j] if j < len(header) and header[j] else f"col{j+1}"
                    parts.append(f"{col}={val}")
                if not parts:
                    continue
                chunks.append({
                    "text": f"{sheet_name}: " + ", ".join(parts),
                    "chunk_kind": "row",
                    "locator": f"sheet={sheet_name}; row={i}",
                })
    finally:
        wb.close()

    return chunks
def _recency_factor(uploaded_at_iso: str | None) -> float:
    """Return a score multiplier that favours recently uploaded documents.

    Uploaded less than 7 days ago -> 1.10, less than 30 days -> 1.05,
    less than 90 days -> 1.02, otherwise 1.0. A missing or unparsable
    timestamp yields the neutral factor 1.0.

    Args:
        uploaded_at_iso: ISO-8601 timestamp. A "Z" suffix is accepted and a
            timestamp without an offset is treated as UTC.

    Returns:
        The multiplier to apply to the raw similarity score.
    """
    if not uploaded_at_iso:
        return 1.0

    import re
    from datetime import datetime, timezone

    try:
        normalized = uploaded_at_iso.strip().replace("Z", "+00:00")
        # Before Python 3.11, fromisoformat() only accepts 3 or 6 fractional
        # digits. Force exactly six so other precisions (e.g. 7-digit ticks)
        # are parsed instead of silently losing the boost.
        normalized = re.sub(
            r"\.(\d+)",
            lambda m: "." + m.group(1)[:6].ljust(6, "0"),
            normalized,
            count=1,
        )
        ts = datetime.fromisoformat(normalized)
    except (AttributeError, TypeError, ValueError):
        # Non-string or malformed input: fall back to "no boost".
        return 1.0

    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)

    age_days = (datetime.now(timezone.utc) - ts).total_seconds() / 86400.0
    for limit_days, factor in ((7, 1.10), (30, 1.05), (90, 1.02)):
        if age_days < limit_days:
            return factor
    return 1.0
def _iso_to_utc(value):
    """Parse an ISO-8601 string into an aware UTC datetime; None if unparsable."""
    from datetime import datetime, timezone

    try:
        ts = datetime.fromisoformat(str(value).strip().replace("Z", "+00:00"))
    except ValueError:
        return None
    if ts.tzinfo is None:
        ts = ts.replace(tzinfo=timezone.utc)
    return ts.astimezone(timezone.utc)


async def _search_kb_raw(
    query: str,
    collection_keys: list[str] | None,
    top_k: int,
    tags: list[str] | None,
    since: str | None,
    boost_recent: bool,
) -> list[dict]:
    """Core KB search: multi-collection semantic search, recency boost, post-filter.

    Args:
        query: Search text; embedded once and reused for every collection.
        collection_keys: Keys of KB_COLLECTIONS to search. None or empty means
            all collections; unknown keys are ignored.
        top_k: Maximum number of hits to return.
        tags: Optional tag filter passed through to the per-collection search.
        since: Optional ISO timestamp. Hits whose ``uploaded_at`` is earlier
            are dropped; hits without ``uploaded_at`` are kept.
        boost_recent: When True, multiply each score by the recency factor.

    Returns:
        Up to ``top_k`` hit dicts sorted by boosted score (descending), with
        at most one hit per (doc_id, locator) pair.
    """
    targets = collection_keys or list(KB_COLLECTIONS.keys())
    qdrant_names = [KB_COLLECTIONS[k] for k in targets if k in KB_COLLECTIONS]
    if not qdrant_names:
        return []

    vec = await _embed(query)
    # Over-fetch per collection so the merged list can still fill top_k
    # after filtering and de-duplication.
    per_coll_k = max(top_k, 8)

    # Parse the cut-off once. Comparing the raw strings mis-orders equal
    # instants written differently ("Z" vs "+00:00", with or without
    # fractional seconds), so compare datetimes whenever both sides parse.
    since_ts = _iso_to_utc(since) if since else None

    results: list[dict] = []
    for qname in qdrant_names:
        hits = await _search_kb_collection(qname, vec, per_coll_k, tags=tags)
        for h in hits:
            p = h.get("payload") or {}
            uploaded_at = p.get("uploaded_at")

            if since and uploaded_at:
                uploaded_ts = _iso_to_utc(uploaded_at) if since_ts is not None else None
                if uploaded_ts is not None:
                    too_old = uploaded_ts < since_ts
                else:
                    # One side is not parsable: fall back to the plain
                    # comparison and keep the hit if even that is impossible.
                    try:
                        too_old = uploaded_at < since
                    except TypeError:
                        too_old = False
                if too_old:
                    continue

            base_score = h.get("score", 0.0)
            recency = _recency_factor(uploaded_at) if boost_recent else 1.0
            results.append({
                "score": base_score * recency,
                "raw_score": base_score,
                "doc_id": p.get("doc_id"),
                "collection_key": p.get("collection_key"),
                "title": p.get("title"),
                "text": p.get("text", ""),
                "chunk_kind": p.get("chunk_kind"),
                "locator": p.get("locator"),
                "uploaded_at": uploaded_at,
                "tags": p.get("tags") or [],
            })

    # Sort by score (descending) and keep only the best hit for each
    # (doc_id, locator) pair; different locations in one document are all kept.
    results.sort(key=lambda r: r["score"], reverse=True)
    seen: set[str] = set()
    unique: list[dict] = []
    for r in results:
        key = f'{r.get("doc_id")}::{r.get("locator")}'
        if key in seen:
            continue
        seen.add(key)
        unique.append(r)
        if len(unique) >= top_k:
            break
    return unique
@mcp.tool()
async def parse_document(
    doc_id: str,
    title: str,
    file_path: str,
    mime_type: str = "",
    collection_key: str = "",
    chunking_policy: str = "",
) -> str:
    """KB ingest parser: chunk a file with the parser matching its extension.

    Supported extensions: .xlsx/.xlsm, .pdf, .docx, .md/.markdown/.txt.
    The parser runs in a worker thread so the event loop is not blocked.

    Args:
        doc_id: Document ID (UUID string); echoed back in the result.
        title: Document title; only written to the server log on failure.
        file_path: Path of the file to parse.
        mime_type: Informational (optional); not used for dispatch.
        collection_key: Informational (optional).
        chunking_policy: JSON string reserved for future policy switches;
            currently ignored.

    Returns:
        JSON string: {"success": true, "doc_id", "chunks": [{"text",
        "chunk_kind", "locator"}, ...], "count"}
        or {"success": false, "error": "..."}.
    """
    import importlib
    import os

    if not os.path.isfile(file_path):
        return json.dumps({"success": False, "error": f"file not found: {file_path}"}, ensure_ascii=False)

    # Extension -> module inside the `parsers` package.
    parser_by_ext = {
        ".xlsx": "xlsx_parser",
        ".xlsm": "xlsx_parser",
        ".pdf": "pdf_parser",
        ".docx": "docx_parser",
        ".md": "text_parser",
        ".txt": "text_parser",
        ".markdown": "text_parser",
    }

    ext = os.path.splitext(file_path)[1].lower()
    module_name = parser_by_ext.get(ext)
    if module_name is None:
        return json.dumps(
            {"success": False, "error": f"unsupported extension: {ext}"},
            ensure_ascii=False
        )

    try:
        # Imported lazily so a missing optional dependency only breaks the
        # file types that need it.
        parser = importlib.import_module(f"parsers.{module_name}")
        chunks = await asyncio.to_thread(parser.parse, file_path)
        return json.dumps(
            {"success": True, "doc_id": doc_id, "chunks": chunks, "count": len(chunks)},
            ensure_ascii=False
        )
    except Exception as e:
        # Tool boundary: report the failure to the caller, but keep the
        # traceback in the server log so it can be diagnosed.
        logging.exception(
            "[parse_document] failed: doc_id=%s title=%s path=%s", doc_id, title, file_path
        )
        return json.dumps({"success": False, "error": f"parse failed: {e}"}, ensure_ascii=False)
specifier = ">=1.3.0" }, { name = "sentence-transformers", specifier = ">=3.0.0" }, @@ -2597,6 +2603,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/2b/f8434233fab2bd66a02ec014febe4e5adced20e2693e0e90a07d118ed30e/pandas-3.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:5371b72c2d4d415d08765f32d689217a43227484e81b2305b52076e328f6f482", size = 9455341, upload-time = "2026-03-31T06:48:28.418Z" }, ] +[[package]] +name = "pdfminer-six" +version = "20251230" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "charset-normalizer" }, + { name = "cryptography" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/46/9a/d79d8fa6d47a0338846bb558b39b9963b8eb2dfedec61867c138c1b17eeb/pdfminer_six-20251230.tar.gz", hash = "sha256:e8f68a14c57e00c2d7276d26519ea64be1b48f91db1cdc776faa80528ca06c1e", size = 8511285, upload-time = "2025-12-30T15:49:13.104Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/d7/b288ea32deb752a09aab73c75e1e7572ab2a2b56c3124a5d1eb24c62ceb3/pdfminer_six-20251230-py3-none-any.whl", hash = "sha256:9ff2e3466a7dfc6de6fd779478850b6b7c2d9e9405aa2a5869376a822771f485", size = 6591909, upload-time = "2025-12-30T15:49:10.76Z" }, +] + +[[package]] +name = "pdfplumber" +version = "0.11.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pdfminer-six" }, + { name = "pillow" }, + { name = "pypdfium2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/37/9ca3519e92a8434eb93be570b131476cc0a4e840bb39c62ddb7813a39d53/pdfplumber-0.11.9.tar.gz", hash = "sha256:481224b678b2bbdbf376e2c39bf914144eef7c3d301b4a28eebf0f7f6109d6dc", size = 102768, upload-time = "2026-01-05T08:10:29.072Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/c8/cdbc975f5b634e249cfa6597e37c50f3078412474f21c015e508bfbfe3c3/pdfplumber-0.11.9-py3-none-any.whl", hash = "sha256:33ec5580959ba524e9100138746e090879504c42955df1b8a997604dd326c443", size = 60045, upload-time 
= "2026-01-05T08:10:27.512Z" }, +] + [[package]] name = "pillow" version = "12.2.0" @@ -3156,6 +3189,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] +[[package]] +name = "pypdfium2" +version = "5.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/3d/dc934d3b606c51c3ecc95b6731d84b7dd7ab8e513a50b0e98a4da6c8a719/pypdfium2-5.8.0.tar.gz", hash = "sha256:049397c647e50f83115ee951c49394dab9e9ba52ebdd5a11ab1109390eb3d34e", size = 271934, upload-time = "2026-05-04T17:39:43.794Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/8c/6b75b923cb81368fa3ea7c48a0616b839620a3aeff899885bd930449b89e/pypdfium2-5.8.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:f67b6c74b716d9ac725ad1af49ae786ad813ac20823d45606d59f1fc06caa8af", size = 3374554, upload-time = "2026-05-04T17:39:05.552Z" }, + { url = "https://files.pythonhosted.org/packages/ef/61/a885c7f36efba89ec98e3d1fe95c83b48c2d6dea321e9194ac6460e7a834/pypdfium2-5.8.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:53e82bf3e6a2da170b1bda83f93b7eec57cb6efe3cacd05cba78823879a85203", size = 2831667, upload-time = "2026-05-04T17:39:08.028Z" }, + { url = "https://files.pythonhosted.org/packages/86/1f/04b5627f6dba312d3e707e5b019c9f24d8b03b5aa366866a9e02ec00f8d4/pypdfium2-5.8.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:085e633dcc89b65ff4035a4787e98ce7ae636836eb39c83dd0db26113d9774bc", size = 3450815, upload-time = "2026-05-04T17:39:09.551Z" }, + { url = "https://files.pythonhosted.org/packages/a9/77/8e3a2aba2bc4aef5abe1b1306d05b00588dc0bf7f5c850d1adf6164c786b/pypdfium2-5.8.0-py3-none-macosx_11_0_x86_64.whl", hash = 
"sha256:bc84b7c6efede88fcfb9467f81daf416f26b973a54fc1cf4d3410d622fda6d7a", size = 3634395, upload-time = "2026-05-04T17:39:11.225Z" }, + { url = "https://files.pythonhosted.org/packages/93/11/6f2b1847d9fa457b3b7251afc2bba2706d104a0c6f01431dfae5d679a839/pypdfium2-5.8.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63bf09b2e13ba8545c930d243f0650c664a1b51314daa3b5f38df6d1a17b4bc", size = 3617413, upload-time = "2026-05-04T17:39:13.139Z" }, + { url = "https://files.pythonhosted.org/packages/ed/fd/99ce639de5ca06d21743c740dd988cd209dda623bc763ae10b8a162022e1/pypdfium2-5.8.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:937881c1698456749ed203a58db1895baa5eb7178cdb837ef84867790638da28", size = 3347639, upload-time = "2026-05-04T17:39:15.086Z" }, + { url = "https://files.pythonhosted.org/packages/fa/47/82864cc6e26dd8969d5594c168635acb16458d35cf5fed65d6b2e32abb42/pypdfium2-5.8.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6be9dc2b84a8694ad7e626bab133244e8241014d5ed1930d865a9bdf90df1e24", size = 3746404, upload-time = "2026-05-04T17:39:17.094Z" }, + { url = "https://files.pythonhosted.org/packages/82/58/e41e49bba951f61921bac7289e67fe02af5ac57192d0bbfb5f459dc3691d/pypdfium2-5.8.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f27bd82891ae302dd02d736b14809661f6d1220ee1e96dbed9b23e2811922a3", size = 4177893, upload-time = "2026-05-04T17:39:18.729Z" }, + { url = "https://files.pythonhosted.org/packages/b4/15/fa7031010d5cf6853dadb4864680a0bfb7782c5bb6a1a401e0c25c4fca87/pypdfium2-5.8.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26c1089cdbbdc7fe1248f6d17fe3f30214be4f287dd0196b31aaee18a1564240", size = 3665152, upload-time = "2026-05-04T17:39:20.207Z" }, + { url = "https://files.pythonhosted.org/packages/de/6a/5a3520a8b0cfa8d7fdc3f03a07ad9d6146c28ffd519330706f64fd8939a8/pypdfium2-5.8.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", 
hash = "sha256:1c038a9290864aaa4862dd32e591993d82551ca4d152b4e8ce6d43ba37dc04a8", size = 3095365, upload-time = "2026-05-04T17:39:22.054Z" }, + { url = "https://files.pythonhosted.org/packages/32/d3/845bae4de3cfa36865959046156edb5bf9baea400ccdecdd84fdd911b0f5/pypdfium2-5.8.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f104bc1a6d8bfc1ff088aa50db13b9729cfdb3722b44975c3c457e9a7b9c7318", size = 2961801, upload-time = "2026-05-04T17:39:23.817Z" }, + { url = "https://files.pythonhosted.org/packages/99/76/cf54eabee4a172241dfcfe63533bd1e11e2162114a983453a5a40bfec114/pypdfium2-5.8.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:04ca7c57a553facf8d46c6ea8ba6fa557e698670cfa4a58e0e01fdae2f6be87d", size = 4133067, upload-time = "2026-05-04T17:39:25.619Z" }, + { url = "https://files.pythonhosted.org/packages/77/66/dcf871d19187ca04ea184a99801a6e7e556d8347aa49540fee33cda6dfc5/pypdfium2-5.8.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ad42b9c22477b32dbedcbc8232833f385d92fd0cf92822547b02383cf9a476d7", size = 3749100, upload-time = "2026-05-04T17:39:27.203Z" }, + { url = "https://files.pythonhosted.org/packages/32/67/0d456c79660959ca45ad307b4d67161d29f9ed4083ee1e8fe8c6925b7c82/pypdfium2-5.8.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:388e3119cf5ca0979b7d5f6d40b7fcd5ab49e17ed4e6de6af89ba116061acfda", size = 4339212, upload-time = "2026-05-04T17:39:29.277Z" }, + { url = "https://files.pythonhosted.org/packages/76/89/e5b0e0f7936be341c91c0f45cd70d693878894ed62aed93a6ee32e9c43c4/pypdfium2-5.8.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:aa05bbfa485ce7916217aa78d856c9f9cd86b08b20846c650392a67975ee72e9", size = 4383943, upload-time = "2026-05-04T17:39:31.287Z" }, + { url = "https://files.pythonhosted.org/packages/82/21/4502ed255f082f579cd3537c2971cf1a57778d43703a08bcd1a92253189f/pypdfium2-5.8.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:f0813a16bb39d5ebd173ea5484430bb67a89b4b181db0a636c73b64ad063c3ea", size = 3925680, 
upload-time = "2026-05-04T17:39:33.241Z" }, + { url = "https://files.pythonhosted.org/packages/7d/4f/2e59723e7a07779439bd885c1b4960079c9710603308888d29ac926ae69a/pypdfium2-5.8.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:a3c78f7d20dd821bec6c072efdb21a1370b9efe10fdeeb68c969e67608e25385", size = 4269560, upload-time = "2026-05-04T17:39:34.926Z" }, + { url = "https://files.pythonhosted.org/packages/34/4e/7b6b1bde3788c8b880d4b8131d95d9d339cebafb3ad9102d82e234bb65be/pypdfium2-5.8.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:86d302e207c138c827b885a72784f7b306d840646ebeae07e8efdbc39321c629", size = 4182434, upload-time = "2026-05-04T17:39:36.624Z" }, + { url = "https://files.pythonhosted.org/packages/11/7b/6ed4782e0d7a5278330598ce8c4b2df7255f4585a0b3d04520fa580d6507/pypdfium2-5.8.0-py3-none-win32.whl", hash = "sha256:3f25fd436920a907291462b41bdc0ab9f8235c3944b4c9c15398da595ffd1fed", size = 3636680, upload-time = "2026-05-04T17:39:38.49Z" }, + { url = "https://files.pythonhosted.org/packages/19/55/da7223d4202b2461f4f889b0baf10dddec3db7f88e6fd8c52db4a516eecd/pypdfium2-5.8.0-py3-none-win_amd64.whl", hash = "sha256:55592af0bddd2d62bed18e0053c546c9b72041430c5115e54870f7f6163125b0", size = 3754962, upload-time = "2026-05-04T17:39:40.13Z" }, + { url = "https://files.pythonhosted.org/packages/fc/7a/f3dcefe6ee7389aad3ca1488c177e8fbf978206de21c7a99ccf487ea38ab/pypdfium2-5.8.0-py3-none-win_arm64.whl", hash = "sha256:3f17ed97ae8a5a1705301ca93af256a5b02f9009dee4e99c5e175831d46ebd7c", size = 3548362, upload-time = "2026-05-04T17:39:42.304Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -3168,6 +3230,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = 
"python-docx" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lxml" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a9/f7/eddfe33871520adab45aaa1a71f0402a2252050c14c7e3009446c8f4701c/python_docx-1.2.0.tar.gz", hash = "sha256:7bc9d7b7d8a69c9c02ca09216118c86552704edc23bac179283f2e38f86220ce", size = 5723256, upload-time = "2025-06-16T20:46:27.921Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/00/1e03a4989fa5795da308cd774f05b704ace555a70f9bf9d3be057b680bcf/python_docx-1.2.0-py3-none-any.whl", hash = "sha256:3fd478f3250fbbbfd3b94fe1e985955737c145627498896a8a6bf81f4baf66c7", size = 252987, upload-time = "2025-06-16T20:46:22.506Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.2" diff --git a/mcp-server/worker/nl2sql_worker.py b/mcp-server/worker/nl2sql_worker.py index 632b72f..42a52c4 100644 --- a/mcp-server/worker/nl2sql_worker.py +++ b/mcp-server/worker/nl2sql_worker.py @@ -238,15 +238,14 @@ async def _query_pv_history(tag_names: list[str], time_from: str, time_to: str, conn = _get_db_connection() try: with conn.cursor() as cur: - # TimescaleDB의 time_bucket 함수 사용 cur.execute( """ - SELECT time_bucket('1 min', ts) AS time, tag_name, value - FROM realtime_table - WHERE tag_name = ANY(%s) - AND ts >= %s - AND ts <= %s - ORDER BY time DESC + SELECT recorded_at AS time, tagname AS tag_name, value + FROM history_table + WHERE tagname = ANY(%s) + AND recorded_at >= %s + AND recorded_at <= %s + ORDER BY recorded_at DESC, tagname LIMIT %s """, (tag_names, time_from, time_to, limit), @@ -272,17 +271,25 @@ async def _get_tag_metadata(query: str, limit: int = 10) -> str: with conn.cursor() as cur: cur.execute( """ - SELECT DISTINCT tag_name, unit, description + SELECT tagname, livevalue, timestamp, node_id FROM realtime_table - WHERE tag_name ILIKE %s - ORDER BY tag_name + WHERE tagname ILIKE %s + ORDER BY tagname LIMIT %s """, (f"%{query}%", 
limit), ) - columns = ["tag_name", "unit", "description"] + columns = ["tag_name", "current_value", "last_updated", "node_id"] rows = cur.fetchall() - data = [dict(zip(columns, row)) for row in rows] + data = [ + { + "tag_name": r[0], + "current_value": r[1], + "last_updated": r[2].isoformat() if r[2] else None, + "node_id": r[3], + } + for r in rows + ] return { "success": True, "query": query, diff --git a/plans/LLM채팅+지식증강-phase5-사용자체크리스트.md b/plans/LLM채팅+지식증강-phase5-사용자체크리스트.md new file mode 100644 index 0000000..9e4975e --- /dev/null +++ b/plans/LLM채팅+지식증강-phase5-사용자체크리스트.md @@ -0,0 +1,142 @@ +❯ 현재까지 작업한 것의 사용자가 해야할 일 정리해줘 + +● 사용자 액션 가이드 (Phase 0~5 구현물 사용 절차) + + 1. 사전 환경 셋업 (1회만) + + 1.1 Python 의존성 추가 설치 — Phase 2에서 openpyxl/python-docx/pdfplumber를 추가했으므로 + mcp-server 가상환경에 설치 필요: + cd ~/projects/ExperionCrawler/mcp-server + uv pip install -e . + + 1.2 외부 서비스 동작 확인 — 셋 다 떠 있어야 함: + curl -s http://localhost:6333/collections | jq # Qdrant + curl -s http://localhost:11434/api/tags | jq # Ollama (nomic-embed-text 필요) + curl -s http://localhost:8000/v1/models | jq # vLLM (Qwen3.6-27B-FP8) + - Ollama에 nomic-embed-text 모델이 없으면: ollama pull nomic-embed-text + + 1.3 (선택) 관리자 초기 비번을 환경변수로 미리 설정 — 콘솔 로그에 비번 노출이 싫으면: + export KB_ADMIN_INITIAL_PASSWORD='원하는초기비번' + 설정 안 하면 첫 부팅 시 자동 생성 후 콘솔에 1회 출력. + + --- + 2. 첫 부팅 + + 2.1 mcp-server 재시작 — 새 도구(parse_document, search_kb) 등록: + # 기존 프로세스 종료 후 + cd ~/projects/ExperionCrawler/mcp-server + # 기존 기동 방식 그대로 (예: uv run / python server.py --http 등) + + 2.2 ExperionCrawler 앱 빌드/기동: + cd ~/projects/ExperionCrawler + dotnet build src/Web/ExperionCrawler.csproj + dotnet run --project src/Web/ExperionCrawler.csproj + # 또는 deploy.sh 사용 + + 2.3 콘솔 로그 확인 — 다음 두 줄이 보이는지: + [Kb] 관리자 초기 비밀번호 자동 생성: XXXXXXXXXXXXXXXX ← 즉시 변경하세요 + [Kb] Qdrant 컬렉션 ensure 완료: 5건 + 랜덤 비번을 즉시 메모. 
(env var 사용했다면 "환경변수 사용" 로그 1줄만 출력) + + 2.4 Qdrant 컬렉션 5개 생성 확인: + curl -s http://localhost:6333/collections | jq '.result.collections[] | .name' + # kb_system_instrument, kb_plant_operation, kb_procedure, kb_report, kb_vendor_doc + 포함되어야 함 + + --- + 3. 관리자 첫 로그인 + 비번 변경 (필수) + + 1. 브라우저 → http://localhost:5000 → 사이드바 14 RAG 관리 클릭 + 2. 콘솔에서 받은 초기 비번 입력 → 로그인 + 3. 상단 [비밀번호 변경] → 새 비번 (6자 이상) 설정 → 자동 로그아웃 + 4. 새 비번으로 재로그인 + + --- + 4. (선택) plant_context.md 작성 + + prompts/plant_context.md는 현재 빈 골격입니다. 채워두면 채팅의 시스템 프롬프트에 자동 + 주입되어 답변 품질이 좋아집니다: + nano prompts/plant_context.md + 채울 항목 (파일 안 주석 참고): + - 단위 (Unit A: 압축, Unit B: 분리 …) + - 계기 prefix 약어 (FIC/PT/TI/XV …) + - 태그 명명 규칙 (소문자, .pv/.sp/.op/.instate0~7) + - 시간대 (UTC 저장, KST 표시) + - 예시 질문 → 도구 매핑 + + 수정 후 앱 재기동 없이 즉시 반영됩니다 (요청 시마다 파일 읽음). + + --- + 5. KB 문서 업로드 워크플로우 + + 1. 14번 탭 → [📁 파일 업로드] 클릭 + 2. 컬렉션 선택 (5종 중 1개): + - system_instrument — 계기 datasheet, P&ID 사양서 + - plant_operation — 재고, 생산현황, 정비이력, 교대일지 + - procedure — SOP, 정비 절차, 알람 대응 매뉴얼 + - report — 일/주/월 보고서 + - vendor_doc — 벤더 카탈로그, 매뉴얼 + 3. 제목(기본: 파일명) / 태그(콤마 구분, 예: unit-a, P-6201) 입력 + 4. 파일 선택 → [업로드] + 5. 목록에 새 행이 pending 상태로 등장 → 1.5초마다 자동 갱신 → parsing → embedding → indexed + ✓ + 6. 실패(failed) 시 해당 행의 에러 메시지 확인, [↻] 재인덱스 가능 + + 지원 형식: .xlsx / .xlsm / .pdf / .docx / .md / .txt + + --- + 6. 채팅에서 KB 활용 + + 1. 사이드바 13 로컬 LLM 채팅 클릭 + 2. LLM 종류 = vLLM 선택 (tool 사용은 vLLM 전용) + 3. 모델 = Qwen3.6-27B-FP8 선택 + 4. MCP 도구 ✓ 체크박스 켜기 + 5. welcome 화면의 추천 칩 중 하나 클릭 (예: "P-6201 펌프의 정비 이력") → 자동으로 입력창에 + 채워짐 → Enter + 6. 진행 상황 가시화: + - 회색 spinning 카드: 툴 호출 시작 + - 녹색 ✓: 성공 + 결과 (클릭하면 전체 펼침, 표/KB hits 자동 렌더) + - 빨간 ✗: 실패 + 에러 메시지 + 7. KB에서 가져온 문서 제목이 답변 본문에 점선 밑줄 링크로 자동 표시됨 → 클릭하면 원본 파일 + 다운로드 + + --- + 7. 
검증 (옵션) + + 항목: 작은 txt 업로드 → 30초 내 indexed + 확인 방법: 14번 탭에서 상태 칩 색상 변화 + ──────────────────────────────────────── + 항목: 큰 PDF 실패 시 재시도 동작 + 확인 방법: jobs 테이블 attempts 누적 (앱 콘솔 로그) + ──────────────────────────────────────── + 항목: 세션 60분 자동 만료 + 확인 방법: 60분 방치 후 동작 시 재로그인 요구 + ──────────────────────────────────────── + 항목: KB 인용 다운로드 + 확인 방법: search_kb 호출 후 답변 안의 제목 링크 클릭 + ──────────────────────────────────────── + 항목: Qdrant 청크 확인 + 확인 방법: curl http://localhost:6333/collections/kb_plant_operation | jq + '.result.points_count' + + --- + 8. 운영 팁 + + - 재인덱스: 청킹 정책을 바꿨거나 파일 손상 시 → 행의 [↻] 버튼 + - 임시 숨김: 잘못 올렸지만 보관은 유지 → [🚫] (비활성화, 검색 결과에서 제외) + - 영구 삭제: [✖] 또는 하단 [비활성화 영구삭제(90일 이상)] + - 동일 제목 일괄 비활성화: 재업로드 후 구버전 한 번에 비활성화 — 하단 [🚫 동일 제목 일괄 + 비활성화] + - 로그 위치: mcp-server/logs/, dotnet 콘솔 — [Kb] [Kb][Worker] [Kb][Qdrant] 접두사로 grep + + --- + 9. 알려진 제약 / 다음 단계 + + - 시계열 시각화는 표만 자동 렌더됨 (스파클라인 보류) + - 툴 실행 카드는 스트리밍 중에만 보임 — 메시지 영구 보존은 Phase 6 이후 + - 이벤트/알람 도구 (active_alarms 등), find_tags, generate_status_report 매크로는 Phase + 6에서 추가 예정 — 현재 추천 칩 중 일부는 모델이 직접 SQL을 짜서 답변 + - run_sql 안전장치 (자동 LIMIT, statement_timeout)도 Phase 6 + + --- + 막힘이 있는 단계가 있으면 어디서 멈췄는지 알려주시면 함께 보겠습니다. 
\ No newline at end of file diff --git a/plans/LLM채팅+지식증강플랜.md b/plans/LLM채팅+지식증강플랜.md index 9f0518f..0dc6fa4 100644 --- a/plans/LLM채팅+지식증강플랜.md +++ b/plans/LLM채팅+지식증강플랜.md @@ -64,20 +64,20 @@ ### 2.1 채팅 페이지 개선 방향 (별도 사용자 결정 대기 항목 있음) -| # | 항목 | 합의 | -|---|---|---| -| C1 | 추천 질문 칩(welcome 화면) | 추천 — 구현 예정 | -| C2 | plant_context.md 시스템 프롬프트 주입 | 추천 — 구현 예정 | -| C3 | event MCP 툴 3종 (`query_events`/`summarize_events`/`active_alarms`) | 추천 — 구현 예정 | -| C4 | SSE `tool_start`/`tool_result` 이벤트 + UI 가시화 | 추천 — 구현 예정 | -| C5 | 테이블/시계열 자동 렌더링 | 추천 — 구현 예정 | -| C6 | `generate_status_report` 매크로 툴 | 추천 — 구현 예정 | -| C7 | 태그 시맨틱 검색(`find_tags`) | 추천 — 구현 예정 | -| C8 | SQL 안전장치 (LIMIT 자동/statement_timeout) | 추천 — 구현 예정 | -| C9 | NL2SQL 의도 라우터 | 검토 후 결정 | -| C10 | 대화 요약/압축 | 후순위 | -| C11 | 에이전트 모드(자율 멀티스텝) | 후순위 | -| C12 | 위 1.3 결함 픽스 | 즉시 진행 | +| # | 항목 | 합의 | 상태 (2026-05-13) | +|---|---|---|---| +| C1 | 추천 질문 칩(welcome 화면) | 추천 — 구현 예정 | ✅ 완료 (Phase 5.5) | +| C2 | plant_context.md 시스템 프롬프트 주입 | 추천 — 구현 예정 | ✅ 완료 (Phase 0.3, 빈 골격 생성) | +| C3 | event MCP 툴 3종 (`query_events`/`summarize_events`/`active_alarms`) | 추천 — 구현 예정 | ⏳ Phase 6.1 | +| C4 | SSE `tool_start`/`tool_result` 이벤트 + UI 가시화 | 추천 — 구현 예정 | ✅ 완료 (Phase 5.1, 5.2) | +| C5 | 테이블/시계열 자동 렌더링 | 추천 — 구현 예정 | ✅ 표 완료 / 스파클라인 보류 (Phase 5.4) | +| C6 | `generate_status_report` 매크로 툴 | 추천 — 구현 예정 | ⏳ Phase 6.3 | +| C7 | 태그 시맨틱 검색(`find_tags`) | 추천 — 구현 예정 | ⏳ Phase 6.2 | +| C8 | SQL 안전장치 (LIMIT 자동/statement_timeout) | 추천 — 구현 예정 | ⏳ Phase 6.4 | +| C9 | NL2SQL 의도 라우터 | 검토 후 결정 | ⏳ 보류 (Phase 7.1) | +| C10 | 대화 요약/압축 | 후순위 | ⏳ 보류 (Phase 7.2) | +| C11 | 에이전트 모드(자율 멀티스텝) | 후순위 | ⏳ 보류 (Phase 7.3) | +| C12 | 위 1.3 결함 픽스 | 즉시 진행 | ✅ 완료 (Phase 0.1, 0.2) | ### 2.2 지식 증강(RAG ingest) 결정 @@ -477,56 +477,51 @@ rag_query(question: str, ## 4. 
구현 순서 (Todo) -### Phase 0 — 사전 정비 (반나절) -0.1 `mcp-server/worker/nl2sql_worker.py:244` `time_bucket('1 min', ts)` 버그 수정 -0.2 `mcp-server/llm-model.json` 모델명을 실제 vLLM 서빙명과 일치 -0.3 `OllamaController.cs:608` 시스템 프롬프트 한글화 + plant_context.md 외부 파일화 +### Phase 0 — 사전 정비 (반나절) ✅ 완료 +0.1 ✅ `mcp-server/worker/nl2sql_worker.py:244` `time_bucket('1 min', ts)` 버그 수정 (history_table.recorded_at 사용, `_get_tag_metadata`도 같이 수정) +0.2 ✅ `mcp-server/llm-model.json` — 실제 서빙 `Qwen3.6-27B-FP8`과 이미 동기화 (memory만 갱신) +0.3 ✅ `OllamaController.ComposeSystemPrompt(...)` — 한글 base + `prompts/plant_context.md`(빈 골격) + tool guide 합성, app.js 영문 하드코딩 제거 -### Phase 1 — 데이터 모델 & 인증 (1일) -1.1 PostgreSQL 마이그레이션: `kb_collections`, `kb_documents`, `kb_ingest_jobs`, - `kb_admin_credential`, `kb_admin_sessions` 테이블 생성 -1.2 시드 데이터 INSERT: kb_collections 5건 (system_instrument, plant_operation, procedure, - report, vendor_doc) -1.3 Qdrant 컬렉션 5개 생성 (kb_system_instrument, kb_plant_operation, kb_procedure, - kb_report, kb_vendor_doc) — 임베딩 차원에 맞춰 -1.4 `KbAuthController` (login/logout/status/change-password) + Argon2 해시 유틸 -1.5 첫 실행 시 초기 비번 시드 로직 +### Phase 1 — 데이터 모델 & 인증 (1일) ✅ 완료 +1.1 ✅ DDL 5개 + 시드 5건 (ExperionDbContext.InitializeAsync 자동 적용, pgcrypto 활성화) +1.2 ✅ KbStartupService — 부팅 시 활성 컬렉션 5개 Qdrant idempotent ensure (`KbQdrantClient.EnsureCollectionAsync`) +1.3 ✅ KbAuthController (login/logout/status/change-password) + `PasswordHasher` (Konscious Argon2id), `X-Kb-Token` 헤더, 초기 비번 env or 자동 생성 -### Phase 2 — 업로드 & 비동기 워커 (2일) -2.1 `KbController.Upload` — multipart 수신 → storage 저장 → kb_documents/kb_ingest_jobs INSERT -2.2 `KbIngestWorker (BackgroundService)` — 큐 폴링 + 단계별 처리 -2.3 MCP `parse_document` — xlsx (행+시트), pdf (섹션+표), docx, md/txt -2.4 MCP `_embed` 배치 호출 + Qdrant upsert (collection_key 기반 라우팅) -2.5 `KbController.Documents/Jobs` — 목록·상세·진행률 폴링 +### Phase 2 — 업로드 & 비동기 워커 (2일) ✅ 완료 +2.1 ✅ KbController.Upload — multipart 수신, `storage/kb/{yyyy-MM}/{uuid}.{ext}` 저장, SHA256, kb_documents/kb_ingest_jobs 
INSERT +2.2 ✅ KbIngestWorker — 2초 폴링, parse→embed→index 단일 패스, attempts ≥3=failed +2.3 ✅ MCP `parse_document` + `parsers/{xlsx,pdf,docx,text}_parser.py` (행+시트 / 페이지+표 / 헤딩 path / md헤딩·txt단락) +2.4 ✅ KbEmbeddingClient (Ollama nomic-embed-text 768-dim) + KbQdrantClient.UpsertAsync (collection_key 라우팅) +2.5 ✅ Documents/Jobs/Download/Delete/Reindex/Disable/BulkDisable/PurgeDisabled -### Phase 3 — 관리 탭 #14 (1일) -3.1 사이드바 14번 탭 추가, `
` 신설 -3.2 비번 입력 → 토큰 받아 sessionStorage 저장 -3.3 컬렉션 필터, 상태/태그 필터, 검색 -3.4 업로드 모달(드래그앤드롭 + collection_key 드롭다운 + 태그) -3.5 목록, 상세 보기, 삭제, 재인덱스, 일괄 비활성화, 비활성화 영구삭제 -3.6 1초 폴링으로 ingesting 진행률 표시 +### Phase 3 — 관리 탭 #14 (1일) ✅ 완료 +3.1 ✅ 사이드바 14번 + pane-kbadmin +3.2 ✅ 비번 로그인 → sessionStorage 토큰 (X-Kb-Token) +3.3 ✅ 컬렉션/상태/제목 필터 +3.4 ✅ 업로드 모달 (collection_key 드롭다운 강제 + 제목 + 태그) — 드래그앤드롭은 후순위 +3.5 ✅ 목록, 다운로드, 삭제, 재인덱스, 일괄 비활성화, 비활성화 영구삭제 +3.6 ✅ 1.5초 폴링 (pending/parsing/embedding 있을 때만 새로고침) -### Phase 4 — 다운로드 & 검색 (반나절) -4.1 `/api/kb/download/{docId}` — 원본 스트림, Content-Disposition -4.2 MCP `search_kb` — 다중 컬렉션 + uploaded_at 최신 가중치 + 태그 필터 -4.3 기존 `rag_query` 확장: `search_kb` 통합 옵션 +### Phase 4 — 다운로드 & 검색 (반나절) ✅ 완료 +4.1 ✅ /api/kb/download/{docId} — Content-Disposition + MIME (KbController.Download) +4.2 ✅ MCP `search_kb` — `_search_kb_collection`(태그 filter), `_recency_factor`(7d+10%/30d+5%/90d+2%), `_search_kb_raw`(다중 컬렉션 + since 후필터 + doc_id::locator dedup) +4.3 ✅ rag_query 확장 — `search_kb`, `kb_collections` 인자 + `_format_kb_results` 인용 직렬화 -### Phase 5 — 채팅 통합 (1~2일) -5.1 SSE 이벤트 추가: `tool_start`, `tool_result` (백엔드 `VllmChatStreamWithTools` 안) -5.2 프론트 채팅 메시지에 툴 실행 카드 렌더 (접이식) -5.3 모델 인용 자동 → 다운로드 링크 치환 -5.4 테이블/시계열 자동 렌더 (`{success, columns, data}` JSON 감지) -5.5 추천 질문 칩(welcome 화면) -5.6 system prompt 합성 로직 (plant_context.md + 도구 가이드 + 사용자 입력) +### Phase 5 — 채팅 통합 (1~2일) ✅ 완료 +5.1 ✅ SSE `tool_start`/`tool_result` 이벤트 — `EmitToolStart/EmitToolResult` 헬퍼, 공식 tool_calls 경로 + JSON-텍스트 폴백 경로 둘 다 발행 +5.2 ✅ 툴 실행 카드 (접이식, running/ok/err 색상, spin 애니메이션) +5.3 ✅ KB 인용 자동 링크 — `llmKbDocMap`에 search_kb hits title→docId 누적, `llmLinkKbCitations`로 본문 치환 +5.4 ✅ 표 자동 렌더 (`{success, columns, data}` 및 `data:[{...}]` 감지, 최대 50행) / 스파클라인은 보류 +5.5 ✅ 추천 질문 칩 7종 (활성 알람, Unit A 요약, FIC-6113 추이, 디지털 이벤트, 정비 이력, 주간 보고, find_tags) +5.6 ✅ Phase 0.3에서 이미 완료 -### Phase 6 — 보강 도구 (1일) +### Phase 6 — 보강 도구 (1일) ⏳ 미구현 6.1 MCP `query_events`, `summarize_events`, `active_alarms` (event_history_table 기반) 6.2 
MCP `find_tags` — tag_metadata 시맨틱 검색 (별도 Qdrant 컬렉션 또는 KB와 통합) 6.3 MCP `generate_status_report` — 매크로 툴 6.4 `run_sql` LIMIT 자동 + `SET LOCAL statement_timeout = 10s` -### Phase 7 — 운영 보강 (옵션) +### Phase 7 — 운영 보강 (옵션) ⏳ 미구현 7.1 NL2SQL 의도 라우터 7.2 대화 요약/압축 (장기 세션) 7.3 에이전트 모드 (자율 멀티스텝 계획) diff --git a/plans/phase5-사용자점검리스트.md b/plans/phase5-사용자점검리스트.md new file mode 100644 index 0000000..e69de29 diff --git a/prompts/plant_context.md b/prompts/plant_context.md new file mode 100644 index 0000000..391ae32 --- /dev/null +++ b/prompts/plant_context.md @@ -0,0 +1,27 @@ +# 플랜트 운전 컨텍스트 + +> 본 파일은 LLM 채팅의 시스템 프롬프트에 자동 주입됩니다. +> 운영 환경에 맞춰 단위(Area / Unit), 계기 prefix, 태그 명명 규칙, 예시 질문 등을 채워주세요. + +## 단위(Area / Unit) + + + + +## 계기 명명 약어 + + + + +## 태그 명명 규칙 + + + + +## 시간대 + + + +## 예시 질문 / 의도 라우팅 + + diff --git a/src/Core/Domain/Entities/ExperionEntities.cs b/src/Core/Domain/Entities/ExperionEntities.cs index 85d5643..9793260 100644 --- a/src/Core/Domain/Entities/ExperionEntities.cs +++ b/src/Core/Domain/Entities/ExperionEntities.cs @@ -150,6 +150,84 @@ public class TagMetadata [Column("loaded_at")] public DateTime LoadedAt { get; set; } = DateTime.UtcNow; } +// ── Knowledge Base (RAG) ───────────────────────────────────────────────────── + +/// kb_collections — 컬렉션 레지스트리(seed: system_instrument 등 5종) +[Table("kb_collections")] +public class KbCollection +{ + [Key] + [Column("collection_key")] public string CollectionKey { get; set; } = string.Empty; + [Column("display_name")] public string DisplayName { get; set; } = string.Empty; + [Column("qdrant_name")] public string QdrantName { get; set; } = string.Empty; + [Column("chunking_policy", TypeName = "jsonb")] + public string ChunkingPolicy { get; set; } = "{}"; + [Column("description")] public string? 
Description { get; set; } + [Column("is_active")] public bool IsActive { get; set; } = true; + [Column("created_at")] public DateTime CreatedAt { get; set; } = DateTime.UtcNow; +} + +/// kb_documents — 업로드된 원본 문서 메타 +[Table("kb_documents")] +public class KbDocument +{ + [Key] + [Column("id")] public Guid Id { get; set; } = Guid.NewGuid(); + [Column("collection_key")] public string CollectionKey { get; set; } = string.Empty; + [Column("title")] public string Title { get; set; } = string.Empty; + [Column("original_path")] public string OriginalPath { get; set; } = string.Empty; + [Column("file_sha256")] public string FileSha256 { get; set; } = string.Empty; + [Column("file_size")] public long? FileSize { get; set; } + [Column("mime_type")] public string? MimeType { get; set; } + [Column("tags")] public string[] Tags { get; set; } = Array.Empty(); + [Column("status")] public string Status { get; set; } = "pending"; + // pending / parsing / embedding / indexed / failed / disabled + [Column("chunk_count")] public int ChunkCount { get; set; } + [Column("error_message")] public string? ErrorMessage { get; set; } + [Column("uploaded_by")] public string? UploadedBy { get; set; } + [Column("uploaded_at")] public DateTime UploadedAt { get; set; } = DateTime.UtcNow; + [Column("indexed_at")] public DateTime? IndexedAt { get; set; } + [Column("disabled_at")] public DateTime? DisabledAt { get; set; } +} + +/// kb_ingest_jobs — 비동기 처리 큐 +[Table("kb_ingest_jobs")] +public class KbIngestJob +{ + [Key] + [Column("id")] public long Id { get; set; } + [Column("doc_id")] public Guid DocId { get; set; } + [Column("stage")] public string Stage { get; set; } = "parse"; // parse / embed / index + [Column("attempts")] public int Attempts { get; set; } + [Column("last_error")] public string? LastError { get; set; } + [Column("enqueued_at")] public DateTime EnqueuedAt { get; set; } = DateTime.UtcNow; + [Column("started_at")] public DateTime? 
StartedAt { get; set; } + [Column("finished_at")] public DateTime? FinishedAt { get; set; } +} + +/// kb_admin_credential — 관리자 비밀번호(단일 행) +[Table("kb_admin_credential")] +public class KbAdminCredential +{ + [Key] + [Column("id")] public int Id { get; set; } = 1; + [Column("password_hash")] public string PasswordHash { get; set; } = string.Empty; + [Column("salt")] public string Salt { get; set; } = string.Empty; + [Column("algorithm")] public string Algorithm { get; set; } = "argon2id"; + [Column("updated_at")] public DateTime UpdatedAt { get; set; } = DateTime.UtcNow; +} + +/// kb_admin_sessions — 관리자 세션 토큰 +[Table("kb_admin_sessions")] +public class KbAdminSession +{ + [Key] + [Column("token")] public string Token { get; set; } = string.Empty; + [Column("issued_at")] public DateTime IssuedAt { get; set; } = DateTime.UtcNow; + [Column("expires_at")] public DateTime ExpiresAt { get; set; } + [Column("client_ip")] public string? ClientIp { get; set; } +} + /// event_history_table — 디지털 포인트 상태 변경 이벤트 [Table("event_history_table")] public class EventHistoryRecord diff --git a/src/Infrastructure/Database/ExperionDbContext.cs b/src/Infrastructure/Database/ExperionDbContext.cs index ce0c9ca..7433fca 100644 --- a/src/Infrastructure/Database/ExperionDbContext.cs +++ b/src/Infrastructure/Database/ExperionDbContext.cs @@ -30,6 +30,13 @@ public class ExperionDbContext : DbContext public DbSet PidGraphStatuses => Set(); public DbSet EventHistoryRecords => Set(); + // ── Knowledge Base ──────────────────────────────────────────────────────── + public DbSet KbCollections => Set(); + public DbSet KbDocuments => Set(); + public DbSet KbIngestJobs => Set(); + public DbSet KbAdminCredentials => Set(); + public DbSet KbAdminSessions => Set(); + protected override void OnModelCreating(ModelBuilder modelBuilder) { modelBuilder.Entity(e => @@ -180,6 +187,37 @@ public class ExperionDbContext : DbContext entity.HasIndex(e => new { e.Area, e.EventTime }); entity.HasIndex(e => new { 
e.EventType, e.EventTime }); }); + + // ── Knowledge Base ─────────────────────────────────────────────────── + modelBuilder.Entity(e => + { + e.HasKey(x => x.CollectionKey); + }); + + modelBuilder.Entity(e => + { + e.HasKey(x => x.Id); + e.Property(x => x.Tags).HasColumnType("text[]"); + e.HasIndex(x => new { x.CollectionKey, x.Status, x.UploadedAt }); + e.HasIndex(x => x.Title); + }); + + modelBuilder.Entity(e => + { + e.HasKey(x => x.Id); + e.HasIndex(x => new { x.Stage, x.FinishedAt }); + }); + + modelBuilder.Entity(e => + { + e.HasKey(x => x.Id); + }); + + modelBuilder.Entity(e => + { + e.HasKey(x => x.Token); + e.HasIndex(x => x.ExpiresAt); + }); } } @@ -381,6 +419,114 @@ public class ExperionDbService : IExperionDbService // 참고: 하이퍼테이블 생성 후 보존 정책, 압축 정책, 연속 집계 설정은 // CreateHypertableAsync() 메서드에서 선택적으로 설정 가능 + // ── Knowledge Base (RAG) 테이블 ────────────────────────────────── + await _ctx.Database.ExecuteSqlRawAsync( + "CREATE EXTENSION IF NOT EXISTS \"pgcrypto\""); + + await _ctx.Database.ExecuteSqlRawAsync(""" + CREATE TABLE IF NOT EXISTS kb_collections ( + collection_key TEXT PRIMARY KEY, + display_name TEXT NOT NULL, + qdrant_name TEXT NOT NULL UNIQUE, + chunking_policy JSONB NOT NULL DEFAULT '{}'::jsonb, + description TEXT, + is_active BOOLEAN NOT NULL DEFAULT TRUE, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + """); + + await _ctx.Database.ExecuteSqlRawAsync(""" + CREATE TABLE IF NOT EXISTS kb_documents ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + collection_key TEXT NOT NULL REFERENCES kb_collections(collection_key), + title TEXT NOT NULL, + original_path TEXT NOT NULL, + file_sha256 TEXT NOT NULL, + file_size BIGINT, + mime_type TEXT, + tags TEXT[] NOT NULL DEFAULT '{}', + status TEXT NOT NULL DEFAULT 'pending', + chunk_count INTEGER NOT NULL DEFAULT 0, + error_message TEXT, + uploaded_by TEXT, + uploaded_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + indexed_at TIMESTAMPTZ, + disabled_at TIMESTAMPTZ + ) + """); + + await 
_ctx.Database.ExecuteSqlRawAsync(""" + CREATE INDEX IF NOT EXISTS idx_kb_docs_coll_status + ON kb_documents(collection_key, status, uploaded_at DESC); + CREATE INDEX IF NOT EXISTS idx_kb_docs_title + ON kb_documents(title); + """); + + await _ctx.Database.ExecuteSqlRawAsync(""" + CREATE TABLE IF NOT EXISTS kb_ingest_jobs ( + id BIGSERIAL PRIMARY KEY, + doc_id UUID NOT NULL REFERENCES kb_documents(id) ON DELETE CASCADE, + stage TEXT NOT NULL, + attempts INTEGER NOT NULL DEFAULT 0, + last_error TEXT, + enqueued_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + started_at TIMESTAMPTZ, + finished_at TIMESTAMPTZ + ) + """); + + await _ctx.Database.ExecuteSqlRawAsync(""" + CREATE INDEX IF NOT EXISTS idx_kb_jobs_pending + ON kb_ingest_jobs(stage, finished_at) + WHERE finished_at IS NULL; + """); + + await _ctx.Database.ExecuteSqlRawAsync(""" + CREATE TABLE IF NOT EXISTS kb_admin_credential ( + id INTEGER PRIMARY KEY DEFAULT 1 CHECK (id = 1), + password_hash TEXT NOT NULL, + salt TEXT NOT NULL, + algorithm TEXT NOT NULL DEFAULT 'argon2id', + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + """); + + await _ctx.Database.ExecuteSqlRawAsync(""" + CREATE TABLE IF NOT EXISTS kb_admin_sessions ( + token TEXT PRIMARY KEY, + issued_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + expires_at TIMESTAMPTZ NOT NULL, + client_ip TEXT + ) + """); + + await _ctx.Database.ExecuteSqlRawAsync(""" + CREATE INDEX IF NOT EXISTS idx_kb_sessions_expires + ON kb_admin_sessions(expires_at); + """); + + // ── 시드: kb_collections 5종 ───────────────────────────────────── + await _ctx.Database.ExecuteSqlRawAsync(""" + INSERT INTO kb_collections (collection_key, display_name, qdrant_name, chunking_policy, description) + VALUES + ('system_instrument', '시스템 & 계기 정보', 'kb_system_instrument', + '{"pdf":"section+table","xlsx":"row+sheet","docx":"heading"}'::jsonb, + '계기 datasheet, P&ID, 사양서, 노드맵'), + ('plant_operation', '공장 운전 정보', 'kb_plant_operation', + '{"xlsx":"row","docx":"heading","md":"heading"}'::jsonb, + '재고, 
생산현황, 고장이력, 교대일지'), + ('procedure', '절차서/SOP', 'kb_procedure', + '{"docx":"heading","md":"heading","pdf":"section"}'::jsonb, + 'SOP, 정비 절차, 알람 대응 매뉴얼'), + ('report', '보고서', 'kb_report', + '{"pdf":"section+table","docx":"heading"}'::jsonb, + '일/주/월 보고, 사고보고, 분석보고'), + ('vendor_doc', '벤더 자료', 'kb_vendor_doc', + '{"pdf":"section+table","docx":"heading"}'::jsonb, + '카탈로그, 매뉴얼, 인증서') + ON CONFLICT (collection_key) DO NOTHING + """); + _logger.LogInformation("[ExperionDb] 데이터베이스 초기화 완료 (TimeScaleDB 활성화)"); return true; } diff --git a/src/Infrastructure/Kb/KbAuthService.cs b/src/Infrastructure/Kb/KbAuthService.cs new file mode 100644 index 0000000..cbb2a32 --- /dev/null +++ b/src/Infrastructure/Kb/KbAuthService.cs @@ -0,0 +1,149 @@ +using ExperionCrawler.Core.Domain.Entities; +using ExperionCrawler.Infrastructure.Database; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.Logging; + +namespace ExperionCrawler.Infrastructure.Kb; + +public sealed record KbLoginResult(bool Success, string? Token, DateTime? ExpiresAt, string? Error); + +public interface IKbAuthService +{ + Task EnsureCredentialAsync(CancellationToken ct = default); + Task LoginAsync(string password, string? clientIp, CancellationToken ct = default); + Task ValidateAsync(string? token, CancellationToken ct = default); + Task LogoutAsync(string token, CancellationToken ct = default); + Task ChangePasswordAsync(string oldPassword, string newPassword, CancellationToken ct = default); +} + +public sealed class KbAuthService : IKbAuthService +{ + private readonly ExperionDbContext _db; + private readonly IConfiguration _config; + private readonly ILogger _logger; + + public KbAuthService(ExperionDbContext db, IConfiguration config, ILogger logger) + { + _db = db; + _config = config; + _logger = logger; + } + + private int SessionMinutes => + int.TryParse(_config["Kb:AdminSessionMinutes"], out var m) && m > 0 ? 
m : 60; + + public async Task EnsureCredentialAsync(CancellationToken ct = default) + { + var existing = await _db.KbAdminCredentials.FirstOrDefaultAsync(ct); + if (existing != null) return; + + var initial = _config["Kb:AdminInitialPassword"] + ?? Environment.GetEnvironmentVariable("KB_ADMIN_INITIAL_PASSWORD"); + + bool generated = false; + if (string.IsNullOrWhiteSpace(initial)) + { + initial = Guid.NewGuid().ToString("N")[..16]; + generated = true; + } + + var (hash, salt) = PasswordHasher.Hash(initial); + _db.KbAdminCredentials.Add(new KbAdminCredential + { + Id = 1, + PasswordHash = hash, + Salt = salt, + Algorithm = "argon2id", + UpdatedAt = DateTime.UtcNow + }); + await _db.SaveChangesAsync(ct); + + if (generated) + { + _logger.LogWarning("[Kb] 관리자 초기 비밀번호 자동 생성: {Pw} ← 즉시 변경하세요", initial); + } + else + { + _logger.LogInformation("[Kb] 관리자 초기 비밀번호 설정 (환경변수 사용)"); + } + } + + public async Task LoginAsync(string password, string? clientIp, CancellationToken ct = default) + { + var cred = await _db.KbAdminCredentials.FirstOrDefaultAsync(ct); + if (cred == null) return new(false, null, null, "credential not initialized"); + + if (!PasswordHasher.Verify(password, cred.PasswordHash, cred.Salt)) + return new(false, null, null, "invalid password"); + + var token = PasswordHasher.NewSessionToken(); + var expires = DateTime.UtcNow.AddMinutes(SessionMinutes); + _db.KbAdminSessions.Add(new KbAdminSession + { + Token = token, + IssuedAt = DateTime.UtcNow, + ExpiresAt = expires, + ClientIp = clientIp + }); + await _db.SaveChangesAsync(ct); + + await PurgeExpiredAsync(ct); + return new(true, token, expires, null); + } + + public async Task ValidateAsync(string? 
token, CancellationToken ct = default) + { + if (string.IsNullOrWhiteSpace(token)) return false; + var s = await _db.KbAdminSessions.FirstOrDefaultAsync(x => x.Token == token, ct); + if (s == null) return false; + if (s.ExpiresAt < DateTime.UtcNow) + { + _db.KbAdminSessions.Remove(s); + await _db.SaveChangesAsync(ct); + return false; + } + return true; + } + + public async Task LogoutAsync(string token, CancellationToken ct = default) + { + var s = await _db.KbAdminSessions.FirstOrDefaultAsync(x => x.Token == token, ct); + if (s != null) + { + _db.KbAdminSessions.Remove(s); + await _db.SaveChangesAsync(ct); + } + } + + public async Task ChangePasswordAsync(string oldPassword, string newPassword, CancellationToken ct = default) + { + var cred = await _db.KbAdminCredentials.FirstOrDefaultAsync(ct); + if (cred == null) return false; + if (!PasswordHasher.Verify(oldPassword, cred.PasswordHash, cred.Salt)) return false; + if (string.IsNullOrWhiteSpace(newPassword) || newPassword.Length < 6) return false; + + var (hash, salt) = PasswordHasher.Hash(newPassword); + cred.PasswordHash = hash; + cred.Salt = salt; + cred.Algorithm = "argon2id"; + cred.UpdatedAt = DateTime.UtcNow; + + // Drop every session (not only expired ones) so tokens issued under the old password stop working; saved together with the new hash. + var sessions = await _db.KbAdminSessions.ToListAsync(ct); + _db.KbAdminSessions.RemoveRange(sessions); + await _db.SaveChangesAsync(ct); + return true; + } + + private async Task PurgeExpiredAsync(CancellationToken ct) + { + var now = DateTime.UtcNow; + var expired = await _db.KbAdminSessions.Where(s => s.ExpiresAt < now).ToListAsync(ct); + if (expired.Count > 0) + { + _db.KbAdminSessions.RemoveRange(expired); + await _db.SaveChangesAsync(ct); + } + } +} diff --git a/src/Infrastructure/Kb/KbEmbeddingClient.cs b/src/Infrastructure/Kb/KbEmbeddingClient.cs new file mode 100644 index 0000000..3cfd9f9 --- /dev/null +++ b/src/Infrastructure/Kb/KbEmbeddingClient.cs @@ -0,0 +1,49 @@ +using System.Net.Http.Json; +using System.Text.Json; +using 
Microsoft.Extensions.Configuration; +using Microsoft.Extensions.Logging; + +namespace ExperionCrawler.Infrastructure.Kb; + +/// +/// Ollama nomic-embed-text(768-dim) 임베딩 클라이언트. /api/embeddings 직접 호출. +/// +public sealed class KbEmbeddingClient +{ + private readonly HttpClient _http; + private readonly string _model; + private readonly ILogger _logger; + + public KbEmbeddingClient(IHttpClientFactory factory, IConfiguration config, ILogger logger) + { + _http = factory.CreateClient("Ollama"); + _model = config["Kb:EmbeddingModel"] ?? "nomic-embed-text"; + _logger = logger; + } + + public async Task EmbedAsync(string text, CancellationToken ct = default) + { + try + { + var res = await _http.PostAsJsonAsync("/api/embeddings", new { model = _model, prompt = text }, ct); + if (!res.IsSuccessStatusCode) + { + _logger.LogWarning("[Kb][Embed] HTTP {Code}", (int)res.StatusCode); + return null; + } + using var doc = JsonDocument.Parse(await res.Content.ReadAsStringAsync(ct)); + if (!doc.RootElement.TryGetProperty("embedding", out var arr)) return null; + var len = arr.GetArrayLength(); + var vec = new float[len]; + int i = 0; + foreach (var e in arr.EnumerateArray()) + vec[i++] = (float)e.GetDouble(); + return vec; + } + catch (Exception ex) + { + _logger.LogWarning(ex, "[Kb][Embed] 실패"); + return null; + } + } +} diff --git a/src/Infrastructure/Kb/KbIngestWorker.cs b/src/Infrastructure/Kb/KbIngestWorker.cs new file mode 100644 index 0000000..6861798 --- /dev/null +++ b/src/Infrastructure/Kb/KbIngestWorker.cs @@ -0,0 +1,228 @@ +using System.Text.Json; +using ExperionCrawler.Core.Domain.Entities; +using ExperionCrawler.Infrastructure.Database; +using ExperionCrawler.Infrastructure.Mcp; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; + +namespace ExperionCrawler.Infrastructure.Kb; + +/// +/// kb_ingest_jobs 큐를 폴링하며 parse → 
embed → index 단계를 수행. +/// 한 잡(stage='parse')을 픽업해서 단일 트랜잭션 안에서 끝까지 진행한다. +/// +public sealed class KbIngestWorker : BackgroundService +{ + private readonly IServiceProvider _sp; + private readonly KbQdrantClient _qdrant; + private readonly KbEmbeddingClient _embed; + private readonly McpClient _mcp; + private readonly KbStorageService _storage; + private readonly ILogger _logger; + private readonly TimeSpan _pollInterval; + private readonly int _maxAttempts; + + public KbIngestWorker( + IServiceProvider sp, + KbQdrantClient qdrant, + KbEmbeddingClient embed, + McpClient mcp, + KbStorageService storage, + IConfiguration config, + ILogger logger) + { + _sp = sp; + _qdrant = qdrant; + _embed = embed; + _mcp = mcp; + _storage = storage; + _logger = logger; + + var sec = int.TryParse(config["Kb:WorkerPollIntervalSeconds"], out var s) && s > 0 ? s : 2; + _pollInterval = TimeSpan.FromSeconds(sec); + _maxAttempts = int.TryParse(config["Kb:MaxAttempts"], out var m) && m > 0 ? m : 3; + } + + protected override async Task ExecuteAsync(CancellationToken stoppingToken) + { + _logger.LogInformation("[Kb][Worker] 시작 (poll {Sec}s, maxAttempts {N})", _pollInterval.TotalSeconds, _maxAttempts); + while (!stoppingToken.IsCancellationRequested) + { + try + { + var processed = await ProcessOneAsync(stoppingToken); + if (!processed) + await Task.Delay(_pollInterval, stoppingToken); + } + catch (OperationCanceledException) { break; } + catch (Exception ex) + { + _logger.LogError(ex, "[Kb][Worker] 루프 오류"); + try { await Task.Delay(_pollInterval, stoppingToken); } catch { break; } + } + } + } + + private async Task ProcessOneAsync(CancellationToken ct) + { + using var scope = _sp.CreateScope(); + var db = scope.ServiceProvider.GetRequiredService(); + + var job = await db.KbIngestJobs + .Where(j => j.FinishedAt == null && j.Stage == "parse" && j.Attempts < _maxAttempts) + .OrderBy(j => j.EnqueuedAt) + .FirstOrDefaultAsync(ct); + + if (job == null) return false; + + job.StartedAt = 
DateTime.UtcNow; + job.Attempts++; + await db.SaveChangesAsync(ct); + + var doc = await db.KbDocuments.FirstOrDefaultAsync(d => d.Id == job.DocId, ct); + if (doc == null) + { + job.FinishedAt = DateTime.UtcNow; + job.LastError = "document not found"; + await db.SaveChangesAsync(ct); + return true; + } + + var coll = await db.KbCollections.FirstOrDefaultAsync(c => c.CollectionKey == doc.CollectionKey, ct); + if (coll == null) + { + await FailAsync(db, job, doc, "collection not found", ct); + return true; + } + + try + { + // 1) parse + doc.Status = "parsing"; + doc.ErrorMessage = null; + await db.SaveChangesAsync(ct); + + var abs = _storage.Resolve(doc.OriginalPath); + var chunks = await ParseAsync(doc, coll, abs, ct); + if (chunks == null || chunks.Count == 0) + throw new Exception("파싱 결과 청크 0건"); + + _logger.LogInformation("[Kb][Worker] {Id} parse: {N} chunks", doc.Id, chunks.Count); + + // 2) embed + doc.Status = "embedding"; + await db.SaveChangesAsync(ct); + + var points = new List(chunks.Count); + foreach (var c in chunks) + { + var vec = await _embed.EmbedAsync(c.Text, ct); + if (vec == null) throw new Exception("임베딩 실패(부분)"); + points.Add(new QdrantPoint + { + id = Guid.NewGuid(), + vector = vec, + payload = new Dictionary + { + ["doc_id"] = doc.Id.ToString(), + ["collection_key"] = doc.CollectionKey, + ["title"] = doc.Title, + ["text"] = c.Text, + ["chunk_kind"] = c.ChunkKind, + ["locator"] = c.Locator, + ["uploaded_at"] = doc.UploadedAt.ToString("O"), + ["tags"] = doc.Tags + } + }); + } + + // 3) index + doc.Status = "indexed"; // 낙관적 - 실패 시 catch에서 되돌림 + var ok = await _qdrant.UpsertAsync(coll.QdrantName, points, ct); + if (!ok) throw new Exception("Qdrant upsert 실패"); + + doc.ChunkCount = chunks.Count; + doc.IndexedAt = DateTime.UtcNow; + doc.ErrorMessage = null; + + job.FinishedAt = DateTime.UtcNow; + job.LastError = null; + await db.SaveChangesAsync(ct); + _logger.LogInformation("[Kb][Worker] {Id} indexed ({N} chunks)", doc.Id, chunks.Count); + } + 
catch (Exception ex) + { + _logger.LogWarning(ex, "[Kb][Worker] {Id} 처리 실패 (attempt {A}/{M})", doc.Id, job.Attempts, _maxAttempts); + await FailAsync(db, job, doc, ex.Message, ct); + } + + return true; + } + + private async Task FailAsync(ExperionDbContext db, KbIngestJob job, KbDocument doc, string error, CancellationToken ct) + { + job.LastError = error; + doc.ErrorMessage = error; // record the failure on the document for every attempt, not only the final one + if (job.Attempts >= _maxAttempts) + { + job.FinishedAt = DateTime.UtcNow; + doc.Status = "failed"; + } + else + { + doc.Status = "pending"; // retry pending: undo the optimistic "indexed" set before the Qdrant upsert; FinishedAt stays null so the job is picked up again + } + await db.SaveChangesAsync(ct); + } + + private sealed record ParsedChunk(string Text, string ChunkKind, string Locator); + + private async Task?> ParseAsync(KbDocument doc, KbCollection coll, string absPath, CancellationToken ct) + { + var args = new Dictionary + { + ["doc_id"] = doc.Id.ToString(), + ["title"] = doc.Title, + ["file_path"] = absPath, + ["mime_type"] = doc.MimeType ?? "", + ["collection_key"] = doc.CollectionKey, + ["chunking_policy"] = coll.ChunkingPolicy + }; + + var raw = await _mcp.CallToolAsync("parse_document", args, ct); + if (string.IsNullOrWhiteSpace(raw)) + throw new Exception("MCP parse_document 응답 없음"); + + // FastMCP는 텍스트 컨텐츠를 단순 JSON 문자열로 돌려준다. + try + { + using var jdoc = JsonDocument.Parse(raw); + if (jdoc.RootElement.TryGetProperty("success", out var s) && s.ValueKind == JsonValueKind.False) + { + var err = jdoc.RootElement.TryGetProperty("error", out var e) ? e.GetString() : "unknown"; + throw new Exception($"MCP parse_document 실패: {err}"); + } + + if (!jdoc.RootElement.TryGetProperty("chunks", out var arr) || arr.ValueKind != JsonValueKind.Array) + return new List(); + + var list = new List(); + foreach (var c in arr.EnumerateArray()) + { + var text = c.TryGetProperty("text", out var t) ? (t.GetString() ?? "") : ""; + if (string.IsNullOrWhiteSpace(text)) continue; + var kind = c.TryGetProperty("chunk_kind", out var k) ? (k.GetString() ?? 
"section") : "section"; + var loc = c.TryGetProperty("locator", out var l) ? (l.GetString() ?? "") : ""; + list.Add(new ParsedChunk(text, kind, loc)); + } + return list; + } + catch (JsonException) + { + throw new Exception("MCP parse_document JSON 파싱 실패"); + } + } +} diff --git a/src/Infrastructure/Kb/KbQdrantClient.cs b/src/Infrastructure/Kb/KbQdrantClient.cs new file mode 100644 index 0000000..eaee129 --- /dev/null +++ b/src/Infrastructure/Kb/KbQdrantClient.cs @@ -0,0 +1,86 @@ +using System.Net; +using System.Net.Http.Json; +using System.Text; +using System.Text.Json; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.Logging; + +namespace ExperionCrawler.Infrastructure.Kb; + +/// +/// Qdrant HTTP API 래퍼. nomic-embed-text 임베딩이 768-dim 이므로 동일하게 가정. +/// +public sealed class KbQdrantClient +{ + private readonly HttpClient _http; + private readonly ILogger _logger; + private readonly int _vectorSize; + + public KbQdrantClient(IConfiguration config, ILogger logger) + { + var baseUrl = config["Kb:QdrantUrl"] ?? "http://localhost:6333"; + _http = new HttpClient { BaseAddress = new Uri(baseUrl), Timeout = TimeSpan.FromSeconds(30) }; + _vectorSize = int.TryParse(config["Kb:VectorSize"], out var v) ? 
v : 768; + _logger = logger; + } + + public async Task EnsureCollectionAsync(string name, CancellationToken ct = default) + { + var get = await _http.GetAsync($"/collections/{name}", ct); + if (get.StatusCode == HttpStatusCode.OK) return true; + if (get.StatusCode != HttpStatusCode.NotFound) + { + _logger.LogWarning("[Kb][Qdrant] 컬렉션 조회 실패 {Name} HTTP {Code}", name, (int)get.StatusCode); + } + + var body = new + { + vectors = new { size = _vectorSize, distance = "Cosine" } + }; + var res = await _http.PutAsJsonAsync($"/collections/{name}", body, ct); + if (res.IsSuccessStatusCode) + { + _logger.LogInformation("[Kb][Qdrant] 컬렉션 생성 {Name} (size={Size})", name, _vectorSize); + return true; + } + var err = await res.Content.ReadAsStringAsync(ct); + _logger.LogError("[Kb][Qdrant] 컬렉션 생성 실패 {Name} HTTP {Code}: {Err}", name, (int)res.StatusCode, err); + return false; + } + + public async Task DeleteByDocAsync(string collection, Guid docId, CancellationToken ct = default) + { + var body = new + { + filter = new + { + must = new[] + { + new { key = "doc_id", match = new { value = docId.ToString() } } + } + } + }; + var res = await _http.PostAsJsonAsync($"/collections/{collection}/points/delete?wait=true", body, ct); + return res.IsSuccessStatusCode; + } + + public async Task UpsertAsync(string collection, IEnumerable points, CancellationToken ct = default) + { + var body = new { points }; + var res = await _http.PutAsJsonAsync($"/collections/{collection}/points?wait=true", body, ct); + if (!res.IsSuccessStatusCode) + { + var err = await res.Content.ReadAsStringAsync(ct); + _logger.LogError("[Kb][Qdrant] upsert 실패 {Name} HTTP {Code}: {Err}", collection, (int)res.StatusCode, err); + return false; + } + return true; + } +} + +public sealed class QdrantPoint +{ + public Guid id { get; set; } + public float[] vector { get; set; } = Array.Empty(); + public Dictionary payload { get; set; } = new(); +} diff --git a/src/Infrastructure/Kb/KbStartupService.cs 
b/src/Infrastructure/Kb/KbStartupService.cs new file mode 100644 index 0000000..a785149 --- /dev/null +++ b/src/Infrastructure/Kb/KbStartupService.cs @@ -0,0 +1,48 @@ +using ExperionCrawler.Infrastructure.Database; +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; + +namespace ExperionCrawler.Infrastructure.Kb; + +/// +/// 앱 기동 시 kb_collections.is_active=TRUE 인 컬렉션에 대해 Qdrant 컬렉션을 idempotent 생성. +/// +public sealed class KbStartupService : IHostedService +{ + private readonly IServiceProvider _sp; + private readonly KbQdrantClient _qdrant; + private readonly ILogger _logger; + + public KbStartupService(IServiceProvider sp, KbQdrantClient qdrant, ILogger logger) + { + _sp = sp; + _qdrant = qdrant; + _logger = logger; + } + + public async Task StartAsync(CancellationToken cancellationToken) + { + try + { + using var scope = _sp.CreateScope(); + var db = scope.ServiceProvider.GetRequiredService(); + var active = await db.KbCollections + .Where(c => c.IsActive) + .Select(c => c.QdrantName) + .ToListAsync(cancellationToken); + + foreach (var name in active) + await _qdrant.EnsureCollectionAsync(name, cancellationToken); + + _logger.LogInformation("[Kb] Qdrant 컬렉션 ensure 완료: {Count}건", active.Count); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "[Kb] Qdrant 컬렉션 ensure 실패 — 추후 첫 사용 시 재시도 필요"); + } + } + + public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask; +} diff --git a/src/Infrastructure/Kb/KbStorageService.cs b/src/Infrastructure/Kb/KbStorageService.cs new file mode 100644 index 0000000..0d5b973 --- /dev/null +++ b/src/Infrastructure/Kb/KbStorageService.cs @@ -0,0 +1,73 @@ +using System.Security.Cryptography; +using Microsoft.Extensions.Configuration; + +namespace ExperionCrawler.Infrastructure.Kb; + +public sealed record KbStoredFile(string AbsolutePath, string RelativePath, string Sha256, long Size); + +public sealed 
class KbStorageService +{ + private readonly string _root; + + public KbStorageService(IConfiguration config) + { + var configured = config["Kb:StorageRoot"] ?? "../../storage/kb"; + _root = Path.IsPathRooted(configured) + ? configured + : Path.GetFullPath(Path.Combine(Directory.GetCurrentDirectory(), configured)); + Directory.CreateDirectory(_root); + } + + public string Root => _root; + + public async Task SaveAsync(Stream input, string originalFileName, CancellationToken ct = default) + { + var now = DateTime.UtcNow; + var monthDir = Path.Combine(_root, $"{now:yyyy-MM}"); + Directory.CreateDirectory(monthDir); + + var id = Guid.NewGuid(); + var ext = Path.GetExtension(originalFileName); + if (string.IsNullOrWhiteSpace(ext)) ext = ""; + var fileName = $"{id:N}{ext}"; + var abs = Path.Combine(monthDir, fileName); + + using var fs = new FileStream(abs, FileMode.CreateNew, FileAccess.Write, FileShare.None, 64 * 1024, true); + using var sha = SHA256.Create(); + var buffer = new byte[64 * 1024]; + long total = 0; + int read; + while ((read = await input.ReadAsync(buffer.AsMemory(0, buffer.Length), ct)) > 0) + { + sha.TransformBlock(buffer, 0, read, null, 0); + await fs.WriteAsync(buffer.AsMemory(0, read), ct); + total += read; + } + sha.TransformFinalBlock(Array.Empty(), 0, 0); + var hashHex = Convert.ToHexString(sha.Hash!).ToLowerInvariant(); + + var rel = Path.Combine($"{now:yyyy-MM}", fileName).Replace('\\', '/'); + return new KbStoredFile(abs, rel, hashHex, total); + } + + public string Resolve(string relativePath) + { + if (Path.IsPathRooted(relativePath)) return relativePath; + return Path.Combine(_root, relativePath); + } + + public bool Delete(string relativePath) + { + try + { + var p = Resolve(relativePath); + if (File.Exists(p)) + { + File.Delete(p); + return true; + } + } + catch { } + return false; + } +} diff --git a/src/Infrastructure/Kb/PasswordHasher.cs b/src/Infrastructure/Kb/PasswordHasher.cs new file mode 100644 index 0000000..4779576 --- /dev/null 
+++ b/src/Infrastructure/Kb/PasswordHasher.cs @@ -0,0 +1,55 @@ +using System.Security.Cryptography; +using System.Text; +using Konscious.Security.Cryptography; + +namespace ExperionCrawler.Infrastructure.Kb; + +public static class PasswordHasher +{ + private const int SaltBytes = 16; + private const int HashBytes = 32; + private const int DegreeOfParallelism = 4; + private const int MemoryKb = 65536; + private const int Iterations = 3; + + public static (string HashB64, string SaltB64) Hash(string password) + { + var salt = RandomNumberGenerator.GetBytes(SaltBytes); + return (HashWithSalt(password, salt), Convert.ToBase64String(salt)); + } + + public static bool Verify(string password, string hashB64, string saltB64) + { + try + { + var salt = Convert.FromBase64String(saltB64); + var expected = Convert.FromBase64String(hashB64); + var actualB64 = HashWithSalt(password, salt); + var actual = Convert.FromBase64String(actualB64); + return CryptographicOperations.FixedTimeEquals(actual, expected); + } + catch + { + return false; + } + } + + private static string HashWithSalt(string password, byte[] salt) + { + using var argon = new Argon2id(Encoding.UTF8.GetBytes(password)) + { + Salt = salt, + DegreeOfParallelism = DegreeOfParallelism, + MemorySize = MemoryKb, + Iterations = Iterations + }; + return Convert.ToBase64String(argon.GetBytes(HashBytes)); + } + + public static string NewSessionToken() + { + Span buf = stackalloc byte[32]; + RandomNumberGenerator.Fill(buf); + return Convert.ToHexString(buf).ToLowerInvariant(); + } +} diff --git a/src/Web/Controllers/KbAuthController.cs b/src/Web/Controllers/KbAuthController.cs new file mode 100644 index 0000000..cc9a651 --- /dev/null +++ b/src/Web/Controllers/KbAuthController.cs @@ -0,0 +1,68 @@ +using ExperionCrawler.Infrastructure.Kb; +using Microsoft.AspNetCore.Mvc; + +namespace ExperionCrawler.Web.Controllers; + +[ApiController] +[Route("api/kb/auth")] +public class KbAuthController : ControllerBase +{ + private readonly 
IKbAuthService _auth; + private readonly ILogger _logger; + + public KbAuthController(IKbAuthService auth, ILogger logger) + { + _auth = auth; + _logger = logger; + } + + public sealed record LoginRequest(string Password); + public sealed record ChangePasswordRequest(string OldPassword, string NewPassword); + + [HttpPost("login")] + public async Task Login([FromBody] LoginRequest req, CancellationToken ct) + { + if (req == null || string.IsNullOrWhiteSpace(req.Password)) + return Ok(new { success = false, error = "password is required" }); + + var ip = HttpContext.Connection.RemoteIpAddress?.ToString(); + var result = await _auth.LoginAsync(req.Password, ip, ct); + if (!result.Success) + return Ok(new { success = false, error = result.Error ?? "login failed" }); + + return Ok(new { success = true, token = result.Token, expiresAt = result.ExpiresAt }); + } + + [HttpPost("logout")] + public async Task Logout(CancellationToken ct) + { + var token = Request.Headers["X-Kb-Token"].ToString(); + if (!string.IsNullOrEmpty(token)) + await _auth.LogoutAsync(token, ct); + return Ok(new { success = true }); + } + + [HttpGet("status")] + public async Task Status(CancellationToken ct) + { + var token = Request.Headers["X-Kb-Token"].ToString(); + var valid = await _auth.ValidateAsync(token, ct); + return Ok(new { valid }); + } + + [HttpPost("change-password")] + public async Task ChangePassword([FromBody] ChangePasswordRequest req, CancellationToken ct) + { + var token = Request.Headers["X-Kb-Token"].ToString(); + if (!await _auth.ValidateAsync(token, ct)) + return Unauthorized(new { success = false, error = "invalid token" }); + + if (req == null || string.IsNullOrWhiteSpace(req.OldPassword) || string.IsNullOrWhiteSpace(req.NewPassword)) + return Ok(new { success = false, error = "passwords required" }); + if (req.NewPassword.Length < 6) + return Ok(new { success = false, error = "new password must be at least 6 chars" }); + + var ok = await 
_auth.ChangePasswordAsync(req.OldPassword, req.NewPassword, ct); + return Ok(new { success = ok }); + } +} diff --git a/src/Web/Controllers/KbController.cs b/src/Web/Controllers/KbController.cs new file mode 100644 index 0000000..1805430 --- /dev/null +++ b/src/Web/Controllers/KbController.cs @@ -0,0 +1,337 @@ +using ExperionCrawler.Core.Domain.Entities; +using ExperionCrawler.Infrastructure.Database; +using ExperionCrawler.Infrastructure.Kb; +using Microsoft.AspNetCore.Mvc; +using Microsoft.EntityFrameworkCore; + +namespace ExperionCrawler.Web.Controllers; + +[ApiController] +[Route("api/kb")] +public class KbController : ControllerBase +{ + private readonly ExperionDbContext _db; + private readonly KbStorageService _storage; + private readonly KbQdrantClient _qdrant; + private readonly IKbAuthService _auth; + private readonly ILogger _logger; + + public KbController( + ExperionDbContext db, + KbStorageService storage, + KbQdrantClient qdrant, + IKbAuthService auth, + ILogger logger) + { + _db = db; + _storage = storage; + _qdrant = qdrant; + _auth = auth; + _logger = logger; + } + + private async Task RequireAdminAsync(CancellationToken ct) + { + var token = Request.Headers["X-Kb-Token"].ToString(); + return await _auth.ValidateAsync(token, ct); + } + + [HttpGet("collections")] + public async Task GetCollections(CancellationToken ct) + { + var items = await _db.KbCollections + .Where(c => c.IsActive) + .OrderBy(c => c.CollectionKey) + .Select(c => new + { + key = c.CollectionKey, + name = c.DisplayName, + qdrant = c.QdrantName, + description = c.Description + }) + .ToListAsync(ct); + + var docCounts = await _db.KbDocuments + .Where(d => d.Status != "disabled") + .GroupBy(d => d.CollectionKey) + .Select(g => new { Key = g.Key, Count = g.Count(), Chunks = g.Sum(x => x.ChunkCount) }) + .ToListAsync(ct); + + var byKey = docCounts.ToDictionary(x => x.Key, x => (x.Count, x.Chunks)); + var result = items.Select(c => + { + byKey.TryGetValue(c.key, out var counts); + 
return new { c.key, c.name, c.qdrant, c.description, docCount = counts.Count, chunkCount = counts.Chunks }; + }); + return Ok(new { success = true, items = result }); + } + + [HttpPost("upload")] + [RequestSizeLimit(500_000_000)] + public async Task Upload( + [FromForm] IFormFile file, + [FromForm] string collectionKey, + [FromForm] string? title, + [FromForm] string? tags, + CancellationToken ct) + { + if (!await RequireAdminAsync(ct)) + return Unauthorized(new { success = false, error = "unauthorized" }); + + if (file == null || file.Length == 0) + return BadRequest(new { success = false, error = "file required" }); + if (string.IsNullOrWhiteSpace(collectionKey)) + return BadRequest(new { success = false, error = "collectionKey required" }); + + var coll = await _db.KbCollections.FirstOrDefaultAsync(c => c.CollectionKey == collectionKey, ct); + if (coll == null) return BadRequest(new { success = false, error = "unknown collectionKey" }); + + await using var stream = file.OpenReadStream(); + var stored = await _storage.SaveAsync(stream, file.FileName, ct); + + var tagArr = (tags ?? "") + .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries) + .Where(t => t.Length > 0).ToArray(); + + var doc = new KbDocument + { + Id = Guid.NewGuid(), + CollectionKey = collectionKey, + Title = string.IsNullOrWhiteSpace(title) ? 
Path.GetFileNameWithoutExtension(file.FileName) : title.Trim(), + OriginalPath = stored.RelativePath, + FileSha256 = stored.Sha256, + FileSize = stored.Size, + MimeType = file.ContentType, + Tags = tagArr, + Status = "pending", + ChunkCount = 0, + UploadedAt = DateTime.UtcNow, + UploadedBy = HttpContext.Connection.RemoteIpAddress?.ToString() + }; + _db.KbDocuments.Add(doc); + + _db.KbIngestJobs.Add(new KbIngestJob + { + DocId = doc.Id, + Stage = "parse", + EnqueuedAt = DateTime.UtcNow + }); + await _db.SaveChangesAsync(ct); + + _logger.LogInformation("[Kb] 업로드 {Id} {Title} ({Size} bytes)", doc.Id, doc.Title, doc.FileSize); + return Ok(new { success = true, docId = doc.Id, status = doc.Status }); + } + + [HttpGet("documents")] + public async Task GetDocuments( + [FromQuery] string? collection, + [FromQuery] string? status, + [FromQuery] string? q, + [FromQuery] int page = 0, + [FromQuery] int pageSize = 50, + CancellationToken ct = default) + { + page = Math.Max(0, page); + pageSize = Math.Clamp(pageSize, 1, 200); + + var query = _db.KbDocuments.AsNoTracking().AsQueryable(); + if (!string.IsNullOrWhiteSpace(collection)) query = query.Where(d => d.CollectionKey == collection); + if (!string.IsNullOrWhiteSpace(status)) query = query.Where(d => d.Status == status); + if (!string.IsNullOrWhiteSpace(q)) + { + var like = $"%{q}%"; + query = query.Where(d => EF.Functions.ILike(d.Title, like)); + } + + var total = await query.CountAsync(ct); + var items = await query + .OrderByDescending(d => d.UploadedAt) + .Skip(page * pageSize) + .Take(pageSize) + .Select(d => new + { + id = d.Id, + title = d.Title, + collection = d.CollectionKey, + tags = d.Tags, + status = d.Status, + chunkCount = d.ChunkCount, + fileSize = d.FileSize, + uploadedAt = d.UploadedAt, + indexedAt = d.IndexedAt, + errorMessage = d.ErrorMessage + }) + .ToListAsync(ct); + return Ok(new { success = true, total, page, pageSize, items }); + } + + [HttpGet("documents/{id:guid}")] + public async Task 
GetDocument(Guid id, CancellationToken ct) + { + var d = await _db.KbDocuments.AsNoTracking().FirstOrDefaultAsync(x => x.Id == id, ct); + if (d == null) return NotFound(new { success = false }); + return Ok(new + { + success = true, + item = new + { + id = d.Id, + title = d.Title, + collection = d.CollectionKey, + tags = d.Tags, + status = d.Status, + chunkCount = d.ChunkCount, + fileSize = d.FileSize, + mimeType = d.MimeType, + uploadedAt = d.UploadedAt, + indexedAt = d.IndexedAt, + disabledAt = d.DisabledAt, + originalPath = d.OriginalPath, + fileSha256 = d.FileSha256, + errorMessage = d.ErrorMessage + } + }); + } + + [HttpGet("jobs")] + public async Task GetJobs( + [FromQuery] Guid? docId, + [FromQuery] string? stage, + [FromQuery] bool pendingOnly = false, + CancellationToken ct = default) + { + var q = _db.KbIngestJobs.AsNoTracking().AsQueryable(); + if (docId.HasValue) q = q.Where(j => j.DocId == docId.Value); + if (!string.IsNullOrWhiteSpace(stage)) q = q.Where(j => j.Stage == stage); + if (pendingOnly) q = q.Where(j => j.FinishedAt == null); + + var items = await q.OrderByDescending(j => j.EnqueuedAt).Take(200).Select(j => new + { + id = j.Id, + docId = j.DocId, + stage = j.Stage, + attempts = j.Attempts, + lastError = j.LastError, + enqueuedAt = j.EnqueuedAt, + startedAt = j.StartedAt, + finishedAt = j.FinishedAt + }).ToListAsync(ct); + return Ok(new { success = true, items }); + } + + [HttpGet("download/{id:guid}")] + public async Task Download(Guid id, CancellationToken ct) + { + var d = await _db.KbDocuments.AsNoTracking().FirstOrDefaultAsync(x => x.Id == id, ct); + if (d == null) return NotFound(); + var abs = _storage.Resolve(d.OriginalPath); + if (!System.IO.File.Exists(abs)) return NotFound(); + + var stream = new FileStream(abs, FileMode.Open, FileAccess.Read, FileShare.Read, 64 * 1024, true); + var ext = Path.GetExtension(abs); + var fileName = string.IsNullOrEmpty(ext) ? d.Title : d.Title + ext; + return File(stream, d.MimeType ?? 
"application/octet-stream", fileName); + } + + [HttpDelete("documents/{id:guid}")] + public async Task Delete(Guid id, CancellationToken ct) + { + if (!await RequireAdminAsync(ct)) + return Unauthorized(new { success = false, error = "unauthorized" }); + + var d = await _db.KbDocuments.FirstOrDefaultAsync(x => x.Id == id, ct); + if (d == null) return NotFound(new { success = false }); + + var coll = await _db.KbCollections.FirstOrDefaultAsync(c => c.CollectionKey == d.CollectionKey, ct); + if (coll != null) + await _qdrant.DeleteByDocAsync(coll.QdrantName, d.Id, ct); + + _storage.Delete(d.OriginalPath); + + _db.KbDocuments.Remove(d); + await _db.SaveChangesAsync(ct); + return Ok(new { success = true }); + } + + [HttpPost("documents/{id:guid}/reindex")] + public async Task Reindex(Guid id, CancellationToken ct) + { + if (!await RequireAdminAsync(ct)) + return Unauthorized(new { success = false, error = "unauthorized" }); + + var d = await _db.KbDocuments.FirstOrDefaultAsync(x => x.Id == id, ct); + if (d == null) return NotFound(new { success = false }); + + var coll = await _db.KbCollections.FirstOrDefaultAsync(c => c.CollectionKey == d.CollectionKey, ct); + if (coll != null) + await _qdrant.DeleteByDocAsync(coll.QdrantName, d.Id, ct); + + d.Status = "pending"; + d.ChunkCount = 0; + d.ErrorMessage = null; + d.IndexedAt = null; + + _db.KbIngestJobs.Add(new KbIngestJob { DocId = d.Id, Stage = "parse" }); + await _db.SaveChangesAsync(ct); + return Ok(new { success = true }); + } + + [HttpPost("documents/{id:guid}/disable")] + public async Task Disable(Guid id, CancellationToken ct) + { + if (!await RequireAdminAsync(ct)) + return Unauthorized(new { success = false, error = "unauthorized" }); + + var d = await _db.KbDocuments.FirstOrDefaultAsync(x => x.Id == id, ct); + if (d == null) return NotFound(new { success = false }); + d.Status = "disabled"; + d.DisabledAt = DateTime.UtcNow; + await _db.SaveChangesAsync(ct); + return Ok(new { success = true }); + } + + public 
sealed record BulkDisableRequest(string Title); + + [HttpPost("documents/bulk-disable")] + public async Task BulkDisable([FromBody] BulkDisableRequest req, CancellationToken ct) + { + if (!await RequireAdminAsync(ct)) + return Unauthorized(new { success = false, error = "unauthorized" }); + if (req == null || string.IsNullOrWhiteSpace(req.Title)) + return BadRequest(new { success = false, error = "title required" }); + + var rows = await _db.KbDocuments + .Where(d => d.Title == req.Title && d.Status != "disabled") + .ExecuteUpdateAsync(set => set + .SetProperty(x => x.Status, _ => "disabled") + .SetProperty(x => x.DisabledAt, _ => DateTime.UtcNow), ct); + return Ok(new { success = true, affected = rows }); + } + + public sealed record PurgeRequest(int? OlderThanDays); + + [HttpPost("documents/purge-disabled")] + public async Task PurgeDisabled([FromBody] PurgeRequest req, CancellationToken ct) + { + if (!await RequireAdminAsync(ct)) + return Unauthorized(new { success = false, error = "unauthorized" }); + + var cutoff = req?.OlderThanDays is int days && days > 0 + ? 
DateTime.UtcNow.AddDays(-days) + : (DateTime?)null; + + var query = _db.KbDocuments.Where(d => d.Status == "disabled"); + if (cutoff.HasValue) query = query.Where(d => d.DisabledAt != null && d.DisabledAt < cutoff); + + var victims = await query.ToListAsync(ct); + foreach (var d in victims) + { + var coll = await _db.KbCollections.FirstOrDefaultAsync(c => c.CollectionKey == d.CollectionKey, ct); + if (coll != null) await _qdrant.DeleteByDocAsync(coll.QdrantName, d.Id, ct); + _storage.Delete(d.OriginalPath); + } + _db.KbDocuments.RemoveRange(victims); + await _db.SaveChangesAsync(ct); + return Ok(new { success = true, deleted = victims.Count }); + } +} diff --git a/src/Web/Controllers/OllamaController.cs b/src/Web/Controllers/OllamaController.cs index 314ff83..cb998d6 100644 --- a/src/Web/Controllers/OllamaController.cs +++ b/src/Web/Controllers/OllamaController.cs @@ -62,6 +62,107 @@ public class OllamaController : ControllerBase return new OllamaConfig(); } + string PlantContextPath + { + get + { + var dir = _config["PromptsDirectory"] ?? "../../prompts"; + if (!Path.IsPathRooted(dir)) + dir = Path.GetFullPath(Path.Combine(Directory.GetCurrentDirectory(), dir)); + return Path.Combine(dir, "plant_context.md"); + } + } + + string LoadPlantContext() + { + try + { + var p = PlantContextPath; + if (System.IO.File.Exists(p)) + return System.IO.File.ReadAllText(p).Trim(); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "[OllamaController] plant_context.md 로드 실패"); + } + return ""; + } + + private const string BaseSystemPromptKo = + "당신은 ExperionCrawler 시스템의 운전 보조 AI입니다.\n" + + "한국어로 정확하고 간결하게 답변합니다. 
추측하지 말고, 확실치 않으면 도구로 확인합니다.\n" + + "수치/시각은 그대로 인용하고, 표/시계열 결과는 가능한 한 표 형식으로 정리해 보여줍니다."; + + private const string ToolGuideKo = + "\n\n## 사용 가능한 MCP 도구\n" + + "- run_sql: PostgreSQL SELECT 실행 (LIMIT 권장)\n" + + "- query_pv_history: 태그 PV 이력 조회 (history_table, recorded_at)\n" + + "- get_tag_metadata: 태그명 패턴 매칭 검색 (realtime_table)\n" + + "- list_drawings: P&ID 도면 목록 (node_map_master)\n" + + "- query_with_nl: 자연어 → SQL 변환 후 실행\n" + + "사용자가 태그 값/이력/DB 정보를 물으면 알맞은 도구를 function calling으로 호출하세요.\n" + + "도구 결과의 JSON은 그대로 노출하지 말고, 사람이 읽기 쉬운 표/요약으로 변환합니다.\n" + + "DB 시계열 컬럼은 history_table.recorded_at 이며, time_bucket() 대신 date_trunc 또는 to_timestamp(FLOOR(EPOCH/N*60)*N*60) 공식을 사용합니다."; + + private async Task EmitToolStart(string toolCallId, string name, string argsJson) + { + try + { + object argsObj; + try { argsObj = JsonSerializer.Deserialize(argsJson) ?? new { }; } + catch { argsObj = argsJson; } + var data = JsonSerializer.Serialize(new { id = toolCallId, name, args = argsObj }); + await Response.WriteAsync($"event: tool_start\ndata: {data}\n\n"); + await Response.Body.FlushAsync(); + } + catch { } + } + + private async Task EmitToolResult(string toolCallId, string name, bool ok, string payload) + { + try + { + const int previewMax = 600; + var preview = payload.Length > previewMax ? payload.Substring(0, previewMax) + "…" : payload; + var data = JsonSerializer.Serialize(new + { + id = toolCallId, + name, + ok, + preview, + length = payload.Length, + payload // 전체 JSON 그대로(테이블/시계열 자동 렌더에 사용) + }); + await Response.WriteAsync($"event: tool_result\ndata: {data}\n\n"); + await Response.Body.FlushAsync(); + } + catch { } + } + + string ComposeSystemPrompt(string? 
userPrompt, bool toolsEnabled) + { + var sb = new StringBuilder(); + sb.Append(BaseSystemPromptKo); + + var plant = LoadPlantContext(); + if (!string.IsNullOrWhiteSpace(plant)) + { + sb.Append("\n\n## 플랜트 컨텍스트\n"); + sb.Append(plant); + } + + if (toolsEnabled) + sb.Append(ToolGuideKo); + + if (!string.IsNullOrWhiteSpace(userPrompt)) + { + sb.Append("\n\n## 사용자 추가 지침\n"); + sb.Append(userPrompt.Trim()); + } + + return sb.ToString(); + } + [HttpGet("models")] public async Task GetModels() { @@ -102,7 +203,7 @@ public class OllamaController : ControllerBase { model = req.Model, messages = req.Messages, - system = req.SystemPrompt, + system = ComposeSystemPrompt(req.SystemPrompt, toolsEnabled: false), stream = false }; var content = new StringContent( @@ -147,7 +248,7 @@ public class OllamaController : ControllerBase { model = req.Model, messages = req.Messages, - system = req.SystemPrompt, + system = ComposeSystemPrompt(req.SystemPrompt, toolsEnabled: false), stream = true }; var content = new StringContent( @@ -324,8 +425,9 @@ public class OllamaController : ControllerBase { var model = req.Model; var msgList = new List(); - if (req.SystemPrompt != null) - msgList.Add(new { role = "system", content = req.SystemPrompt }); + var sysPrompt = ComposeSystemPrompt(req.SystemPrompt, toolsEnabled: false); + if (!string.IsNullOrEmpty(sysPrompt)) + msgList.Add(new { role = "system", content = sysPrompt }); foreach (var m in req.Messages) msgList.Add(m); var payload = new @@ -400,8 +502,9 @@ public class OllamaController : ControllerBase private async Task VllmChatStreamSimple(OllamaChatRequest req) { var msgList = new List(); - if (req.SystemPrompt != null) - msgList.Add(new { role = "system", content = req.SystemPrompt }); + var sysPrompt = ComposeSystemPrompt(req.SystemPrompt, toolsEnabled: false); + if (!string.IsNullOrEmpty(sysPrompt)) + msgList.Add(new { role = "system", content = sysPrompt }); foreach (var m in req.Messages) msgList.Add(m); var payload = new @@ -469,8 
+572,9 @@ public class OllamaController : ControllerBase private async Task VllmChatStreamWithTools(OllamaChatRequest req) { var messages = new List(); - if (req.SystemPrompt != null) - messages.Add(new { role = "system", content = req.SystemPrompt }); + var sysPrompt = ComposeSystemPrompt(req.SystemPrompt, toolsEnabled: true); + if (!string.IsNullOrEmpty(sysPrompt)) + messages.Add(new { role = "system", content = sysPrompt }); foreach (var m in req.Messages) messages.Add(m); @@ -562,6 +666,8 @@ public class OllamaController : ControllerBase var funcName = func?.GetType().GetProperty("name")?.GetValue(func) as string ?? ""; var funcArgs = func?.GetType().GetProperty("arguments")?.GetValue(func) as string ?? "{}"; + await EmitToolStart(tcId, funcName, funcArgs); + try { var args = JsonSerializer.Deserialize>(funcArgs) @@ -569,6 +675,8 @@ public class OllamaController : ControllerBase var toolResult = await _mcpClient.CallToolAsync(funcName, args, HttpContext.RequestAborted); + await EmitToolResult(tcId, funcName, ok: true, payload: toolResult); + messages.Add(new { role = "tool", @@ -578,6 +686,7 @@ public class OllamaController : ControllerBase } catch (Exception ex) { + await EmitToolResult(tcId, funcName, ok: false, payload: ex.Message); messages.Add(new { role = "tool", @@ -666,15 +775,20 @@ public class OllamaController : ControllerBase if (detectedTool != null && args.Count > 0) { + var pseudoId = $"jsontc_{toolRound}_{Guid.NewGuid():N}"; + var argsJson = JsonSerializer.Serialize(args); + await EmitToolStart(pseudoId, detectedTool, argsJson); try { var toolResult = await _mcpClient.CallToolAsync(detectedTool, args, HttpContext.RequestAborted); + await EmitToolResult(pseudoId, detectedTool, ok: true, payload: toolResult); messages.Add(new { role = "assistant", content = stopContent }); messages.Add(new { role = "user", content = $"[{detectedTool} 실행 결과]\n{toolResult}\n\n위 결과를 바탕으로 사용자의 질문에 자연어로 답변해주세요." 
}); continue; } catch (Exception ex) { + await EmitToolResult(pseudoId, detectedTool, ok: false, payload: ex.Message); _logger.LogWarning(ex, "[OllamaController] 텍스트 형식 도구 호출 실패: {Tool}", detectedTool); } } diff --git a/src/Web/ExperionCrawler.csproj b/src/Web/ExperionCrawler.csproj index 7a6ad82..8745131 100644 --- a/src/Web/ExperionCrawler.csproj +++ b/src/Web/ExperionCrawler.csproj @@ -33,6 +33,8 @@ + + diff --git a/src/Web/Program.cs b/src/Web/Program.cs index 7324dc4..6d29f26 100644 --- a/src/Web/Program.cs +++ b/src/Web/Program.cs @@ -3,6 +3,7 @@ using ExperionCrawler.Core.Application.Services; using ExperionCrawler.Infrastructure.Certificates; using ExperionCrawler.Infrastructure.Csv; using ExperionCrawler.Infrastructure.Database; +using ExperionCrawler.Infrastructure.Kb; using ExperionCrawler.Infrastructure.Mcp; using ExperionCrawler.Infrastructure.OpcUa; using ExperionCrawler.Web; @@ -119,6 +120,14 @@ builder.Services.AddSingleton(); +// ── Knowledge Base (RAG) ────────────────────────────────────────────────────── +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); +builder.Services.AddScoped(); +builder.Services.AddHostedService(); +builder.Services.AddHostedService(); + // ── Ollama HttpClient ───────────────────────────────────────────────────────── builder.Services.AddHttpClient("Ollama", c => { @@ -149,6 +158,17 @@ try { var db = scope.ServiceProvider.GetRequiredService(); await db.InitializeAsync(); + + try + { + var kbAuth = scope.ServiceProvider.GetRequiredService(); + await kbAuth.EnsureCredentialAsync(); + } + catch (Exception kbEx) + { + var lg = app.Services.GetRequiredService>(); + lg.LogWarning(kbEx, "[Kb] 관리자 비밀번호 초기화 실패"); + } } } catch (Exception ex) diff --git a/src/Web/appsettings.json b/src/Web/appsettings.json index 9d5cb4e..4670f9d 100644 --- a/src/Web/appsettings.json +++ b/src/Web/appsettings.json @@ -38,6 +38,15 @@ "McpServer": { "WorkingDirectory": "../../mcp-server" }, + 
"PromptsDirectory": "../../prompts", + "Kb": { + "QdrantUrl": "http://localhost:6333", + "VectorSize": 768, + "StorageRoot": "../../storage/kb", + "AdminSessionMinutes": 60, + "WorkerPollIntervalSeconds": 2, + "MaxAttempts": 3 + }, "Kestrel": { "Endpoints": { "Http": { diff --git a/src/Web/wwwroot/css/style.css b/src/Web/wwwroot/css/style.css index ecdf44e..f2d2de4 100644 --- a/src/Web/wwwroot/css/style.css +++ b/src/Web/wwwroot/css/style.css @@ -1897,3 +1897,174 @@ tr:last-child td { border-bottom: none; } .llm-sidebar { display: none; } .llm-layout { height: calc(100vh - var(--sw) - 120px); } } + +/* ═══════════════════════════════════════════════════════ + 14 RAG 관리 (KB Admin) + ══════════════════════════════════════════════════════ */ +.kb-login-card { max-width: 460px; } +.kb-main { display: flex; flex-direction: column; gap: 12px; } +.kb-topbar { + display: flex; justify-content: space-between; align-items: center; + padding: 8px 12px; background: var(--s1); border: 1px solid var(--bd1); + border-radius: var(--rm); +} +.kb-session { display: flex; align-items: center; gap: 10px; font-size: 12px; color: var(--t1); } +.kb-actions { display: flex; gap: 8px; } +.kb-filters { padding: 12px; } +.kb-msg { font-size: 12px; color: var(--t2); margin-left: 8px; } +.kb-stats { font-size: 12px; color: var(--t2); padding: 4px 6px; } + +.kb-doc-tbl { width: 100%; font-size: 12px; } +.kb-doc-tbl th, .kb-doc-tbl td { padding: 8px 10px; border-bottom: 1px solid var(--bd1); } +.kb-doc-tbl th { background: var(--s1); text-align: left; font-weight: 600; color: var(--t1); } +.kb-doc-tbl td.mono { font-family: var(--ffm); font-size: 11px; color: var(--t2); } + +.kb-tag { + display: inline-block; padding: 1px 6px; margin: 0 2px 2px 0; + background: var(--s2); border: 1px solid var(--bd1); border-radius: 8px; + font-size: 11px; color: var(--t1); +} + +.kb-status { + display: inline-block; padding: 2px 8px; border-radius: 4px; + font-size: 11px; font-weight: 600; text-transform: 
uppercase; +} +.kb-st-pending { background: #3a3a55; color: #aab2d4; } +.kb-st-parsing { background: #4a4a1a; color: #f3d76b; } +.kb-st-embedding { background: #4a4a1a; color: #f3d76b; } +.kb-st-indexed { background: #1f4a2a; color: #6bd58b; } +.kb-st-failed { background: #5a1f1f; color: #f37070; } +.kb-st-disabled { background: #303032; color: #888; } + +.kb-err { + font-size: 11px; color: #f37070; margin-top: 2px; + max-width: 220px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; +} + +/* 모달 */ +.kb-modal { + position: fixed; inset: 0; z-index: 950; + background: rgba(0,0,0,.55); + display: flex; align-items: center; justify-content: center; +} +.kb-modal.hidden { display: none; } +.kb-modal-body { + background: var(--s2); border: 1px solid var(--bd2); + border-radius: var(--rl); padding: 22px; + width: 460px; max-width: 92vw; max-height: 90vh; overflow-y: auto; +} +.kb-modal-title { font-weight: 700; font-size: 15px; margin-bottom: 14px; color: var(--t0); } + +/* ═══════════════════════════════════════════════════════ + Phase 5 — 채팅 통합 (툴 카드 / KB 인용 / 표 / 추천 칩) + ══════════════════════════════════════════════════════ */ + +/* 툴 카드 컨테이너 — assistant 메시지 버블 위 */ +.llm-tool-cards { + display: flex; flex-direction: column; gap: 6px; + margin: 4px 0 8px 0; +} + +.llm-tool-card { + border: 1px solid var(--bd1); + border-radius: var(--rm); + background: var(--s1); + font-size: 12px; + overflow: hidden; +} +.llm-tool-card.running .llm-tool-icon { animation: spin 1.4s linear infinite; } +.llm-tool-card.ok { border-color: #2a5a3a; } +.llm-tool-card.err { border-color: #5a2a2a; } + +.llm-tool-head { + display: flex; align-items: center; gap: 8px; + padding: 6px 10px; cursor: pointer; + background: var(--s2); + user-select: none; +} +.llm-tool-head:hover { background: var(--s3, #2a2a2e); } +.llm-tool-icon { font-size: 13px; } +.llm-tool-name { font-weight: 600; color: var(--t0); } +.llm-tool-args { + flex: 1; color: var(--t2); font-size: 11px; + overflow: 
hidden; text-overflow: ellipsis; white-space: nowrap; +} +.llm-tool-status { font-size: 11px; color: var(--t2); flex-shrink: 0; } +.llm-tool-card.ok .llm-tool-status { color: #6bd58b; } +.llm-tool-card.err .llm-tool-status { color: #f37070; } + +.llm-tool-body { display: none; padding: 8px 10px; } +.llm-tool-card.open .llm-tool-body { display: block; } +.llm-tool-raw { + margin: 0; padding: 6px 8px; max-height: 240px; overflow: auto; + font-family: var(--ffm); font-size: 11px; + background: var(--s0); border: 1px solid var(--bd1); border-radius: 4px; + white-space: pre-wrap; word-break: break-all; +} +.llm-tool-err { + color: #f37070; font-family: var(--ffm); font-size: 12px; + padding: 6px 8px; +} +.llm-tool-more { + font-size: 11px; color: var(--t2); margin-top: 4px; text-align: right; +} + +/* 툴 결과 표 */ +.llm-tool-tbl-wrap { max-height: 320px; overflow: auto; border: 1px solid var(--bd1); border-radius: 4px; } +.llm-tool-tbl { + width: 100%; border-collapse: collapse; font-size: 11px; font-family: var(--ffm); +} +.llm-tool-tbl th, .llm-tool-tbl td { + padding: 4px 8px; border-bottom: 1px solid var(--bd1); + text-align: left; white-space: nowrap; +} +.llm-tool-tbl th { + background: var(--s2); position: sticky; top: 0; + font-weight: 600; color: var(--t1); +} + +/* KB 검색 결과 (search_kb 카드 안) */ +.llm-kb-hits { display: flex; flex-direction: column; gap: 6px; } +.llm-kb-hit { + padding: 6px 8px; background: var(--s0); + border: 1px solid var(--bd1); border-radius: 4px; +} +.llm-kb-head { font-size: 12px; color: var(--t0); margin-bottom: 2px; } +.llm-kb-head .mono { color: var(--t2); margin-right: 6px; } +.llm-kb-snip { font-size: 11px; color: var(--t1); line-height: 1.4; } + +/* KB 인용 링크 (본문 안) */ +.kb-cite-link { + color: var(--a); text-decoration: none; + border-bottom: 1px dashed var(--a); + padding-bottom: 1px; +} +.kb-cite-link:hover { color: #fff; border-bottom-style: solid; } + +/* welcome 추천 질문 칩 */ +.llm-chip-row { + display: flex; flex-wrap: wrap; gap: 
8px; + justify-content: center; margin-top: 16px; + max-width: 720px; +} +.llm-chip { + padding: 6px 12px; + background: var(--s2); + border: 1px solid var(--bd1); + border-radius: 16px; + color: var(--t0); + font-family: var(--ff); + font-size: 12px; + cursor: pointer; + transition: all .15s; +} +.llm-chip:hover { + background: var(--s3, var(--s2)); + border-color: var(--a); + transform: translateY(-1px); +} + +@keyframes spin { + from { transform: rotate(0deg); } + to { transform: rotate(360deg); } +} diff --git a/src/Web/wwwroot/index.html b/src/Web/wwwroot/index.html index d132566..3f8679c 100644 --- a/src/Web/wwwroot/index.html +++ b/src/Web/wwwroot/index.html @@ -84,6 +84,10 @@ 13 로컬 LLM 채팅 +
@@ -1314,6 +1318,138 @@
+ +
+
+
+

RAG 관리

+

지식 베이스 문서 업로드 / 인덱싱 / 관리 — 관리자 비밀번호 필요.

+
+
KB / RAG
+
+ + + + + + + + + + + + +
+ diff --git a/src/Web/wwwroot/js/app.js b/src/Web/wwwroot/js/app.js index 949003f..83eaddc 100644 --- a/src/Web/wwwroot/js/app.js +++ b/src/Web/wwwroot/js/app.js @@ -3297,6 +3297,26 @@ let llmType = localStorage.getItem('llmType') || 'ollama'; let llmUseTools = localStorage.getItem('llmUseTools') === 'true'; let llmMcpTools = []; +// ── Phase 5.5: welcome 화면 추천 질문 ─────────────────── +const LLM_STARTER_CHIPS = [ + '지금 활성 알람을 보여줘', + 'Unit A의 24시간 운전 상황을 요약해줘', + 'FIC-6113.PV 최근 1시간 추이', + '오늘 발생한 디지털 이벤트 정리', + 'P-6201 펌프의 정비 이력', + '이번 주 보고서를 작성해줘', + '냉각수 펌프 토출 압력 태그를 찾아줘' +]; + +function llmUseChip(btn) { + const input = document.getElementById('llm-input'); + if (!input) return; + input.value = btn.textContent; + input.focus(); + input.style.height = 'auto'; + input.style.height = Math.min(input.scrollHeight, 150) + 'px'; +} + // ── 초기화 (탭 진입 시 API 호출 없음) ────────────────── document.querySelectorAll('[data-tab="llmchat"]').forEach(item => { item.addEventListener('click', () => { @@ -3401,11 +3421,15 @@ function llmRenderMessages() { const sess = llmGetActiveSession(); if (!sess || sess.messages.length === 0) { + const chips = LLM_STARTER_CHIPS.map(q => + `` + ).join(''); el.innerHTML = `
💬
새 대화를 시작하세요
-
모델을 선택하고 메시지를 입력하세요
+
모델을 선택하고 메시지를 입력하세요. 또는 아래 추천 질문을 클릭하세요.
+
${chips}
`; if (sess) { @@ -3449,7 +3473,167 @@ function llmFormatMessage(text) { return `\x00B${blocks.length - 1}\x00`; }); text = esc(text).replace(/\n/g, '
'); - return text.replace(/\x00B(\d+)\x00/g, (_, i) => blocks[+i]); + text = text.replace(/\x00B(\d+)\x00/g, (_, i) => blocks[+i]); + return llmLinkKbCitations(text); +} + +/* ── Phase 5: KB 인용 → 다운로드 링크 치환 ─────────────── */ +let llmKbDocMap = {}; // title → docId (search_kb 결과로 누적) + +function llmRegisterKbHits(hits) { + if (!Array.isArray(hits)) return; + for (const h of hits) { + if (h && h.title && h.doc_id) llmKbDocMap[h.title] = h.doc_id; + } +} + +function llmLinkKbCitations(html) { + const titles = Object.keys(llmKbDocMap); + if (titles.length === 0) return html; + // 긴 제목부터 매칭 (부분문자열 겹침 회피) + titles.sort((a, b) => b.length - a.length); + for (const title of titles) { + const docId = llmKbDocMap[title]; + const safeTitle = esc(title).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const re = new RegExp(safeTitle, 'g'); + html = html.replace(re, + `${esc(title)} ⬇`); + } + return html; +} + +/* ── Phase 5: 툴 실행 카드 ─────────────────────────────── */ +function llmEnsureStreamingMsgEl() { + let msgEl = document.getElementById('llm-streaming-msg'); + if (msgEl) return msgEl; + const messagesEl = document.getElementById('llm-messages'); + msgEl = document.createElement('div'); + msgEl.className = 'llm-msg assistant'; + msgEl.id = 'llm-streaming-msg'; + msgEl.innerHTML = ` +
AI
+
+ `; + messagesEl.appendChild(msgEl); + return msgEl; +} + +function llmEnsureToolCardContainer() { + const msgEl = llmEnsureStreamingMsgEl(); + let cont = msgEl.querySelector('.llm-tool-cards'); + if (!cont) { + cont = document.createElement('div'); + cont.className = 'llm-tool-cards'; + const bubble = msgEl.querySelector('.llm-msg-bubble'); + msgEl.insertBefore(cont, bubble); + } + return cont; +} + +function llmAppendToolCard(id, name, args) { + const cont = llmEnsureToolCardContainer(); + const argSummary = llmSummarizeArgs(args); + const card = document.createElement('div'); + card.className = 'llm-tool-card running'; + card.dataset.toolId = id; + card.innerHTML = ` +
+ + ${esc(name)} + ${esc(argSummary)} + 실행 중… +
+
+
결과 대기 중…
+
+ `; + cont.appendChild(card); + const messagesEl = document.getElementById('llm-messages'); + if (messagesEl) messagesEl.scrollTop = messagesEl.scrollHeight; +} + +function llmUpdateToolCard(id, name, ok, preview, length, payload) { + const cont = document.querySelector('.llm-tool-cards'); + if (!cont) return; + const card = cont.querySelector(`.llm-tool-card[data-tool-id="${CSS.escape(id)}"]`); + if (!card) return; + card.classList.remove('running'); + card.classList.add(ok ? 'ok' : 'err'); + const st = card.querySelector('.llm-tool-status'); + if (st) st.textContent = ok ? `완료 · ${length}자` : '실패'; + const body = card.querySelector('.llm-tool-body'); + if (body) body.innerHTML = llmRenderToolPayload(name, ok, preview, payload); + + // search_kb 결과면 인용 매핑 등록 + if (ok && name === 'search_kb') { + try { + const parsed = JSON.parse(payload); + if (parsed.success && Array.isArray(parsed.hits)) llmRegisterKbHits(parsed.hits); + } catch {} + } +} + +function llmSummarizeArgs(args) { + if (!args) return ''; + if (typeof args === 'string') { + return args.length > 100 ? args.slice(0, 100) + '…' : args; + } + try { + const s = JSON.stringify(args); + return s.length > 100 ? s.slice(0, 100) + '…' : s; + } catch { return ''; } +} + +function llmRenderToolPayload(name, ok, preview, payload) { + if (!ok) return `
${esc(preview || '오류')}
`; + // JSON 응답이면 표/시계열 자동 렌더 시도 + try { + const j = JSON.parse(payload || preview); + // search_kb 형태 + if (Array.isArray(j.hits)) return llmRenderKbHits(j.hits); + // run_sql/query_with_nl 형태: {success, columns:[], data:[{...}, ...]} + if (j.success && Array.isArray(j.columns) && Array.isArray(j.data)) { + return llmRenderTable(j.columns, j.data); + } + // query_pv_history 형태: {success, data:[{tag_name, timestamp, value}, ...]} + if (j.success && Array.isArray(j.data) && j.data.length > 0 && typeof j.data[0] === 'object') { + const cols = Object.keys(j.data[0]); + return llmRenderTable(cols, j.data); + } + } catch {} + return `
${esc((preview || '').slice(0, 800))}
`; +} + +function llmRenderTable(columns, data) { + if (!data || data.length === 0) return '
결과 0건
'; + const limit = Math.min(data.length, 50); + const ths = columns.map(c => `${esc(c)}`).join(''); + const rows = data.slice(0, limit).map(row => { + const tds = columns.map(c => { + const v = row[c]; + return `${v == null ? '' : esc(String(v))}`; + }).join(''); + return `${tds}`; + }).join(''); + const more = data.length > limit ? `
…나머지 ${data.length - limit}건 생략
` : ''; + return `
${ths}${rows}
${more}`; +} + +function llmRenderKbHits(hits) { + if (!hits || hits.length === 0) return '
검색 결과 0건
'; + llmRegisterKbHits(hits); + return '
' + hits.slice(0, 8).map(h => { + const score = (h.score || 0).toFixed(3); + const link = h.doc_id + ? `${esc(h.title || '(제목없음)')} ⬇` + : esc(h.title || '(제목없음)'); + const loc = h.locator ? ` · ${esc(h.locator)}` : ''; + const snippet = (h.text || '').slice(0, 200).replace(/\s+/g, ' '); + return `
+
${score} ${link}${loc}
+
${esc(snippet)}…
+
`; + }).join('') + '
'; } // ── 모델 목록 로드 ───────────────────────────────────── @@ -3602,10 +3786,6 @@ async function llmSend() { }; if (llmType === 'vllm' && llmUseTools && llmMcpTools.length > 0) { requestBody.tools = llmMcpTools; - if (!requestBody.systemPrompt) { - requestBody.systemPrompt = ''; - } - requestBody.systemPrompt += '\n\nYou have access to MCP tools for querying the Experion database: run_sql (execute SQL), query_pv_history (tag PV history), get_tag_metadata (tag metadata search), list_drawings (P&ID drawings), query_with_nl (natural language to SQL). When the user asks about tag values, history, or database information, call the appropriate tool using the function calling API.'; } const res = await fetch(`${prefix}/chat/stream`, { method: 'POST', @@ -3637,18 +3817,41 @@ async function llmSend() { for (const part of parts) { if (streamDone) break; const lines = part.split('\n'); + let eventType = 'message'; let eventData = ''; for (const line of lines) { - if (line.startsWith('data: ')) { + if (line.startsWith('event: ')) { + eventType = line.slice(7).trim(); + } else if (line.startsWith('data: ')) { eventData = line.slice(6); - } else if (line.startsWith('event: error')) { - throw new Error(eventData || '스트리밍 오류'); - } else if (line.startsWith('event: done')) { - streamDone = true; - break; } } + if (eventType === 'error') { + throw new Error(eventData || '스트리밍 오류'); + } + if (eventType === 'done') { + streamDone = true; + break; + } + + if (eventType === 'tool_start') { + try { + const t = JSON.parse(eventData); + llmAppendToolCard(t.id, t.name, t.args); + } catch {} + continue; + } + + if (eventType === 'tool_result') { + try { + const t = JSON.parse(eventData); + llmUpdateToolCard(t.id, t.name, t.ok, t.preview, t.length, t.payload); + } catch {} + continue; + } + + // 일반 message 이벤트 if (eventData && eventData !== '{}') { try { const json = JSON.parse(eventData); @@ -3838,3 +4041,306 @@ function llmExportAll() { a.click(); URL.revokeObjectURL(url); } + +/* 
═══════════════════════════════════════════════════════ + 14 RAG 관리 (KB Admin) + ══════════════════════════════════════════════════════ */ + +let kbToken = sessionStorage.getItem('kbToken') || ''; +let kbExpiresAt = sessionStorage.getItem('kbExpiresAt') || ''; +let kbCollections = []; +let kbPollTimer = null; + +function kbHeaders(extra) { + const h = { ...(extra || {}) }; + if (kbToken) h['X-Kb-Token'] = kbToken; + return h; +} + +async function kbFetch(method, path, body, opt) { + const init = { method, headers: kbHeaders({ 'Content-Type': 'application/json' }), ...(opt || {}) }; + if (body !== undefined && body !== null) init.body = JSON.stringify(body); + const res = await fetch(path, init); + let data = null; + try { data = await res.json(); } catch { /* ignore */ } + return { ok: res.ok, status: res.status, data }; +} + +// ── 탭 클릭 핸들러 (API 호출 없음, 세션 검증만) ─────── +document.querySelectorAll('[data-tab="kbadmin"]').forEach(item => { + item.addEventListener('click', async () => { + if (kbToken) { + const r = await kbFetch('GET', '/api/kb/auth/status'); + if (r.ok && r.data && r.data.valid) { + kbShowMain(); + } else { + kbShowLogin('세션이 만료되었습니다. 
다시 로그인하세요.'); + } + } else { + kbShowLogin(''); + } + }); +}); + +function kbShowLogin(msg) { + kbToken = ''; kbExpiresAt = ''; + sessionStorage.removeItem('kbToken'); sessionStorage.removeItem('kbExpiresAt'); + document.getElementById('kb-login-card').classList.remove('hidden'); + document.getElementById('kb-main').classList.add('hidden'); + const m = document.getElementById('kb-login-msg'); + if (m) m.textContent = msg || ''; + kbStopPoll(); +} + +function kbShowMain() { + document.getElementById('kb-login-card').classList.add('hidden'); + document.getElementById('kb-main').classList.remove('hidden'); + kbUpdateSessionInfo(); + kbLoadCollections().then(() => kbRefresh()); + kbStartPoll(); +} + +function kbUpdateSessionInfo() { + const el = document.getElementById('kb-session-info'); + if (!el) return; + if (kbExpiresAt) { + const t = new Date(kbExpiresAt); + el.textContent = `세션 만료: ${t.toLocaleTimeString('ko-KR')}`; + } else { + el.textContent = '세션: --:--'; + } +} + +async function kbLogin() { + const pw = document.getElementById('kb-pw').value; + const msg = document.getElementById('kb-login-msg'); + if (!pw) { msg.textContent = '비밀번호를 입력하세요.'; return; } + msg.textContent = '로그인 중...'; + const r = await fetch('/api/kb/auth/login', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ password: pw }) + }); + const data = await r.json().catch(() => ({})); + if (!r.ok || !data.success) { + msg.textContent = '❌ ' + (data.error || '로그인 실패'); + return; + } + kbToken = data.token; + kbExpiresAt = data.expiresAt; + sessionStorage.setItem('kbToken', kbToken); + sessionStorage.setItem('kbExpiresAt', kbExpiresAt || ''); + document.getElementById('kb-pw').value = ''; + msg.textContent = ''; + kbShowMain(); +} + +async function kbLogout() { + if (kbToken) await kbFetch('POST', '/api/kb/auth/logout'); + kbShowLogin('로그아웃되었습니다.'); +} + +// ── 컬렉션 ───────────────────────────────────────────── +async function kbLoadCollections() { + 
const r = await kbFetch('GET', '/api/kb/collections'); + if (!r.ok || !r.data || !r.data.success) return; + kbCollections = r.data.items || []; + const fSel = document.getElementById('kb-f-coll'); + const uSel = document.getElementById('kb-up-coll'); + fSel.innerHTML = '' + + kbCollections.map(c => ``).join(''); + uSel.innerHTML = '' + + kbCollections.map(c => ``).join(''); +} + +// ── 목록 ─────────────────────────────────────────────── +async function kbRefresh() { + const coll = document.getElementById('kb-f-coll').value; + const status = document.getElementById('kb-f-status').value; + const q = document.getElementById('kb-f-q').value.trim(); + const qs = new URLSearchParams(); + if (coll) qs.set('collection', coll); + if (status) qs.set('status', status); + if (q) qs.set('q', q); + qs.set('pageSize', '200'); + + const r = await kbFetch('GET', '/api/kb/documents?' + qs.toString()); + if (!r.ok || !r.data || !r.data.success) { + document.getElementById('kb-doc-table').innerHTML = '
조회 실패
'; + return; + } + kbRenderDocs(r.data.items, r.data.total); +} + +function kbRenderDocs(items, total) { + const stats = document.getElementById('kb-doc-stats'); + stats.textContent = `총 ${total}건`; + const tbl = document.getElementById('kb-doc-table'); + if (!items || items.length === 0) { + tbl.innerHTML = '
문서 없음
'; + return; + } + const collMap = Object.fromEntries(kbCollections.map(c => [c.key, c.name])); + const rows = items.map(d => { + const tags = (d.tags || []).map(t => `${t}`).join(' '); + const dt = d.uploadedAt ? new Date(d.uploadedAt).toLocaleString('ko-KR') : ''; + const size = d.fileSize ? kbFmtSize(d.fileSize) : ''; + return ` + ${kbShortId(d.id)} + ${kbEscape(d.title)} + ${collMap[d.collection] || d.collection} + ${tags} + ${size} + ${d.status}${d.errorMessage ? `
${kbEscape(d.errorMessage.slice(0,60))}…
`:''} + ${d.chunkCount || 0} + ${dt} + + + + + + + `; + }).join(''); + tbl.innerHTML = ` + + + ${rows}
ID제목컬렉션태그크기상태청크업로드액션
`; +} + +function kbShortId(id) { return (id || '').replace(/-/g, '').slice(0, 8); } +function kbEscape(s) { return String(s == null ? '' : s).replace(/[&<>"']/g, c => ({ '&': '&', '<': '<', '>': '>', '"': '"', "'": ''' })[c]); } +function kbFmtSize(n) { + if (n < 1024) return n + 'B'; + if (n < 1024 * 1024) return (n / 1024).toFixed(1) + 'K'; + if (n < 1024 * 1024 * 1024) return (n / 1024 / 1024).toFixed(1) + 'M'; + return (n / 1024 / 1024 / 1024).toFixed(2) + 'G'; +} + +// ── 업로드 모달 ──────────────────────────────────────── +function kbUploadOpen() { + document.getElementById('kb-up-msg').textContent = ''; + document.getElementById('kb-up-title').value = ''; + document.getElementById('kb-up-tags').value = ''; + document.getElementById('kb-up-file').value = ''; + document.getElementById('kb-upload-modal').classList.remove('hidden'); +} +function kbUploadClose() { + document.getElementById('kb-upload-modal').classList.add('hidden'); +} +async function kbUploadSubmit() { + const coll = document.getElementById('kb-up-coll').value; + const title = document.getElementById('kb-up-title').value.trim(); + const tags = document.getElementById('kb-up-tags').value.trim(); + const fileInput = document.getElementById('kb-up-file'); + const msg = document.getElementById('kb-up-msg'); + if (!coll) { msg.textContent = '❌ 컬렉션을 선택하세요.'; return; } + if (!fileInput.files || fileInput.files.length === 0) { msg.textContent = '❌ 파일을 선택하세요.'; return; } + const fd = new FormData(); + fd.append('file', fileInput.files[0]); + fd.append('collectionKey', coll); + if (title) fd.append('title', title); + if (tags) fd.append('tags', tags); + msg.textContent = '업로드 중...'; + const r = await fetch('/api/kb/upload', { method: 'POST', headers: kbHeaders(), body: fd }); + const data = await r.json().catch(() => ({})); + if (!r.ok || !data.success) { + msg.textContent = '❌ ' + (data.error || ('HTTP ' + r.status)); + return; + } + msg.textContent = '✅ 업로드 완료. 
인덱싱 진행 중...'; + setTimeout(() => kbUploadClose(), 600); + kbRefresh(); +} + +// ── 액션 ─────────────────────────────────────────────── +function kbDownload(id) { + window.open('/api/kb/download/' + id, '_blank'); +} + +async function kbReindex(id) { + if (!confirm('재인덱싱하시겠습니까? (Qdrant 기존 청크 삭제 후 다시 처리)')) return; + const r = await kbFetch('POST', '/api/kb/documents/' + id + '/reindex'); + if (!r.ok) alert('실패: ' + (r.data && r.data.error ? r.data.error : r.status)); + kbRefresh(); +} + +async function kbDisable(id) { + if (!confirm('이 문서를 비활성화하시겠습니까?')) return; + const r = await kbFetch('POST', '/api/kb/documents/' + id + '/disable'); + if (!r.ok) alert('실패'); + kbRefresh(); +} + +async function kbDelete(id, title) { + if (!confirm(`삭제하시겠습니까?\n${title}\n(Qdrant 청크와 원본 파일도 함께 삭제됩니다)`)) return; + const r = await kbFetch('DELETE', '/api/kb/documents/' + id); + if (!r.ok) alert('실패'); + kbRefresh(); +} + +async function kbBulkDisable() { + const title = prompt('일괄 비활성화할 제목을 정확히 입력하세요:'); + if (!title) return; + const r = await kbFetch('POST', '/api/kb/documents/bulk-disable', { title }); + if (r.ok && r.data && r.data.success) alert(`${r.data.affected}건 비활성화 완료`); + else alert('실패'); + kbRefresh(); +} + +async function kbPurgeDisabled() { + const ds = prompt('비활성화 후 며칠 지난 문서를 영구삭제할까요? (공백이면 모든 disabled 삭제)', '90'); + let body = {}; + if (ds && ds.trim()) { + const n = parseInt(ds, 10); + if (isNaN(n) || n < 0) { alert('숫자를 입력하세요.'); return; } + body.olderThanDays = n; + } + if (!confirm('정말 영구삭제하시겠습니까? 
(되돌릴 수 없습니다)')) return; + const r = await kbFetch('POST', '/api/kb/documents/purge-disabled', body); + if (r.ok && r.data && r.data.success) alert(`${r.data.deleted}건 영구삭제 완료`); + else alert('실패'); + kbRefresh(); +} + +// ── 비밀번호 변경 ────────────────────────────────────── +function kbChangePwOpen() { + document.getElementById('kb-pw-old').value = ''; + document.getElementById('kb-pw-new').value = ''; + document.getElementById('kb-pw-msg').textContent = ''; + document.getElementById('kb-pw-modal').classList.remove('hidden'); +} +function kbChangePwClose() { + document.getElementById('kb-pw-modal').classList.add('hidden'); +} +async function kbChangePwSubmit() { + const oldPw = document.getElementById('kb-pw-old').value; + const newPw = document.getElementById('kb-pw-new').value; + const msg = document.getElementById('kb-pw-msg'); + if (!oldPw || !newPw) { msg.textContent = '❌ 비밀번호를 입력하세요.'; return; } + if (newPw.length < 6) { msg.textContent = '❌ 새 비밀번호는 6자 이상.'; return; } + const r = await kbFetch('POST', '/api/kb/auth/change-password', { oldPassword: oldPw, newPassword: newPw }); + if (r.ok && r.data && r.data.success) { + msg.textContent = '✅ 변경 완료. 다시 로그인해 주세요.'; + setTimeout(() => { kbChangePwClose(); kbLogout(); }, 800); + } else { + msg.textContent = '❌ ' + (r.data && r.data.error ? 
r.data.error : '변경 실패'); + } +} + +// ── 진행률 폴링 (활성 ingest가 있으면 1초마다 새로고침) ─ +function kbStartPoll() { + kbStopPoll(); + kbPollTimer = setInterval(async () => { + if (!kbToken) return; + if (document.getElementById('pane-kbadmin').classList.contains('active') === false) return; + const r = await kbFetch('GET', '/api/kb/documents?status=parsing&pageSize=1'); + const r2 = await kbFetch('GET', '/api/kb/documents?status=embedding&pageSize=1'); + const r3 = await kbFetch('GET', '/api/kb/documents?status=pending&pageSize=1'); + const active = [r, r2, r3].some(x => x.ok && x.data && x.data.total > 0); + if (active) kbRefresh(); + }, 1500); +} +function kbStopPoll() { + if (kbPollTimer) { clearInterval(kbPollTimer); kbPollTimer = null; } +}