Honeywell HC900을 Modbus TCP로 직접 폴링 → gRPC → C# 크롤러 → PostgreSQL. 기존 Experion OPC UA 데이터 경로를 HC900 직접 통신으로 대체. - industrial-comm/cpp: C++ Modbus 게이트웨이 (gRPC 서버) - src: C# .NET 8 ASP.NET Core 크롤러 + 웹 UI (3-Layer) - mcp-server: Python FastMCP (RAG/NL2SQL/P&ID) - 다중 컨트롤러(N-Controller) 지원 Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
42 lines
1.1 KiB
Python
42 lines
1.1 KiB
Python
"""docx 청킹 — 헤딩 경로 별 청크."""
|
|
from __future__ import annotations
|
|
|
|
|
|
def parse(path: str) -> list[dict]:
    """Split a .docx document into one text chunk per heading section.

    Paragraphs are read in document order. Every paragraph whose style name
    starts with "heading" (case-insensitive) opens a new section; the
    non-empty body paragraphs that follow it are joined with newlines into a
    single chunk. Body text that appears before the first heading is emitted
    under the locator ``heading=preface``. A heading with no body text
    produces no chunk of its own, but still contributes to the path of the
    sections nested beneath it.

    Args:
        path: Location of the .docx file, passed straight to
            ``docx.Document``.

    Returns:
        A list of dicts, in document order, each with the keys ``"text"``
        (the section body), ``"chunk_kind"`` (always ``"heading"``) and
        ``"locator"`` (``"heading=<outer> / <inner> / ..."``).
    """
    # Imported lazily so python-docx is only required when a .docx is parsed.
    from docx import Document

    doc = Document(path)
    chunks: list[dict] = []
    # Currently open headings as (level, title), outermost first. Keeping the
    # level next to the title lets siblings replace each other correctly even
    # when the document skips heading levels (e.g. Heading 1 -> Heading 3).
    open_headings: list[tuple[int, str]] = []
    buf: list[str] = []

    def flush() -> None:
        """Emit the buffered body text as one chunk and reset the buffer."""
        if not buf:
            return
        if open_headings:
            heading = " / ".join(title for _, title in open_headings)
        else:
            heading = "preface"
        chunks.append({
            "text": "\n".join(buf).strip(),
            "chunk_kind": "heading",
            "locator": f"heading={heading}",
        })
        buf.clear()

    for p in doc.paragraphs:
        text = (p.text or "").strip()
        if not text:
            continue

        style_name = (p.style.name or "").lower() if p.style else ""
        if not style_name.startswith("heading"):
            buf.append(text)
            continue

        # A heading closes the section collected so far.
        flush()
        try:
            level = int(style_name.split()[-1])
        except (ValueError, IndexError):
            # Style names without a trailing number are treated as top level.
            level = 1
        level = max(1, level)

        # Close every open heading at the same or a deeper level so the new
        # heading becomes a sibling (not a child) of same-level headings.
        while open_headings and open_headings[-1][0] >= level:
            open_headings.pop()
        open_headings.append((level, text))

    flush()
    return chunks
|