feat: P&ID 그래프 파이프라인 및 MCP 서버 개선

- P&ID 그래프 파이프라인 구현 (py) - pid_geometric_extractor.py: 기하학적 특징 추출 - pid_intelligent_mapper.py: 태그 매핑 - pid_topology_builder.py: 위상 구축 - test_pipeline_phase2.py, test_pipeline_phase3.py: 테스트 - MCP 서버 개선 - server.py: 멀티프로세싱 지원 - pipeline/: 분석, 추출, 매핑, 위상 모듈 추가 - C# P&ID 그래프 서비스 - PidGraphDtos.cs: DTO 정의 - PidGraphService.cs: 비즈니스 로직 - PidGraphController.cs: API 컨트롤러 - OPC UA 서비스 개선 - ExperionOpcServerService.cs - ExperionRealtimeService.cs - ExperionFastService.cs - MCP 클라이언트 및 호스팅 서비스 개선 - McpClient.cs - McpServerHostedService.cs - 웹 UI 개선 - pid_graph_view.html: P&ID 그래프 뷰어 - pid-viewer.js: 뷰어 로직 - app.js: 메인 앱 - pid_graph.css: 스타일 - 프로젝트 설정 업데이트 - ExperionCrawler.csproj - Program.cs
feat: implement P&ID extraction and tag mapping, update MCP server and web UI
2026-05-03 03:50:20 +09:00 · 2026-05-02 14:56:04 +09:00
141 changed files with 4516352 additions and 252 deletions
--- a/.roo/rules-code/roo-rules.md
+++ b/.roo/rules-code/roo-rules.md
@@ -60,14 +60,19 @@ For multi-step tasks, state a brief plan:

 Strong success criteria let you loop independently. Weak criteria ("make it work") require constant clarification.

-## 5. Backup + Diff Before Edit
+## 5. Save tokens & time — Roo Code (not the API) must follow this rule
+- "Do not summarize the code or changes after completing a task"
+- "Once the code is written, do not repeat the explanation"
+- "Only output the final file content if necessary"
+
+## 6. Backup + Diff Before Edit

 **기존 파일을 수정하기 전에 반드시 다음 두 단계를 수행할 것.**

 ### Step 1 — 백업
-수정 대상 파일을 `.rooBackup/` 폴더에 원본 그대로 저장한다.
+`.rooBackup/` 폴더 아래에 현재 날짜와 시간을 이름으로 하는 폴더를 만들고, 수정 대상 파일의 수정 전 원본을 그 폴더에 그대로 저장한다.

- 저장 경로: `.rooBackup/<원본경로>/<파일명>` (디렉토리 구조 유지)
+- 저장 경로: `.rooBackup/<날짜-시간>/<원본경로>/<파일명>`
 - 예: `src/Web/wwwroot/js/app.js` → `.rooBackup/<날짜-시간>/src/Web/wwwroot/js/app.js`
 - 백업 후 "백업 완료: `.rooBackup/...`" 를 출력할 것

--- a/.rooBackup/2026-04-30-15-03-15/src/Infrastructure/Database/ExperionDbContext.cs
+++ b/.rooBackup/2026-04-30-15-03-15/src/Infrastructure/Database/ExperionDbContext.cs
--- a/.rooBackup/2026-04-30-204126/src/Infrastructure/Mcp/McpServerHostedService.cs
+++ b/.rooBackup/2026-04-30-204126/src/Infrastructure/Mcp/McpServerHostedService.cs
@@ -0,0 +1,103 @@
+using System.Diagnostics;
+
+namespace ExperionCrawler.Infrastructure.Mcp;
+
+public class McpServerHostedService : IHostedService
+{
+    private readonly McpClient _mcpClient;
+    private readonly ILogger<McpServerHostedService> _logger;
+    private readonly string _workingDirectory;
+    private Process? _process;
+
+    public McpServerHostedService(
+        McpClient mcpClient,
+        ILogger<McpServerHostedService> logger,
+        IConfiguration config)
+    {
+        _mcpClient = mcpClient;
+        _logger = logger;
+        var dir = config["McpServer:WorkingDirectory"] ?? "../../mcp-server";
+        _workingDirectory = Path.IsPathRooted(dir)
+            ? dir
+            : Path.GetFullPath(Path.Combine(Directory.GetCurrentDirectory(), dir));
+    }
+
+    public async Task StartAsync(CancellationToken cancellationToken)
+    {
+        // 이미 외부에서 실행 중이면 새 프로세스 띄우지 않음
+        if (await _mcpClient.PingAsync())
+        {
+            _logger.LogInformation("[McpServer] 이미 실행 중 (localhost:5001) — 기존 프로세스 사용");
+            return;
+        }
+
+        if (!Directory.Exists(_workingDirectory))
+        {
+            _logger.LogWarning("[McpServer] 디렉터리 없음: {Dir} — MCP 서버 시작 스킵", _workingDirectory);
+            return;
+        }
+
+        _logger.LogInformation("[McpServer] Python MCP 서버 시작 중... ({Dir})", _workingDirectory);
+
+        _process = new Process
+        {
+            StartInfo = new ProcessStartInfo
+            {
+                FileName = "uv",
+                Arguments = "run server.py --http",
+                WorkingDirectory = _workingDirectory,
+                UseShellExecute = false,
+            }
+        };
+
+        try
+        {
+            _process.Start();
+        }
+        catch (Exception ex)
+        {
+            _logger.LogError(ex, "[McpServer] 프로세스 시작 실패 (uv 설치 여부 확인)");
+            return;
+        }
+
+        // 최대 30초 대기 (1초 간격 health check)
+        for (int i = 0; i < 30; i++)
+        {
+            try { await Task.Delay(1000, cancellationToken); } catch { return; }
+            if (_process.HasExited)
+            {
+                _logger.LogWarning("[McpServer] 프로세스가 예기치 않게 종료됨 (exit code: {Code})", _process.ExitCode);
+                return;
+            }
+            if (await _mcpClient.PingAsync())
+            {
+                _logger.LogInformation("[McpServer] 준비 완료 (localhost:5001, {Sec}초 소요)", i + 1);
+                return;
+            }
+        }
+        _logger.LogWarning("[McpServer] 30초 내 응답 없음 — 백그라운드에서 계속 기다림");
+    }
+
+    public Task StopAsync(CancellationToken cancellationToken)
+    {
+        try
+        {
+            if (_process is { HasExited: false })
+            {
+                _process.Kill(entireProcessTree: true);
+                _process.WaitForExit(3000);
+                _logger.LogInformation("[McpServer] Python MCP 서버 종료됨");
+            }
+        }
+        catch (Exception ex)
+        {
+            _logger.LogWarning(ex, "[McpServer] 종료 중 오류");
+        }
+        finally
+        {
+            _process?.Dispose();
+            _process = null;
+        }
+        return Task.CompletedTask;
+    }
+}
--- a/.rooBackup/2026-05-02_0133/futurePlan/End-to-End
+++ b/.rooBackup/2026-05-02_0133/futurePlan/End-to-End
@@ -0,0 +1,137 @@
+# 🛠️ Graph Pipeline Phase 1: 기하학적 데이터 추출 (Geometric Extraction)
+
+이 문서는 P&ID Graph Pipeline의 첫 번째 단계인 **기하학적 데이터 추출**의 상세 구현 계획을 다룹니다. 목표는 단순한 텍스트 추출을 넘어, 도면 내 모든 객체의 **물리적 위치(좌표)**와 **기하학적 속성**을 보존하여 이후 위상 모델링(Topology Modeling)이 가능하도록 하는 것입니다.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `ezdxf` | DXF 파일 파싱 및 엔티티 추출 | 핵심 라이브러리 |
+| `shapely` | 기하학적 연산 (Intersection, Distance, Bounding Box) | 좌표 기반 분석 필수 |
+| `numpy` | 대량의 좌표 데이터 계산 및 행렬 연산 | 성능 최적화 |
+| `pandas` | 추출된 객체 데이터의 구조화 및 CSV/JSON 저장 | 데이터 관리 |
+| `pydantic` | 추출 데이터의 스키마 정의 및 유효성 검증 | 데이터 무결성 보장 |
+| `pytesseract` / `pdf2image` | PDF 도면의 영역 기반 OCR 추출 | PDF 처리 시 필요 |
+
+### 1.2 설치 명령어
+```bash
+pip install ezdxf shapely numpy pandas pydantic pytesseract pdf2image
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 데이터 모델 (Schema)
+모든 추출 객체는 다음과 같은 공통 속성을 갖는 `GeometricEntity` 모델을 따릅니다.
+
+```python
+from pydantic import BaseModel
+from typing import List, Optional, Union, Tuple
+
+class BoundingBox(BaseModel):
+    min_x: float
+    min_y: float
+    max_x: float
+    max_y: float
+    center: Tuple[float, float]
+
+class GeometricEntity(BaseModel):
+    entity_id: str
+    entity_type: str  # TEXT, LINE, CIRCLE, POLYLINE, ARC
+    layer: str
+    bbox: BoundingBox
+    properties: dict  # 텍스트 값, 색상, 선 굵기 등
+    coordinates: List[Tuple[float, float]]  # 시작점, 끝점 또는 정점 리스트
+```
+
+### 2.2 처리 파이프라인 흐름
+1.  **DXF Load:** `ezdxf.readfile()`을 통해 도면 로드.
+2.  **Entity Iteration:** 모든 레이어의 엔티티를 순회하며 타입별 분류.
+3.  **Coordinate Extraction:** 
+    *   `TEXT`: 삽입점(Insertion Point) 및 텍스트 길이를 이용한 BBox 계산.
+    *   `LINE`: 시작점(Start)과 끝점(End) 추출.
+    *   `POLYLINE`: 모든 정점(Vertices) 리스트 추출.
+    *   `CIRCLE/ARC`: 중심점(Center)과 반지름(Radius) 추출.
+4.  **Spatial Normalization:** 도면 좌표계를 분석 시스템 좌표계로 정규화.
+5.  **Structured Export:** JSON 또는 DB(PostgreSQL/PostGIS)에 저장.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 DXF 기하학적 추출 핵심 코드
+```python
+import ezdxf
+from shapely.geometry import box, LineString, Point
+from typing import List
+
+class PidGeometricExtractor:
+    def __init__(self, file_path: str):
+        self.doc = ezdxf.readfile(file_path)
+        self.msp = self.doc.modelspace()
+
+    def get_bbox(self, entity):
+        """엔티티의 Bounding Box를 계산하여 shapely box 객체로 반환"""
+        if entity.dxftype() == 'TEXT':
+            # 텍스트의 경우 삽입점과 텍스트 길이를 기반으로 단순화된 BBox 생성
+            p = entity.dxf.insert
+            return box(p.x, p.y, p.x + 10, p.y + 5) # 실제로는 폰트 크기 반영 필요
+        elif entity.dxftype() == 'LINE':
+            start = entity.dxf.start
+            end = entity.dxf.end
+            return box(min(start.x, end.x), min(start.y, end.y), 
+                      max(start.x, end.x), max(start.y, end.y))
+        # ... 기타 타입 구현
+        return None
+
+    def extract_all(self) -> List[dict]:
+        results = []
+        for entity in self.msp:
+            bbox_obj = self.get_bbox(entity)
+            if bbox_obj:
+                results.append({
+                    "id": entity.dxf.handle,
+                    "type": entity.dxftype(),
+                    "layer": entity.dxf.layer,
+                    "bbox": {
+                        "min_x": bbox_obj.bounds[0],
+                        "min_y": bbox_obj.bounds[1],
+                        "max_x": bbox_obj.bounds[2],
+                        "max_y": bbox_obj.bounds[3]
+                    },
+                    "value": getattr(entity.dxf, 'text', None)
+                })
+        return results
+
+# 사용 예시
+extractor = PidGeometricExtractor("plant_drawing.dxf")
+geometric_data = extractor.extract_all()
+```
+
+### 3.2 유틸리티 함수: 인접성 체크 (Proximity Utility)
+추후 2단계(위상 모델링)에서 사용할 핵심 유틸리티입니다.
+
+```python
+from shapely.geometry import Point
+
+def is_near(entity_a_bbox, entity_b_bbox, threshold=5.0):
+    """두 객체의 Bounding Box 간의 최단 거리가 임계값 이내인지 확인"""
+    return entity_a_bbox.distance(entity_b_bbox) <= threshold
+
+def is_inside(point, bbox):
+    """특정 점이 Bounding Box 내부에 있는지 확인"""
+    return bbox.contains(Point(point))
+```
+
+---
+
+## 🚀 4. Phase 1 완료 기준 (Definition of Done)
+
+- [ ] DXF 파일 내 모든 `TEXT`, `LINE`, `POLYLINE`의 좌표 데이터가 누락 없이 추출되는가?
+- [ ] 각 객체별로 정확한 `Bounding Box`가 계산되어 저장되는가?
+- [ ] 추출된 데이터가 `GeometricEntity` 스키마에 맞게 JSON 형태로 저장되는가?
+- [ ] (선택 사항) PDF 도면의 경우 OCR을 통해 텍스트의 좌표값이 추출되는가?
--- a/.rooBackup/2026-05-02_0139/futurePlan/End-to-End
+++ b/.rooBackup/2026-05-02_0139/futurePlan/End-to-End
@@ -0,0 +1,126 @@
+# 🕸️ Graph Pipeline Phase 2: 위상 모델링 (Topology Modeling)
+
+이 문서는 P&ID Graph Pipeline의 두 번째 단계인 **위상 모델링**의 상세 구현 계획을 다룹니다. 1단계에서 추출한 기하학적 객체(좌표, BBox)를 기반으로, 설비 간의 **연결성(Connectivity)**과 **흐름(Flow)**을 정의하는 지식 그래프(Knowledge Graph)를 구축하는 것이 목표입니다.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `networkx` | 그래프 데이터 구조 생성 및 알고리즘 분석 | 핵심 라이브러리 |
+| `shapely` | 객체 간 거리 계산 및 포함 관계 분석 | 1단계와 연계 |
+| `scikit-learn` | (선택) KD-Tree를 이용한 고속 근접 이웃 검색 | 대규모 도면 최적화 |
+| `matplotlib` | 생성된 그래프의 위상 구조 시각화 검증 | 디버깅용 |
+
+### 1.2 설치 명령어
+```bash
+pip install networkx shapely scikit-learn matplotlib
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 그래프 정의 (Graph Definition)
+*   **노드 (Nodes):**
+    *   `Equipment`: 펌프, 탱크, 열교환기 등 (속성: ID, 타입, BBox)
+    *   `Instrument`: 전송기, 밸브, 게이지 등 (속성: ID, 타입, BBox)
+    *   `Tag`: 텍스트 기반 태그 (속성: TagName, Value)
+*   **엣지 (Edges):**
+    *   `Pipe`: 설비-설비, 설비-계기 간의 물리적 연결 (속성: LineNumber, 방향성)
+    *   `Association`: 태그-설비 간의 논리적 연결 (속성: 관계 타입 - 예: 'associated_with')
+
+### 2.2 위상 추론 로직 (Topology Inference)
+1.  **태그-설비 결합 (Tag-to-Entity Binding):**
+    *   태그 텍스트의 BBox와 가장 가까운 심볼(Equipment/Instrument)을 찾아 `Association` 엣지를 생성합니다.
+2.  **배관 연결성 분석 (Line Connectivity):**
+    *   `LINE` 또는 `POLYLINE`의 끝점이 특정 설비의 BBox 내부에 있거나 임계 거리($\epsilon$) 이내에 있으면 두 노드를 `Pipe` 엣지로 연결합니다.
+3.  **흐름 방향성 부여 (Flow Direction):**
+    *   화살표 심볼의 방향 또는 공정 흐름 규칙을 분석하여 엣지에 `source` $\rightarrow$ `target` 방향을 설정합니다.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 그래프 구축 핵심 코드
+```python
+import networkx as nx
+from shapely.geometry import box, Point
+
+class PidTopologyBuilder:
+    def __init__(self, geometric_data):
+        self.data = geometric_data  # Phase 1에서 추출된 JSON 데이터
+        self.G = nx.DiGraph()      # 방향성 그래프 생성
+
+    def build_graph(self):
+        # 1. 모든 객체를 노드로 추가
+        for item in self.data:
+            self.G.add_node(item['id'], 
+                           type=item['type'], 
+                           bbox=box(*item['bbox'].values()), 
+                           value=item.get('value'))
+
+        # 2. 태그-설비 논리적 연결 (Association)
+        tags = [n for n, d in self.G.nodes(data=True) if d['type'] == 'TEXT']
+        equipments = [n for n, d in self.G.nodes(data=True) if d['type'] != 'TEXT']
+
+        for tag in tags:
+            best_match = self._find_nearest_equipment(tag, equipments)
+            if best_match:
+                self.G.add_edge(tag, best_match, relation='associated_with')
+
+        # 3. 배관 기반 물리적 연결 (Pipe)
+        lines = [n for n, d in self.G.nodes(data=True) if d['type'] in ['LINE', 'POLYLINE']]
+        for line in lines:
+            connected_nodes = self._find_connected_nodes(line, equipments)
+            if len(connected_nodes) >= 2:
+                # 라인을 통해 연결된 두 설비 간 엣지 생성
+                self.G.add_edge(connected_nodes[0], connected_nodes[1], relation='pipe')
+
+    def _find_nearest_equipment(self, tag_id, equipment_ids):
+        tag_bbox = self.G.nodes[tag_id]['bbox']
+        min_dist = float('inf')
+        nearest = None
+        for eq_id in equipment_ids:
+            eq_bbox = self.G.nodes[eq_id]['bbox']
+            dist = tag_bbox.distance(eq_bbox)
+            if dist < min_dist:
+                min_dist = dist
+                nearest = eq_id
+        return nearest if min_dist < 50.0 else None # 임계값 50.0
+
+    def _find_connected_nodes(self, line_id, equipment_ids):
+        # 라인 BBox와 교차하는 설비 BBox를 찾아 연결 후보로 반환하는 단순화된 예시
+        # (실제 구현 시 line의 coordinates로 시작/끝점이 설비 BBox에 포함되는지 확인)
+        return [eq for eq in equipment_ids if self.G.nodes[eq]['bbox'].intersects(self.G.nodes[line_id]['bbox'])]
+
+# 실행
+builder = PidTopologyBuilder(geometric_data)
+builder.build_graph()
+graph = builder.G
+```
+
+### 3.2 위상 분석 유틸리티: 영향도 분석 (Impact Analysis)
+```python
+def analyze_impact(graph, start_node):
+    """특정 설비 장애 시 하류(Downstream)에 영향을 받는 모든 노드 추출"""
+    # BFS를 통해 도달 가능한 모든 노드 탐색
+    impacted_nodes = nx.descendants(graph, start_node)
+    return list(impacted_nodes)
+
+# 예: P-101 펌프 고장 시 영향 분석
+affected = analyze_impact(graph, "node_P101")
+print(f"Impacted Equipment: {affected}")
+```
+
+---
+
+## 🚀 4. Phase 2 완료 기준 (Definition of Done)
+
+- [ ] 모든 설비와 계기가 그래프의 **노드(Node)**로 변환되었는가?
+- [ ] 태그와 설비 간의 **논리적 연결(Association)**이 정확하게 매핑되었는가?
+- [ ] 배관(Line)을 통해 설비 간의 **물리적 연결(Pipe Edge)**이 생성되었는가?
+- [ ] `nx.descendants` 등을 통해 특정 노드로부터의 **흐름 추적(Flow Tracing)**이 가능한가?
+- [ ] 생성된 그래프 구조가 JSON 또는 GraphML 등의 형태로 저장되어 Phase 3로 전달 가능한가?
--- a/.rooBackup/2026-05-02_0146/futurePlan/End-to-End
+++ b/.rooBackup/2026-05-02_0146/futurePlan/End-to-End
@@ -0,0 +1,125 @@
+# 🧠 Graph Pipeline Phase 3: 지능형 매핑 및 검증 (Intelligent Mapping & Validation)
+
+이 문서는 P&ID Graph Pipeline의 세 번째 단계인 **지능형 매핑 및 검증**의 상세 구현 계획을 다룹니다. 2단계에서 구축한 위상 그래프(Topology Graph)를 활용하여, 도면 상의 가상 노드들을 실제 Experion 시스템의 **실시간 태그(Real-time Tags)**와 정밀하게 연결하고 그 타당성을 검증하는 것이 목표입니다.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `openai` / `langchain` | LLM API 연동 및 프롬프트 체이닝 | 매핑 추론 및 검증 핵심 |
+| `fuzzywuzzy` / `rapidfuzz` | 태그 이름 간의 문자열 유사도 계산 | 1차 후보군 추출용 |
+| `networkx` | 그래프 기반 인접 노드(Context) 추출 | 2단계 그래프 활용 |
+| `pydantic` | 매핑 결과의 구조화 및 유효성 검사 | 데이터 정규화 |
+| `requests` | ExperionCrawler API (C#)와 통신 | 실제 태그 리스트 조회 |
+
+### 1.2 설치 명령어
+```bash
+pip install openai langchain rapidfuzz networkx pydantic requests
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 매핑 파이프라인 (Mapping Pipeline)
+단순 이름 매칭의 한계를 극복하기 위해 **[후보 추출 $\rightarrow$ 맥락 분석 $\rightarrow$ LLM 확정]**의 3단계 프로세스를 거칩니다.
+
+1.  **1차 후보 추출 (Candidate Generation):**
+    *   도면의 태그 텍스트와 Experion 시스템의 전체 태그 리스트를 `RapidFuzz`로 비교하여 유사도 상위 N개를 추출합니다.
+2.  **맥락 정보 수집 (Context Gathering):**
+    *   해당 노드의 그래프 상 인접 노드(1-hop, 2-hop) 정보를 수집합니다.
+    *   예: "현재 노드는 `PT-101`이며, 상류에 `P-101(Pump)`이 있고 하류에 `V-101(Valve)`이 있음."
+3.  **LLM 기반 최종 매핑 (LLM-based Resolution):**
+    *   후보 태그 리스트와 위상 맥락을 LLM에게 전달하여 가장 타당한 태그를 선택하게 합니다.
+
+### 2.2 상호 검증 로직 (Cross-Validation)
+매핑된 결과가 실제 공정 데이터와 일치하는지 검증합니다.
+*   **위상적 일관성:** 도면에서 $A \rightarrow B$ 순서라면, 실제 데이터에서도 `A`의 변화가 `B`에 영향을 주는지 상관관계 분석.
+*   **속성 일치성:** 도면의 심볼 타입(예: Pressure Transmitter)과 실제 태그의 속성(예: Engineering Unit = 'bar' 또는 'psi')이 일치하는지 확인.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 맥락 기반 매핑 엔진
+```python
+import networkx as nx
+from rapidfuzz import process, fuzz
+from openai import OpenAI
+
+client = OpenAI(api_key="your-api-key")
+
+class IntelligentMapper:
+    def __init__(self, graph, system_tags):
+        self.graph = graph          # Phase 2에서 생성된 NetworkX 그래프
+        self.system_tags = system_tags # Experion 시스템의 전체 태그 리스트
+
+    def get_node_context(self, node_id):
+        """노드의 주변 위상 정보를 텍스트로 변환"""
+        neighbors = list(self.graph.neighbors(node_id))
+        context = []
+        for n in neighbors:
+            attr = self.graph.nodes[n]
+            context.append(f"Connected to {attr.get('value', n)} (Type: {attr.get('type')})")
+        return ", ".join(context)
+
+    def resolve_tag(self, node_id):
+        # 1. 1차 후보 추출 (Fuzzy Matching)
+        tag_text = self.graph.nodes[node_id].get('value', '')
+        candidates = process.extract(tag_text, self.system_tags, scorer=fuzz.WRatio, limit=5)
+        
+        # 2. 맥락 정보 수집
+        context = self.get_node_context(node_id)
+        
+        # 3. LLM에게 최종 판단 요청
+        prompt = f"""
+        P&ID 도면의 태그 '{tag_text}'를 실제 시스템 태그와 매핑해야 합니다.
+        위상 맥락: {context}
+        후보 리스트: {candidates}
+        
+        위 맥락을 고려할 때 가장 적절한 시스템 태그 하나만 반환하세요. 
+        이유가 불분명하면 'UNKNOWN'을 반환하세요.
+        """
+        
+        response = client.chat.completions.create(
+            model="gpt-4-turbo",
+            messages=[{"role": "user", "content": prompt}]
+        )
+        return response.choices[0].message.content
+
+# 사용 예시
+mapper = IntelligentMapper(graph, ["FIC-101.PV", "PT-101.PV", "P-101.STATUS"])
+final_tag = mapper.resolve_tag("node_tag_123")
+print(f"Resolved Tag: {final_tag}")
+```
+
+### 3.2 검증 유틸리티: 속성 일치 확인
+```python
+def validate_mapping(resolved_tag, symbol_type, tag_metadata):
+    """심볼 타입과 실제 태그 메타데이터의 일치 여부 검증"""
+    type_map = {
+        "Pressure Transmitter": ["pressure", "bar", "psi", "pa"],
+        "Flow Meter": ["flow", "m3/h", "lpm"],
+        "Temperature Sensor": ["temp", "celsius", "k"]
+    }
+    
+    expected_keywords = type_map.get(symbol_type, [])
+    actual_desc = tag_metadata.get('description', '').lower()
+    
+    # 메타데이터 설명에 기대 키워드가 포함되어 있는지 확인
+    is_valid = any(kw in actual_desc for kw in expected_keywords)
+    return is_valid
+```
+
+---
+
+## 🚀 4. Phase 3 완료 기준 (Definition of Done)
+
+- [ ] 모든 도면 노드에 대해 **1차 후보군(Candidates)**이 자동으로 생성되는가?
+- [ ] `NetworkX` 그래프를 통해 **인접 노드 맥락(Context)**이 정확히 추출되는가?
+- [ ] LLM이 맥락을 반영하여 **최종 태그를 결정**하고 그 근거를 제시하는가?
+- [ ] 매핑된 태그의 **메타데이터(Unit, Description)**와 도면 심볼 타입 간의 일치성이 검증되는가?
+- [ ] 최종 매핑 결과가 `(도면노드ID, 시스템태그, 신뢰도, 검증결과)` 형태로 저장되는가?
--- a/.rooBackup/2026-05-02_0152/futurePlan/End-to-End
+++ b/.rooBackup/2026-05-02_0152/futurePlan/End-to-End
@@ -0,0 +1,103 @@
+# 🎨 Graph Pipeline Phase 4: 활용 및 시각화 (Application & Visualization)
+
+이 문서는 P&ID Graph Pipeline의 최종 단계인 **활용 및 시각화**의 상세 구현 계획을 다룹니다. 앞선 단계에서 구축한 [기하학적 데이터 $\rightarrow$ 위상 그래프 $\rightarrow$ 시스템 태그 매핑] 결과물을 결합하여, 운영자가 도면 상에서 실시간 공정 상태를 파악하고 장애 영향도를 분석할 수 있는 인터페이스를 구현하는 것이 목표입니다.
+
+---
+
+## 📦 1. 필수 패키지 및 기술 스택
+
+### 1.1 프론트엔드 (Visualization)
+| 기술/라이브러리 | 용도 | 비고 |
+|---|---|---|
+| `SVG / Canvas API` | P&ID 도면 렌더링 및 데이터 오버레이 | 벡터 기반 정밀 렌더링 |
+| `Cytoscape.js` / `D3.js` | 위상 그래프 시각화 및 인터랙티브 탐색 | 그래프 분석 뷰어 |
+| `Vue.js` / `React` | 전체 UI 프레임워크 및 상태 관리 | `src/Web` 구조와 통합 |
+| `Axios` / `WebSocket` | 실시간 OPC UA 데이터 수신 및 API 통신 | 실시간 업데이트 |
+
+### 1.2 백엔드 (API & Analysis)
+| 기술/라이브러리 | 용도 | 비고 |
+|---|---|---|
+| `ASP.NET Core` | Graph API 및 분석 엔드포인트 제공 | `ExperionCrawler` 메인 서버 |
+| `NetworkX` (Python) | 영향도 분석 및 경로 추적 알고리즘 실행 | 분석 엔진 (Phase 2 활용) |
+| `FastAPI` / `Flask` | Python 분석 엔진과 C# 서버 간의 브릿지 | 분석 마이크로서비스 |
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 실시간 데이터 오버레이 (Real-time Overlay)
+도면의 좌표 정보와 매핑된 시스템 태그를 연결하여 실시간 값을 표시합니다.
+1.  **매핑 데이터 로드:** `(도면노드ID, 시스템태그, 좌표)` 리스트를 프론트엔드로 전달.
+2.  **실시간 스트리밍:** `OPC UA` $\rightarrow$ `C# Server` $\rightarrow$ `WebSocket` $\rightarrow$ `Frontend`.
+3.  **동적 렌더링:** 태그 값이 변경되면 해당 좌표의 SVG 요소 색상을 변경하거나 툴팁에 현재 값을 표시.
+
+### 2.2 영향도 분석 엔진 (Impact Analysis Engine)
+특정 설비의 이상 발생 시 하류(Downstream) 영향을 계산합니다.
+1.  **분석 요청:** 사용자가 도면에서 특정 노드(예: 펌프 P-101)를 클릭.
+2.  **그래프 탐색:** Python 분석 엔진에서 `nx.descendants(G, 'P-101')` 실행.
+3.  **결과 반환:** 영향받는 모든 노드 ID 리스트와 경로(Path)를 반환.
+4.  **시각적 강조:** 도면 상에서 영향 경로를 하이라이트(예: 빨간색 선) 처리.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 [Backend] 영향도 분석 API (C# $\rightarrow$ Python Bridge)
+```csharp
+// src/Web/Controllers/PidGraphController.cs
+[HttpGet("impact/{nodeId}")]
+public async Task<IActionResult> GetImpactAnalysis(string nodeId)
+{
+    // Python 분석 마이크로서비스에 요청
+    var response = await _httpClient.GetAsync($"http://python-analysis-api/impact/{nodeId}");
+    var result = await response.Content.ReadFromJsonAsync<ImpactResult>();
+    
+    return Ok(result);
+}
+```
+
+### 3.2 [Frontend] SVG 데이터 오버레이 (JavaScript)
+```javascript
+// src/Web/wwwroot/js/pid-viewer.js
+async function updateRealtimeValues(tagData) {
+    // tagData: { "PT-101.PV": 12.5, "FT-101.PV": 150.2 }
+    for (const [tag, value] of Object.entries(tagData)) {
+        const element = document.getElementById(`tag-node-${tag}`);
+        if (element) {
+            // 값에 따라 색상 변경 (예: 임계치 초과 시 빨간색)
+            element.style.fill = value > threshold ? 'red' : 'green';
+            element.setAttribute('data-value', value);
+            
+            // 툴팁 업데이트
+            const tooltip = document.getElementById('pid-tooltip');
+            tooltip.innerText = `${tag}: ${value}`;
+        }
+    }
+}
+```
+
+### 3.3 [Analysis] 경로 추적 유틸리티 (Python)
+```python
+import networkx as nx
+
+def get_propagation_path(graph, start_node, end_node):
+    """장애 전파 경로를 최단 경로 기반으로 추출"""
+    try:
+        path = nx.shortest_path(graph, source=start_node, target=end_node)
+        return path
+    except nx.NetworkXNoPath:
+        return None
+
+# 예: P-101에서 V-105까지의 영향 경로 추출
+path = get_propagation_path(topology_graph, "P-101", "V-105")
+```
+
+---
+
+## 🚀 4. Phase 4 완료 기준 (Definition of Done)
+
+- [ ] P&ID 도면(SVG/Canvas) 위에 **실시간 OPC UA 값**이 정확한 좌표에 표시되는가?
+- [ ] 특정 노드 클릭 시 **하류 영향도 분석(Impact Analysis)** 결과가 시각적으로 하이라이트 되는가?
+- [ ] C# 메인 서버와 Python 분석 엔진 간의 **API 통신**이 지연 없이 이루어지는가?
+- [ ] 운영자가 도면을 통해 **이상 징후의 전파 경로**를 직관적으로 파악할 수 있는가?
+- [ ] 전체 파이프라인(추출 $\rightarrow$ 모델링 $\rightarrow$ 매핑 $\rightarrow$ 시각화)이 통합되어 동작하는가?
--- a/.rooBackup/2026-05-02_0448/Graph_Pipeline_Phase2.md
+++ b/.rooBackup/2026-05-02_0448/Graph_Pipeline_Phase2.md
@@ -0,0 +1,145 @@
+# 🕸️ Graph Pipeline Phase 2: 위상 모델링 (Topology Modeling)
+
+이 문서는 P&ID Graph Pipeline의 두 번째 단계인 **위상 모델링**의 상세 구현 계획을 다룹니다. 1단계에서 추출한 기하학적 객체(좌표, BBox)를 기반으로, 설비 간의 **연결성(Connectivity)**과 **흐름(Flow)**을 정의하는 지식 그래프(Knowledge Graph)를 구축하는 것이 목표입니다.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `networkx` | 그래프 데이터 구조 생성 및 알고리즘 분석 | 핵심 라이브러리 |
+| `shapely` | 객체 간 거리 계산 및 포함 관계 분석 | 1단계와 연계 |
+| `scikit-learn` | (선택) KD-Tree를 이용한 고속 근접 이웃 검색 | 대규모 도면 최적화 |
+| `matplotlib` | 생성된 그래프의 위상 구조 시각화 검증 | 디버깅용 |
+
+### 1.2 설치 명령어
+```bash
+pip install networkx shapely scikit-learn matplotlib
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 그래프 정의 (Graph Definition)
+*   **노드 (Nodes):**
+    *   `Equipment`: 펌프, 탱크, 열교환기 등 (속성: ID, 타입, BBox)
+    *   `Instrument`: 전송기, 밸브, 게이지 등 (속성: ID, 타입, BBox)
+    *   `Tag`: 텍스트 기반 태그 (속성: TagName, Value)
+*   **엣지 (Edges):**
+    *   `Pipe`: 설비-설비, 설비-계기 간의 물리적 연결 (속성: LineNumber, 방향성)
+    *   `Association`: 태그-설비 간의 논리적 연결 (속성: 관계 타입 - 예: 'associated_with')
+
+### 2.2 위상 추론 로직 (Topology Inference)
+1.  **태그-설비 결합 (Tag-to-Entity Binding):**
+    *   태그 텍스트의 BBox와 가장 가까운 심볼(Equipment/Instrument)을 찾아 `Association` 엣지를 생성합니다.
+2.  **배관 연결성 분석 (Line Connectivity):**
+    *   `LINE` 또는 `POLYLINE`의 끝점이 특정 설비의 BBox 내부에 있거나 임계 거리($\epsilon$) 이내에 있으면 두 노드를 `Pipe` 엣지로 연결합니다.
+3.  **흐름 방향성 부여 (Flow Direction):**
+    *   화살표 심볼의 방향 또는 공정 흐름 규칙을 분석하여 엣지에 `source` $\rightarrow$ `target` 방향을 설정합니다.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 그래프 구축 핵심 코드
+```python
+import networkx as nx
+from shapely.geometry import box, Point
+
+class PidTopologyBuilder:
+    def __init__(self, geometric_data, all_extracted_tags=None):
+        """
+        Phase 5 병렬 아키텍처 반영:
+        - geometric_data: Phase 1에서 추출된 기하학적 데이터
+        - all_extracted_tags: 여러 Worker(Phase 3)가 분산 추출한 태그 리스트의 통합본 (flatten_results 결과)
+        """
+        self.data = geometric_data
+        self.all_tags = all_extracted_tags if all_extracted_tags else []
+        self.G = nx.DiGraph()      # 방향성 그래프 생성
+
+    def build_graph(self):
+        # 1. 모든 객체를 노드로 추가
+        for item in self.data:
+            self.G.add_node(item['id'],
+                           type=item['type'],
+                           bbox=box(*item['bbox'].values()),
+                           value=item.get('value'))
+
+        # 2. 분산 추출된 태그 통합 및 노드 추가 (Phase 5 반영)
+        for tag in self.all_tags:
+            # tag: { "id": "...", "tagName": "...", "bbox": {...}, "type": "TEXT" }
+            self.G.add_node(tag['id'],
+                           type='TEXT',
+                           bbox=box(*tag['bbox'].values()),
+                           value=tag.get('tagName'))
+
+        # 3. 태그-설비 논리적 연결 (Association)
+        tags = [n for n, d in self.G.nodes(data=True) if d['type'] == 'TEXT']
+        equipments = [n for n, d in self.G.nodes(data=True) if d['type'] != 'TEXT']
+
+        for tag in tags:
+            best_match = self._find_nearest_equipment(tag, equipments)
+            if best_match:
+                self.G.add_edge(tag, best_match, relation='associated_with')
+
+        # 4. 배관 기반 물리적 연결 (Pipe)
+        lines = [n for n, d in self.G.nodes(data=True) if d['type'] in ['LINE', 'POLYLINE']]
+        for line in lines:
+            connected_nodes = self._find_connected_nodes(line, equipments)
+            if len(connected_nodes) >= 2:
+                # 라인을 통해 연결된 두 설비 간 엣지 생성
+                self.G.add_edge(connected_nodes[0], connected_nodes[1], relation='pipe')
+
+    def _find_nearest_equipment(self, tag_id, equipment_ids):
+        tag_bbox = self.G.nodes[tag_id]['bbox']
+        min_dist = float('inf')
+        nearest = None
+        for eq_id in equipment_ids:
+            eq_bbox = self.G.nodes[eq_id]['bbox']
+            dist = tag_bbox.distance(eq_bbox)
+            if dist < min_dist:
+                min_dist = dist
+                nearest = eq_id
+        return nearest if min_dist < 50.0 else None # 임계값 50.0
+
+    def _find_connected_nodes(self, line_id, equipment_ids):
+        # 라인 BBox와 교차하는 설비 BBox를 찾아 연결 후보로 반환하는 단순화된 예시
+        # (실제 구현 시 line의 coordinates로 시작/끝점이 설비 BBox에 포함되는지 확인)
+        return [eq for eq in equipment_ids if self.G.nodes[eq]['bbox'].intersects(self.G.nodes[line_id]['bbox'])]
+
+# 실행 (Phase 5 Orchestrator 관점)
+# 1. Phase 1 결과 로드
+# 2. Phase 3 Worker들의 결과를 flatten_results()로 통합
+all_tags = flatten_results([worker1_res, worker2_res, worker3_res, worker4_res, worker5_res])
+
+builder = PidTopologyBuilder(geometric_data, all_extracted_tags=all_tags)
+builder.build_graph()
+graph = builder.G
+```
+
+### 3.2 위상 분석 유틸리티: 영향도 분석 (Impact Analysis)
+```python
+def analyze_impact(graph, start_node):
+    """특정 설비 장애 시 하류(Downstream)에 영향을 받는 모든 노드 추출"""
+    # BFS를 통해 도달 가능한 모든 노드 탐색
+    impacted_nodes = nx.descendants(graph, start_node)
+    return list(impacted_nodes)
+
+# 예: P-101 펌프 고장 시 영향 분석
+affected = analyze_impact(graph, "node_P101")
+print(f"Impacted Equipment: {affected}")
+```
+
+---
+
+## 🚀 4. Phase 2 완료 기준 (Definition of Done)
+
+- [ ] 모든 설비와 계기가 그래프의 **노드(Node)**로 변환되었는가?
+- [ ] 분산 추출된 태그 리스트가 `flatten_results`를 통해 통합되어 그래프에 반영되었는가? (Phase 5 반영)
+- [ ] 태그와 설비 간의 **논리적 연결(Association)**이 정확하게 매핑되었는가?
+- [ ] 배관(Line)을 통해 설비 간의 **물리적 연결(Pipe Edge)**이 생성되었는가?
+- [ ] `nx.descendants` 등을 통해 특정 노드로부터의 **흐름 추적(Flow Tracing)**이 가능한가?
+- [ ] 생성된 그래프 구조가 JSON 또는 GraphML 등의 형태로 저장되어 Phase 3로 전달 가능한가?
--- a/.rooBackup/2026-05-02_0517/futurePlan/End-to-End
+++ b/.rooBackup/2026-05-02_0517/futurePlan/End-to-End
@@ -0,0 +1,158 @@
+# 🧠 Graph Pipeline Phase 3: 지능형 매핑 및 검증 (Intelligent Mapping & Validation)
+
+이 문서는 P&ID Graph Pipeline의 세 번째 단계인 **지능형 매핑 및 검증**의 상세 구현 계획을 다룹니다. 2단계에서 구축한 위상 그래프(Topology Graph)를 활용하여, 도면 상의 가상 노드들을 실제 Experion 시스템의 **실시간 태그(Real-time Tags)**와 정밀하게 연결하고 그 타당성을 검증하는 것이 목표입니다.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `openai` / `langchain` | LLM API 연동 및 프롬프트 체이닝 | 매핑 추론 및 검증 핵심 |
+| `fuzzywuzzy` / `rapidfuzz` | 태그 이름 간의 문자열 유사도 계산 | 1차 후보군 추출용 |
+| `networkx` | 그래프 기반 인접 노드(Context) 추출 | 2단계 그래프 활용 |
+| `pydantic` | 매핑 결과의 구조화 및 유효성 검사 | 데이터 정규화 |
+| `requests` | ExperionCrawler API (C#)와 통신 | 실제 태그 리스트 조회 |
+
+### 1.2 설치 명령어
+```bash
+pip install openai langchain rapidfuzz networkx pydantic requests
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 매핑 파이프라인 (Mapping Pipeline)
+단순 이름 매칭의 한계를 극복하기 위해 **[후보 추출 $\rightarrow$ 맥락 분석 $\rightarrow$ LLM 확정]**의 3단계 프로세스를 거칩니다.
+
+1.  **1차 후보 추출 (Candidate Generation):**
+    *   도면의 태그 텍스트와 Experion 시스템의 전체 태그 리스트를 `RapidFuzz`로 비교하여 유사도 상위 N개를 추출합니다.
+2.  **맥락 정보 수집 (Context Gathering):**
+    *   해당 노드의 그래프 상 인접 노드(1-hop, 2-hop) 정보를 수집합니다.
+    *   예: "현재 노드는 `PT-101`이며, 상류에 `P-101(Pump)`이 있고 하류에 `V-101(Valve)`이 있음."
+3.  **LLM 기반 최종 매핑 (LLM-based Resolution):**
+    *   후보 태그 리스트와 위상 맥락을 LLM에게 전달하여 가장 타당한 태그를 선택하게 합니다.
+
+### 2.2 상호 검증 로직 (Cross-Validation)
+매핑된 결과가 실제 공정 데이터와 일치하는지 검증합니다.
+*   **위상적 일관성:** 도면에서 $A \rightarrow B$ 순서라면, 실제 데이터에서도 `A`의 변화가 `B`에 영향을 주는지 상관관계 분석.
+*   **속성 일치성:** 도면의 심볼 타입(예: Pressure Transmitter)과 실제 태그의 속성(예: Engineering Unit = 'bar' 또는 'psi')이 일치하는지 확인.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 맥락 기반 매핑 엔진
+```python
+import networkx as nx
+import asyncio
+from rapidfuzz import process, fuzz
+from openai import AsyncOpenAI # 비동기 클라이언트로 변경
+
+client = AsyncOpenAI(api_key="your-api-key")
+
+class IntelligentMapper:
+    def __init__(self, graph, system_tags):
+        self.graph = graph          # Phase 2에서 생성된 NetworkX 그래프
+        self.system_tags = system_tags # Experion 시스템의 전체 태그 리스트
+
+    def get_node_context(self, node_id):
+        """노드의 주변 위상 정보를 텍스트로 변환"""
+        neighbors = list(self.graph.neighbors(node_id))
+        context = []
+        for n in neighbors:
+            attr = self.graph.nodes[n]
+            context.append(f"Connected to {attr.get('value', n)} (Type: {attr.get('type')})")
+        return ", ".join(context)
+
+    async def _resolve_generic(self, node_id, category_prompt):
+        """공통 매핑 로직 (비동기)"""
+        tag_text = self.graph.nodes[node_id].get('value', '')
+        candidates = process.extract(tag_text, self.system_tags, scorer=fuzz.WRatio, limit=5)
+        context = self.get_node_context(node_id)
+        
+        prompt = f"""
+        {category_prompt}
+        P&ID 도면의 태그 '{tag_text}'를 실제 시스템 태그와 매핑해야 합니다.
+        위상 맥락: {context}
+        후보 리스트: {candidates}
+        
+        위 맥락을 고려할 때 가장 적절한 시스템 태그 하나만 반환하세요.
+        이유가 불분명하면 'UNKNOWN'을 반환하세요.
+        """
+        
+        response = await client.chat.completions.create(
+            model="gpt-4-turbo",
+            messages=[{"role": "user", "content": prompt}]
+        )
+        return response.choices[0].message.content
+
+    # --- 전문화된 Worker 함수들 (Phase 5 병렬 처리 반영) ---
+    
+    async def extract_transmitters(self, node_ids):
+        """전송기(Transmitter) 전문 매핑 Worker"""
+        prompt = "당신은 계측기 전문 엔지니어입니다. 특히 Pressure/Flow/Level Transmitter 매핑에 특화되어 있습니다."
+        return {nid: await self._resolve_generic(nid, prompt) for nid in node_ids}
+
+    async def extract_valves(self, node_ids):
+        """밸브(Valve) 전문 매핑 Worker"""
+        prompt = "당신은 밸브 및 액추에이터 전문 엔지니어입니다. 밸브의 개폐 상태 및 제어 태그 매핑에 특화되어 있습니다."
+        return {nid: await self._resolve_generic(nid, prompt) for nid in node_ids}
+
+    async def extract_equipment(self, node_ids):
+        """주요 설비(Pump, Tank 등) 전문 매핑 Worker"""
+        prompt = "당신은 공정 설비 전문 엔지니어입니다. 펌프, 탱크, 열교환기 등의 메인 설비 태그 매핑에 특화되어 있습니다."
+        return {nid: await self._resolve_generic(nid, prompt) for nid in node_ids}
+
+# 사용 예시 (Phase 5 Orchestrator 관점)
+async def main():
+    mapper = IntelligentMapper(graph, ["FIC-101.PV", "PT-101.PV", "P-101.STATUS"])
+    
+    # 분류별로 노드 그룹화 (예시)
+    transmitter_nodes = ["node_1", "node_2"]
+    valve_nodes = ["node_3", "node_4"]
+    equipment_nodes = ["node_5"]
+
+    # asyncio.gather를 통한 병렬 호출
+    results = await asyncio.gather(
+        mapper.extract_transmitters(transmitter_nodes),
+        mapper.extract_valves(valve_nodes),
+        mapper.extract_equipment(equipment_nodes)
+    )
+    
+    # 결과 통합 (flatten)
+    final_mapping = {**results[0], **results[1], **results[2]}
+    print(f"Parallel Resolved Mapping: {final_mapping}")
+
+asyncio.run(main())
+```
+
+### 3.2 검증 유틸리티: 속성 일치 확인
+```python
+def validate_mapping(resolved_tag, symbol_type, tag_metadata):
+    """심볼 타입과 실제 태그 메타데이터의 일치 여부 검증"""
+    type_map = {
+        "Pressure Transmitter": ["pressure", "bar", "psi", "pa"],
+        "Flow Meter": ["flow", "m3/h", "lpm"],
+        "Temperature Sensor": ["temp", "celsius", "k"]
+    }
+    
+    expected_keywords = type_map.get(symbol_type, [])
+    actual_desc = tag_metadata.get('description', '').lower()
+    
+    # 메타데이터 설명에 기대 키워드가 포함되어 있는지 확인
+    is_valid = any(kw in actual_desc for kw in expected_keywords)
+    return is_valid
+```
+
+---
+
+## 🚀 4. Phase 3 완료 기준 (Definition of Done)
+
+- [ ] 모든 도면 노드에 대해 **1차 후보군(Candidates)**이 자동으로 생성되는가?
+- [ ] `NetworkX` 그래프를 통해 **인접 노드 맥락(Context)**이 정확히 추출되는가?
+- [ ] LLM이 맥락을 반영하여 **최종 태그를 결정**하고 그 근거를 제시하는가?
+- [ ] 매핑된 태그의 **메타데이터(Unit, Description)**와 도면 심볼 타입 간의 일치성이 검증되는가?
+- [ ] 최종 매핑 결과가 `(도면노드ID, 시스템태그, 신뢰도, 검증결과)` 형태로 저장되는가?
--- a/.rooBackup/2026-05-02_0530/futurePlan/End-to-End
+++ b/.rooBackup/2026-05-02_0530/futurePlan/End-to-End
@@ -0,0 +1,145 @@
+# 🎨 Graph Pipeline Phase 4: 활용 및 시각화 (Application & Visualization)
+
+이 문서는 P&ID Graph Pipeline의 최종 단계인 **활용 및 시각화**의 상세 구현 계획을 다룹니다. 앞선 단계에서 구축한 [기하학적 데이터 $\rightarrow$ 위상 그래프 $\rightarrow$ 시스템 태그 매핑] 결과물을 결합하여, 운영자가 도면 상에서 실시간 공정 상태를 파악하고 장애 영향도를 분석할 수 있는 인터페이스를 구현하는 것이 목표입니다.
+
+---
+
+## 📦 1. 필수 패키지 및 기술 스택
+
+### 1.1 프론트엔드 (Visualization)
+| 기술/라이브러리 | 용도 | 비고 |
+|---|---|---|
+| `SVG / Canvas API` | P&ID 도면 렌더링 및 데이터 오버레이 | 벡터 기반 정밀 렌더링 |
+| `Cytoscape.js` / `D3.js` | 위상 그래프 시각화 및 인터랙티브 탐색 | 그래프 분석 뷰어 |
+| `Vue.js` / `React` | 전체 UI 프레임워크 및 상태 관리 | `src/Web` 구조와 통합 |
+| `Axios` / `WebSocket` | 실시간 OPC UA 데이터 수신 및 API 통신 | 실시간 업데이트 |
+
+### 1.2 백엔드 (API & Analysis)
+| 기술/라이브러리 | 용도 | 비고 |
+|---|---|---|
+| `ASP.NET Core` | Graph API 및 분석 엔드포인트 제공 | `ExperionCrawler` 메인 서버 |
+| `NetworkX` (Python) | 영향도 분석 및 경로 추적 알고리즘 실행 | 분석 엔진 (Phase 2 활용) |
+| `FastAPI` / `Flask` | Python 분석 엔진과 C# 서버 간의 브릿지 | 분석 마이크로서비스 |
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 실시간 데이터 오버레이 (Real-time Overlay)
+도면의 좌표 정보와 매핑된 시스템 태그를 연결하여 실시간 값을 표시합니다.
+1.  **매핑 데이터 로드:** `(도면노드ID, 시스템태그, 좌표)` 리스트를 프론트엔드로 전달.
+2.  **실시간 스트리밍:** `OPC UA` $\rightarrow$ `C# Server` $\rightarrow$ `WebSocket` $\rightarrow$ `Frontend`.
+3.  **동적 렌더링:** 태그 값이 변경되면 해당 좌표의 SVG 요소 색상을 변경하거나 툴팁에 현재 값을 표시.
+
+### 2.2 영향도 분석 엔진 (Impact Analysis Engine)
+특정 설비의 이상 발생 시 하류(Downstream) 영향을 계산합니다.
+1.  **분석 요청:** 사용자가 도면에서 특정 노드(예: 펌프 P-101)를 클릭.
+2.  **그래프 탐색:** Python 분석 엔진에서 `nx.descendants(G, 'P-101')` 실행.
+3.  **결과 반환:** 영향받는 모든 노드 ID 리스트와 경로(Path)를 반환.
+4.  **시각적 강조:** 도면 상에서 영향 경로를 하이라이트(예: 빨간색 선) 처리.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 [Backend] 영향도 분석 API (C# $\rightarrow$ Python Bridge)
+```csharp
+// src/Web/Controllers/PidGraphController.cs
+
+// 1. 분석 상태 추적을 위한 DTO
+public record AnalysisStatus(string TaskId, double Progress, string Status, string Message);
+
+// 2. 실시간 진행 상태 조회 API (Phase 5 병렬 처리 반영)
+[HttpGet("status/{taskId}")]
+public async Task<IActionResult> GetAnalysisStatus(string taskId)
+{
+    // Orchestrator가 관리하는 작업 상태 저장소(Redis/MemoryCache)에서 조회
+    var status = await _statusService.GetStatusAsync(taskId);
+    if (status == null) return NotFound();
+    
+    return Ok(new {
+        taskId = status.TaskId,
+        progress = status.Progress, // 0.0 ~ 1.0
+        status = status.Status,     // "Processing", "Completed", "Failed"
+        message = status.Message
+    });
+}
+
+[HttpGet("impact/{nodeId}")]
+public async Task<IActionResult> GetImpactAnalysis(string nodeId)
+{
+    // Python 분석 마이크로서비스에 요청
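+    var response = await _httpClient.GetAsync($"http://python-analysis-api/impact/{Uri.EscapeDataString(nodeId)}");
+    if (!response.IsSuccessStatusCode) return StatusCode(StatusCodes.Status502BadGateway);
+    var result = await response.Content.ReadFromJsonAsync<ImpactResult>();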
+    return Ok(result);
+}
+```
+
+### 3.2 [Frontend] SVG 데이터 오버레이 및 진행률 표시 (JavaScript)
+```javascript
+// src/Web/wwwroot/js/pid-viewer.js
+
+// 1. 실시간 값 업데이트
+async function updateRealtimeValues(tagData) {
+    for (const [tag, value] of Object.entries(tagData)) {
+        const element = document.getElementById(`tag-node-${tag}`);
+        if (element) {
+            element.style.fill = value > threshold ? 'red' : 'green';
+            element.setAttribute('data-value', value);
+            const tooltip = document.getElementById('pid-tooltip');
+            tooltip.innerText = `${tag}: ${value}`;
+        }
+    }
+}
+
+// 2. 분석 진행 상태 표시 (Phase 5 병렬 처리 반영)
+async function trackAnalysisProgress(taskId) {
+    const progressBar = document.getElementById('analysis-progress-bar');
+    const statusText = document.getElementById('analysis-status-text');
+
+    const pollStatus = async () => {
+        const response = await fetch(`/api/pid/status/${taskId}`);
+        const data = await response.json();
+
+        // 프로그레스 바 업데이트 (예: 20% -> 40% -> 100%)
+        progressBar.style.width = `${data.progress * 100}%`;
+        statusText.innerText = `분석 중... ${Math.round(data.progress * 100)}% (${data.message})`;
+
+        if (data.status !== 'Completed' && data.status !== 'Failed') {
+            setTimeout(pollStatus, 1000); // 1초 간격 폴링
+        } else {
+            statusText.innerText = data.status === 'Completed' ? '분석 완료!' : '분석 실패';
+        }
+    };
+
+    pollStatus();
+}
+```
+
+### 3.3 [Analysis] 경로 추적 유틸리티 (Python)
+```python
+import networkx as nx
+
+def get_propagation_path(graph, start_node, end_node):
+    """장애 전파 경로를 최단 경로 기반으로 추출"""
+    try:
+        path = nx.shortest_path(graph, source=start_node, target=end_node)
+        return path
+    except (nx.NetworkXNoPath, nx.NodeNotFound):
+        return None
+
+# 예: P-101에서 V-105까지의 영향 경로 추출
+path = get_propagation_path(topology_graph, "P-101", "V-105")
+```
+
+---
+
+## 🚀 4. Phase 4 완료 기준 (Definition of Done)
+
+- [ ] P&ID 도면(SVG/Canvas) 위에 **실시간 OPC UA 값**이 정확한 좌표에 표시되는가?
+- [ ] 병렬 처리 중인 분석 작업의 **진행 상태(Progress Bar)**가 UI에 실시간으로 반영되는가? (Phase 5 반영)
+- [ ] 특정 노드 클릭 시 **하류 영향도 분석(Impact Analysis)** 결과가 시각적으로 하이라이트 되는가?
+- [ ] C# 메인 서버와 Python 분석 엔진 간의 **API 통신**이 지연 없이 이루어지는가?
+- [ ] 운영자가 도면을 통해 **이상 징후의 전파 경로**를 직관적으로 파악할 수 있는가?
+- [ ] 전체 파이프라인(`추출 → 모델링 → 매핑 → 시각화`)이 통합되어 동작하는가?
--- a/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase1.md
+++ b/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase1.md
@@ -0,0 +1,219 @@
+# 🛠️ Graph Pipeline Phase 1: 기하학적 데이터 추출 (Geometric Extraction)
+
+이 문서는 P&ID Graph Pipeline의 첫 번째 단계인 **기하학적 데이터 추출**의 상세 구현 계획을 다룹니다. 목표는 단순한 텍스트 추출을 넘어, 도면 내 모든 객체의 **물리적 위치(좌표)**와 **기하학적 속성**을 보존하여 이후 위상 모델링(Topology Modeling)이 가능하도록 하는 것입니다.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `ezdxf` | DXF 파일 파싱 및 엔티티 추출 | 핵심 라이브러리 |
+| `shapely` | 기하학적 연산 (Intersection, Distance, Bounding Box) | 좌표 기반 분석 필수 |
+| `numpy` | 대량의 좌표 데이터 계산 및 행렬 연산 | 성능 최적화 |
+| `pandas` | 추출된 객체 데이터의 구조화 및 CSV/JSON 저장 | 데이터 관리 |
+| `pydantic` | 추출 데이터의 스키마 정의 및 유효성 검증 | 데이터 무결성 보장 |
+| `pytesseract` / `pdf2image` | PDF 도면의 영역 기반 OCR 추출 | PDF 처리 시 필요 |
+
+### 1.2 설치 명령어
+```bash
+pip install ezdxf shapely numpy pandas pydantic pytesseract pdf2image
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 데이터 모델 (Schema)
+모든 추출 객체는 다음과 같은 공통 속성을 갖는 `GeometricEntity` 모델을 따릅니다.
+
+```python
+from pydantic import BaseModel
+from typing import List, Optional, Union, Tuple
+
+class BoundingBox(BaseModel):
+    min_x: float
+    min_y: float
+    max_x: float
+    max_y: float
+    center: Tuple[float, float]
+
+class GeometricEntity(BaseModel):
+    entity_id: str
+    entity_type: str  # TEXT, LINE, CIRCLE, POLYLINE, ARC
+    layer: str
+    bbox: BoundingBox
+    properties: dict  # 텍스트 값, 색상, 선 굵기 등
+    coordinates: List[Tuple[float, float]]  # 시작점, 끝점 또는 정점 리스트
+```
+
+### 2.2 처리 파이프라인 흐름
+1.  **DXF Load:** `ezdxf.readfile()`을 통해 도면 로드.
+2.  **Entity Iteration:** 모든 레이어의 엔티티를 순회하며 타입별 분류.
+3.  **Coordinate Extraction:** 
+    *   `TEXT`: 삽입점(Insertion Point) 및 텍스트 길이를 이용한 BBox 계산.
+    *   `LINE`: 시작점(Start)과 끝점(End) 추출.
+    *   `POLYLINE`: 모든 정점(Vertices) 리스트 추출.
+    *   `CIRCLE/ARC`: 중심점(Center)과 반지름(Radius) 추출.
+4.  **Spatial Normalization:** 도면 좌표계를 분석 시스템 좌표계로 정규화.
+5.  **Structured Export:** JSON 또는 DB(PostgreSQL/PostGIS)에 저장.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 DXF 기하학적 추출 핵심 코드
+```python
+import ezdxf
+import re
+import json
+from shapely.geometry import box, LineString, Point
+from typing import List, Optional, Tuple
+
+class PidGeometricExtractor:
+    def __init__(self, file_path: str):
+        self.doc = ezdxf.readfile(file_path)
+        self.msp = self.doc.modelspace()
+
+    def clean_text(self, text: str) -> str:
+        """DXF 특수 제어 문자 및 MTEXT 포맷팅을 최대한 제거하여 LLM 토큰 부하 감소"""
+        if not text:
+            return ""
+        
+        # 1. MTEXT 포맷팅 및 제어 문자 제거
+        # \P(줄바꿈), \W(너비), \L(밑줄), \A(정렬), \C(색상), \H(높이), \S(스택), \T(탭) 및 관련 인자 제거
+        text = re.sub(r'\\[PWLACHST]\d*;?', ' ', text)
+        
+        # 2. 중괄호 { } 제거 (MTEXT에서 서식 지정 시 사용됨)
+        text = re.sub(r'[\{\}]', ' ', text)
+        
+        # 3. DXF 특수 제어 문자 제거 (%%U: Underline, %%O: Overline, %%S: Strikethrough, %%R: Registered)
+        text = re.sub(r'%%[UOSR]', ' ', text)
+        
+        # 4. 불필요한 특수 기호 및 반복되는 공백 정제
+        # - 연속된 공백을 하나로 통합
+        # - 텍스트 양 끝의 공백 제거
+        text = re.sub(r'\s+', ' ', text).strip()
+        
+        return text
+
+    def get_bbox(self, entity) -> Optional[box]:
+        """엔티티의 Bounding Box를 계산하여 shapely box 객체로 반환"""
+        try:
+            if entity.dxftype() == 'TEXT':
+                p = entity.dxf.insert
+                h = entity.dxf.height
+                # 텍스트 길이에 따른 대략적인 너비 계산 (글자수 * 높이 * 0.6)
+                width = len(entity.dxf.text) * h * 0.6
+                return box(p.x, p.y, p.x + width, p.y + h)
+            
+            elif entity.dxftype() == 'MTEXT':
+                p = entity.dxf.insert
+                h = entity.dxf.get('char_height', 2.5)
+                # MTEXT는 보통 width 속성이 정의되어 있으나, 미설정(optional)인 경우 글자수 기반으로 추정
+                w = entity.dxf.get('width', 0) if entity.dxf.get('width', 0) > 0 else len(entity.text) * h * 0.6
+                return box(p.x, p.y, p.x + w, p.y + h)
+
+            elif entity.dxftype() == 'LINE':
+                start = entity.dxf.start
+                end = entity.dxf.end
+                return box(min(start.x, end.x), min(start.y, end.y),
+                          max(start.x, end.x), max(start.y, end.y))
+            
+            elif entity.dxftype() == 'LWPOLYLINE':
+                points = entity.get_points()
+                xs = [p[0] for p in points]
+                ys = [p[1] for p in points]
+                return box(min(xs), min(ys), max(xs), max(ys))
+                
+        except Exception as e:
+            print(f"Error calculating bbox for {entity.dxftype()}: {e}")
+        return None
+
+    def extract_and_save(self, output_path: str):
+        """
+        추출된 기하학적 데이터를 파일로 저장하여 Phase 3 Worker들이
+        공유 메모리/파일 시스템을 통해 참조할 수 있도록 함 (Phase 5 병렬 아키텍처 반영)
+        """
+        results = []
+        for entity in self.msp:
+            bbox_obj = self.get_bbox(entity)
+            if bbox_obj:
+                # 텍스트 값 추출 및 정제
+                raw_text = ""
+                if entity.dxftype() == 'TEXT':
+                    raw_text = entity.dxf.text
+                elif entity.dxftype() == 'MTEXT':
+                    raw_text = entity.text
+                
+                results.append({
+                    "id": entity.dxf.handle,
+                    "type": entity.dxftype(),
+                    "layer": entity.dxf.layer,
+                    "bbox": {
+                        "min_x": bbox_obj.bounds[0],
+                        "min_y": bbox_obj.bounds[1],
+                        "max_x": bbox_obj.bounds[2],
+                        "max_y": bbox_obj.bounds[3]
+                    },
+                    "raw_value": raw_text,
+                    "clean_value": self.clean_text(raw_text) if raw_text else None
+                })
+        
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(results, f, ensure_ascii=False, indent=4)
+        
+        return output_path
+
+# 사용 예시 (Phase 5 Orchestrator 관점)
+extractor = PidGeometricExtractor("plant_drawing.dxf")
+# 데이터를 직접 반환받지 않고 공유 저장소(파일)에 적재
+geo_data_path = extractor.extract_and_save("shared_geo_data.json")
+```
+
+### 3.2 유틸리티 함수: 인접성 체크 (Proximity Utility)
+추후 2단계(위상 모델링)에서 사용할 핵심 유틸리티입니다.
+
+```python
+from shapely.geometry import Point
+
+def is_near(entity_a_bbox, entity_b_bbox, threshold=5.0):
+    """두 객체의 Bounding Box 간의 최단 거리가 임계값 이내인지 확인"""
+    return entity_a_bbox.distance(entity_b_bbox) <= threshold
+
+def is_inside(point, bbox):
+    """특정 점이 Bounding Box 내부에 있는지 확인"""
+    return bbox.contains(Point(point))
+```
+
+---
+
+## 🚀 4. Phase 1 완료 기준 (Definition of Done)
+
+- [ ] DXF 파일 내 모든 `TEXT`, `LINE`, `POLYLINE`의 좌표 데이터가 누락 없이 추출되는가?
+- [ ] 각 객체별로 정확한 `Bounding Box`가 계산되어 저장되는가?
+- [ ] 추출된 데이터가 `GeometricEntity` 스키마에 맞게 JSON 파일로 저장되어 Worker들이 공유 참조 가능한가? (Phase 5 반영)
+- [ ] (선택 사항) PDF 도면의 경우 OCR을 통해 텍스트의 좌표값이 추출되는가?
+
+---
+
+## 🧐 감독자 진단 결과 (2026-05-02)
+
+### 1. 프로그램 설계 점검
+- **강점**: `ezdxf`와 `shapely`를 조합하여 기하학적 데이터(BBox, 좌표)를 보존하려는 접근 방식이 매우 적절함. 특히 Phase 5의 병렬 아키텍처를 고려하여 데이터를 파일/공유 저장소에 적재하는 구조는 확장성 면에서 우수함.
+- **보완 필요 사항**:
+    - **MTEXT 처리**: 진단 당시의 예시 코드(`3.1`)는 `TEXT` 엔티티만 처리하고 있었으나, 실제 DXF 파일 분석 결과 `MTEXT` 엔티티가 다수 존재함. `MTEXT`는 내부 포맷팅 코드(예: `\P`, `\W`)가 포함되어 있어 단순 텍스트 추출 시 정제가 필요함. (현재 `3.1` 코드에는 `MTEXT` 처리와 `clean_text()`가 반영되어 있음)
+    - **BBox 계산 정밀도**: 진단 당시의 예시 코드는 `TEXT` 엔티티의 BBox를 `p.x + 10, p.y + 5`와 같이 상수로 처리하고 있었음. 실제 도면의 폰트 크기(`height`)와 정렬 방식(`align`)을 반영한 동적 계산 로직이 반드시 추가되어야 함. (현재 `3.1` 코드는 `height` 기반 동적 계산을 반영했으나, 정렬 방식은 아직 반영되지 않음)
+
+### 2. 실제 도면(`No-10_Plant_PID.dxf`) 분석 기반 차이점
+- **엔티티 규모**: 총 28,819개의 엔티티가 존재하여 데이터 양이 상당함. 단순 리스트 저장보다는 인덱싱 전략이 필요할 수 있음.
+- **텍스트 복잡도**:
+    - `MTEXT` 내에 `\P` (줄바꿈), `\L` (밑줄) 등 제어 문자가 포함된 수정 사항(Revision) 텍스트가 많음. 이를 그대로 추출하면 위상 분석 시 노이즈가 될 가능성이 높음.
+    - `%%U` (Underline)와 같은 DXF 특수 제어 문자가 텍스트 값에 포함되어 있어, 이를 제거하는 전처리 과정이 필수적임.
+- **데이터 특성**: `IA-10922-25A-F1A-n`와 같은 복합 파이프라인 번호(Pipe Line Number) 형식이 확인됨. 이를 일반 태그(Tag Name)와 명확히 구분하여 추출하고 관리하는 로직이 Phase 2/3에서 중요하게 작용할 것으로 보임.
+
+### 3. 최종 권고 사항
+1. **MTEXT 지원 추가**: `PidGeometricExtractor`에 `MTEXT` 처리 로직을 추가하고, 제어 문자를 제거하는 `clean_text()` 유틸리티 함수를 구현할 것.
+2. **동적 BBox 구현**: `entity.dxf.height`를 활용하여 텍스트 크기에 맞는 정확한 Bounding Box를 계산하도록 수정할 것.
+3. **전처리 파이프라인 강화**: 추출 단계에서 `%%U` 등의 특수 문자를 제거하는 정제 단계를 추가하여 데이터 품질을 높일 것.
--- a/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase2.md
+++ b/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase2.md
@@ -0,0 +1,180 @@
+# 🕸️ Graph Pipeline Phase 2: 위상 모델링 (Topology Modeling)
+
+이 문서는 P&ID Graph Pipeline의 두 번째 단계인 **위상 모델링**의 상세 구현 계획을 다룹니다. 1단계에서 추출한 기하학적 객체(좌표, BBox)를 기반으로, 설비 간의 **연결성(Connectivity)**과 **흐름(Flow)**을 정의하는 지식 그래프(Knowledge Graph)를 구축하는 것이 목표입니다.
+
+---
+
+## 🚩 [Supervisor's Audit] 진단 결과 및 개선 권고
+
+**감독자 진단 일자:** 2026-05-02
+**진단 결과:** ⚠️ **부분적 보완 필요 (Partial Improvement Required)**
+
+### 🔍 주요 진단 내용
+1.  **연결성 추론의 단순성 (Critical):** 현재 `_find_connected_nodes`가 단순 BBox 교차(`intersects`)만 확인하고 있습니다. 실제 P&ID에서 배관(Line)은 설비의 외곽선에 닿거나 매우 근접한 형태로 나타나며, 단순 BBox 교차는 오탐(False Positive) 확률이 매우 높습니다.
+2.  **방향성 정의 부재 (Medium):** `DiGraph`를 사용하지만, 실제 엣지에 방향성을 부여하는 구체적인 로직(화살표 인식, 공정 흐름 규칙)이 예시 코드에 누락되어 있습니다.
+3.  **임계값 하드코딩 (Low):** `min_dist < 50.0`과 같은 임계값이 하드코딩되어 있어, 도면 스케일(Scale)이 변경될 경우 대응이 불가능합니다.
+4.  **데이터 무결성 검증 부족 (Medium):** 그래프 생성 후 고립된 노드(Isolated Nodes)나 비정상적인 루프에 대한 검증 단계가 없습니다.
+
+### 🛠️ 수정 및 반영 사항
+- **연결성 로직 고도화:** BBox 교차 방식에서 $\rightarrow$ **Line End-point 기반 근접 분석** 방식으로 변경.
+- **방향성 추론 단계 명시:** 화살표 심볼 및 공정 흐름 기반의 `source` $\rightarrow$ `target` 결정 로직 추가.
+- **설정의 외부화:** 임계값($\epsilon$)을 설정 파일이나 파라미터로 관리하도록 구조 변경.
+- **검증 단계 추가:** 그래프 구축 후 위상 무결성 검사(Topology Validation) 단계 도입.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `networkx` | 그래프 데이터 구조 생성 및 알고리즘 분석 | 핵심 라이브러리 |
+| `shapely` | 객체 간 거리 계산 및 포함 관계 분석 | 1단계와 연계 |
+| `scikit-learn` | (선택) KD-Tree를 이용한 고속 근접 이웃 검색 | 대규모 도면 최적화 |
+| `matplotlib` | 생성된 그래프의 위상 구조 시각화 검증 | 디버깅용 |
+
+### 1.2 설치 명령어
+```bash
+pip install networkx shapely scikit-learn matplotlib
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 그래프 정의 (Graph Definition)
+*   **노드 (Nodes):**
+    *   `Equipment`: 펌프, 탱크, 열교환기 등 (속성: ID, 타입, BBox, CenterPoint)
+    *   `Instrument`: 전송기, 밸브, 게이지 등 (속성: ID, 타입, BBox, CenterPoint)
+    *   `Tag`: 텍스트 기반 태그 (속성: TagName, Value, BBox)
+*   **엣지 (Edges):**
+    *   `Pipe`: 설비-설비, 설비-계기 간의 물리적 연결 (속성: LineNumber, 방향성, 연결타입)
+    *   `Association`: 태그-설비 간의 논리적 연결 (속성: 관계 타입 - 예: 'belongs_to')
+
+### 2.2 위상 추론 로직 (Topology Inference)
+1.  **태그-설비 결합 (Tag-to-Entity Binding):**
+    *   태그 텍스트의 BBox와 가장 가까운 심볼(Equipment/Instrument)을 찾아 `Association` 엣지를 생성합니다.
+2.  **배관 연결성 분석 (Line Connectivity) [개선]:**
+    *   `LINE` 또는 `POLYLINE`의 **시작점과 끝점(End-points)**을 추출합니다.
+    *   각 끝점이 특정 설비의 BBox 내부에 있거나, 설정된 임계 거리($\epsilon$) 이내에 있을 때만 `Pipe` 엣지로 연결합니다. (단순 BBox 교차 방식 지양)
+3.  **흐름 방향성 부여 (Flow Direction) [추가]:**
+    *   배관 상의 화살표 심볼 위치와 방향을 분석하여 `source` $\rightarrow$ `target`을 결정합니다.
+    *   화살표가 없는 경우, 공정 표준(예: 탱크 $\rightarrow$ 펌프 $\rightarrow$ 밸브)에 따른 기본 방향을 부여합니다.
+4.  **위상 무결성 검증 (Topology Validation) [추가]:**
+    *   연결되지 않은 고립 노드 탐색 및 리포팅.
+    *   비정상적인 사이클(Cycle) 또는 단절 구간 확인.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 그래프 구축 핵심 코드
+```python
+import networkx as nx
+from shapely.geometry import box, Point, LineString
+
+class PidTopologyBuilder:
+    def __init__(self, geometric_data, all_extracted_tags=None, config=None):
+        """
+        - geometric_data: Phase 1에서 추출된 기하학적 데이터
+        - all_extracted_tags: 통합된 태그 리스트
+        - config: {'dist_threshold': 50.0, 'tag_threshold': 100.0} 등 설정값
+        """
+        self.data = geometric_data
+        self.all_tags = all_extracted_tags if all_extracted_tags else []
+        self.config = config if config else {'dist_threshold': 50.0, 'tag_threshold': 100.0}
+        self.G = nx.DiGraph()      # 방향성 그래프 생성
+
+    def build_graph(self):
+        # 1. 모든 객체를 노드로 추가
+        for item in self.data:
+            self.G.add_node(item['id'],
+                           type=item['type'],
+                           bbox=box(*item['bbox'].values()),
+                           value=item.get('clean_value') or item.get('value'))
+
+        # 2. 분산 추출된 태그 통합 및 노드 추가
+        for tag in self.all_tags:
+            self.G.add_node(tag['id'],
+                           type='TEXT',
+                           bbox=box(*tag['bbox'].values()),
+                           value=tag.get('tagName'))
+
+        # 3. 태그-설비 논리적 연결 (Association)
+        tags = [n for n, d in self.G.nodes(data=True) if d['type'] in ('TEXT', 'MTEXT')]
+        equipments = [n for n, d in self.G.nodes(data=True) if d['type'] not in ('TEXT', 'MTEXT')]
+
+        for tag in tags:
+            best_match = self._find_nearest_equipment(tag, equipments)
+            if best_match:
+                self.G.add_edge(tag, best_match, relation='associated_with')
+
+        # 4. 배관 기반 물리적 연결 (Pipe) [개선됨]
+        lines = [n for n, d in self.G.nodes(data=True) if d['type'] in ['LINE', 'POLYLINE', 'LWPOLYLINE']]
+        for line_id in lines:
+            line_geom = self.G.nodes[line_id]['bbox'] # 실제로는 LineString 객체여야 함
+            # 라인의 끝점 추출 (가정: line_geom이 LineString인 경우)
+            endpoints = [line_geom.coords[0], line_geom.coords[-1]] if hasattr(line_geom, 'coords') else []
+            
+            connected_nodes = []
+            for pt in endpoints:
+                p = Point(pt)
+                for eq_id in equipments:
+                    if self.G.nodes[eq_id]['bbox'].distance(p) < self.config['dist_threshold']:
+                        connected_nodes.append(eq_id)
+            
+            if len(connected_nodes) >= 2:
+                # 방향성 추론 로직 (단순화: 시작점 -> 끝점)
+                self.G.add_edge(connected_nodes[0], connected_nodes[1], relation='pipe')
+
+    def _find_nearest_equipment(self, tag_id, equipment_ids):
+        tag_bbox = self.G.nodes[tag_id]['bbox']
+        min_dist = float('inf')
+        nearest = None
+        for eq_id in equipment_ids:
+            eq_bbox = self.G.nodes[eq_id]['bbox']
+            dist = tag_bbox.distance(eq_bbox)
+            if dist < min_dist:
+                min_dist = dist
+                nearest = eq_id
+        return nearest if min_dist < self.config['tag_threshold'] else None
+
+    def validate_topology(self):
+        """위상 무결성 검증"""
+        isolated = list(nx.isolates(self.G))
+        return {"isolated_nodes": isolated, "node_count": self.G.number_of_nodes(), "edge_count": self.G.number_of_edges()}
+
+# 실행 예시
+all_tags = flatten_results([worker1_res, worker2_res])
+config = {'dist_threshold': 30.0, 'tag_threshold': 80.0}
+builder = PidTopologyBuilder(geometric_data, all_extracted_tags=all_tags, config=config)
+builder.build_graph()
+validation_res = builder.validate_topology()
+print(f"Validation Result: {validation_res}")
+```
+
+### 3.2 위상 분석 유틸리티: 영향도 분석 (Impact Analysis)
+```python
+def analyze_impact(graph, start_node):
+    """특정 설비 장애 시 하류(Downstream)에 영향을 받는 모든 노드 추출"""
+    # BFS를 통해 도달 가능한 모든 노드 탐색
+    impacted_nodes = nx.descendants(graph, start_node)
+    return list(impacted_nodes)
+
+# 예: P-101 펌프 고장 시 영향 분석
+affected = analyze_impact(graph, "node_P101")
+print(f"Impacted Equipment: {affected}")
+```
+
+---
+
+## 🚀 4. Phase 2 완료 기준 (Definition of Done)
+
+- [ ] 모든 설비와 계기가 그래프의 **노드(Node)**로 변환되었는가?
+- [ ] 분산 추출된 태그 리스트가 `flatten_results`를 통해 통합되어 그래프에 반영되었는가?
+- [ ] 태그와 설비 간의 **논리적 연결(Association)**이 정확하게 매핑되었는가?
+- [ ] 배관(Line)의 **끝점 분석**을 통해 설비 간의 **물리적 연결(Pipe Edge)**이 생성되었는가? (BBox 교차 방식 배제)
+- [ ] 화살표 및 공정 규칙에 기반한 **방향성(Directionality)**이 엣지에 부여되었는가?
+- [ ] `validate_topology`를 통해 고립 노드 및 위상 오류가 검토되었는가?
+- [ ] `nx.descendants` 등을 통해 특정 노드로부터의 **흐름 추적(Flow Tracing)**이 가능한가?
+- [ ] 생성된 그래프 구조가 JSON 또는 GraphML 등의 형태로 저장되어 Phase 3로 전달 가능한가?
--- a/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase3.md
+++ b/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase3.md
@@ -0,0 +1,211 @@
+# 🧠 Graph Pipeline Phase 3: 지능형 매핑 및 검증 (Intelligent Mapping & Validation)
+
+이 문서는 P&ID Graph Pipeline의 세 번째 단계인 **지능형 매핑 및 검증**의 상세 구현 계획을 다룹니다. 2단계에서 구축한 위상 그래프(Topology Graph)를 활용하여, 도면 상의 가상 노드들을 실제 Experion 시스템의 **실시간 태그(Real-time Tags)**와 정밀하게 연결하고 그 타당성을 검증하는 것이 목표입니다.
+
+---
+
+## 🚩 [Supervisor's Audit] 감독자 진단 결과 및 수정 사항
+
+본 프로그램 설계에 대해 감독자 관점에서 정밀 진단을 수행하였으며, 다음과 같은 취약점과 개선 사항을 발견하여 반영하였습니다.
+
+### 1. 진단 결과 (Audit Findings)
+
+| 항목 | 진단 내용 | 심각도 | 수정 방향 |
+|---|---|---|---|
+| **에러 처리** | LLM 응답이 JSON 형식이 아니거나 `UNKNOWN`일 때의 예외 처리 로직 부족 | HIGH | 구조화된 출력(JSON) 강제 및 Fallback 전략 추가 |
+| **성능/비용** | 모든 노드에 대해 개별 LLM 호출 시 API 비용 급증 및 속도 저하 | MED | 배치(Batch) 처리 및 1차 필터링 강화 |
+| **검증 정밀도** | 단순 키워드 매칭 기반 검증은 오탐(False Positive) 가능성이 높음 | MED | 데이터 타입 및 엔지니어링 유닛(EU)의 엄격한 비교 로직 추가 |
+| **데이터 정합성** | 매핑 결과의 이력 관리 및 사람이 수동으로 수정할 수 있는 피드백 루프 부재 | LOW | 매핑 결과 저장 스키마에 `confidence` 및 `manual_override` 필드 추가 |
+
+### 2. 수정 이유 (Rationale)
+- **안정성 확보:** LLM은 비결정론적 특성이 있으므로, 프로그램이 런타임에 중단되지 않도록 Pydantic을 이용한 엄격한 스키마 검증이 필수적입니다.
+- **효율성 최적화:** 수천 개의 태그를 개별 호출하는 것은 비효율적입니다. 유사도 기반으로 후보군을 좁히고, 유사 그룹을 묶어 배치 처리함으로써 비용을 절감합니다.
+- **신뢰도 향상:** 단순 텍스트 매칭을 넘어 실제 시스템의 메타데이터(Unit, Range 등)를 교차 검증해야 엔지니어링 관점에서 신뢰할 수 있는 결과가 됩니다.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `openai` / `langchain` | LLM API 연동 및 프롬프트 체이닝 | 매핑 추론 및 검증 핵심 |
+| `fuzzywuzzy` / `rapidfuzz` | 태그 이름 간의 문자열 유사도 계산 | 1차 후보군 추출용 |
+| `networkx` | 그래프 기반 인접 노드(Context) 추출 | 2단계 그래프 활용 |
+| `pydantic` | 매핑 결과의 구조화 및 유효성 검사 | **[강화]** 데이터 정규화 및 런타임 타입 체크 |
+| `requests` | ExperionCrawler API (C#)와 통신 | 실제 태그 리스트 조회 |
+
+### 1.2 설치 명령어
+```bash
+pip install openai langchain rapidfuzz networkx pydantic requests
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 매핑 파이프라인 (Mapping Pipeline)
+단순 이름 매칭의 한계를 극복하기 위해 **[후보 추출 $\rightarrow$ 맥락 분석 $\rightarrow$ LLM 확정 $\rightarrow$ 스키마 검증]**의 4단계 프로세스를 거칩니다.
+
+1.  **1차 후보 추출 (Candidate Generation):**
+    *   도면의 태그 텍스트와 Experion 시스템의 전체 태그 리스트를 `RapidFuzz`로 비교하여 유사도 상위 N개를 추출합니다.
+2.  **맥락 정보 수집 (Context Gathering):**
+    *   해당 노드의 그래프 상 인접 노드(1-hop, 2-hop) 정보를 수집합니다.
+    *   예: "현재 노드는 `PT-101`이며, 상류에 `P-101(Pump)`이 있고 하류에 `V-101(Valve)`이 있음."
+3.  **LLM 기반 최종 매핑 (LLM-based Resolution):**
+    *   후보 태그 리스트와 위상 맥락을 LLM에게 전달하여 가장 타당한 태그를 선택하게 합니다.
+    *   **[개선]** JSON Mode를 사용하여 `{"tag": "...", "reason": "...", "confidence": 0.9}` 형태로 응답을 강제합니다.
+4.  **구조적 검증 (Structural Validation):**
+    *   Pydantic 모델을 통해 LLM 응답의 형식을 검증하고, 실패 시 `UNKNOWN` 처리 및 로그를 남깁니다.
+
+### 2.2 상호 검증 로직 (Cross-Validation)
+매핑된 결과가 실제 공정 데이터와 일치하는지 검증합니다.
+*   **위상적 일관성:** 도면에서 `A $\rightarrow$ B` 순서라면, 실제 데이터에서도 `A`의 변화가 `B`에 영향을 주는지 상관관계 분석.
+*   **속성 일치성:** 도면의 심볼 타입(예: Pressure Transmitter)과 실제 태그의 속성(예: Engineering Unit = 'bar' 또는 'psi')이 일치하는지 확인. **[강화]** 단순 키워드가 아닌 Unit 매핑 테이블을 통한 엄격한 비교.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 맥락 기반 매핑 엔진
+```python
+import networkx as nx
+import asyncio
+import json
+from typing import List, Optional
+from pydantic import BaseModel, Field
+from rapidfuzz import process, fuzz
+from openai import AsyncOpenAI
+
+# --- [추가] 응답 구조화를 위한 Pydantic 모델 ---
+class MappingResult(BaseModel):
+    resolved_tag: str = Field(..., description="The final mapped system tag")
+    reason: str = Field(..., description="Reason for this mapping based on context")
+    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score from 0 to 1")
+
+client = AsyncOpenAI(api_key="your-api-key")
+
+class IntelligentMapper:
+    def __init__(self, graph, system_tags):
+        self.graph = graph          # Phase 2에서 생성된 NetworkX 그래프
+        self.system_tags = system_tags # Experion 시스템의 전체 태그 리스트
+
+    def get_node_context(self, node_id):
+        """노드의 주변 위상 정보를 텍스트로 변환"""
+        neighbors = list(self.graph.neighbors(node_id))
+        context = []
+        for n in neighbors:
+            attr = self.graph.nodes[n]
+            context.append(f"Connected to {attr.get('value', n)} (Type: {attr.get('type')})")
+        return ", ".join(context)
+
+    async def _resolve_generic(self, node_id, category_prompt):
+        """공통 매핑 로직 (비동기 + 구조화 응답)"""
+        tag_text = self.graph.nodes[node_id].get('value', '')
+        candidates = process.extract(tag_text, self.system_tags, scorer=fuzz.WRatio, limit=5)
+        context = self.get_node_context(node_id)
+        
+        prompt = f"""
+        {category_prompt}
+        P&ID 도면의 태그 '{tag_text}'를 실제 시스템 태그와 매핑해야 합니다.
+        위상 맥락: {context}
+        후보 리스트: {candidates}
+        
+        반드시 다음 JSON 형식으로만 응답하세요:
+        {{
+            "resolved_tag": "태그명 또는 UNKNOWN",
+            "reason": "매핑 이유",
+            "confidence": 0.0~1.0
+        }}
+        """
+        
+        try:
+            response = await client.chat.completions.create(
+                model="gpt-4-turbo",
+                messages=[{"role": "user", "content": prompt}],
+                response_format={ "type": "json_object" } # JSON 모드 강제
+            )
+            raw_content = response.choices[0].message.content
+            # Pydantic을 통한 유효성 검사
+            return MappingResult.model_validate_json(raw_content)
+        except Exception as e:
+            print(f"Error resolving node {node_id}: {e}")
+            return MappingResult(resolved_tag="UNKNOWN", reason=f"Error: {str(e)}", confidence=0.0)
+
+    # --- 전문화된 Worker 함수들 ---
+    async def extract_transmitters(self, node_ids):
+        prompt = "당신은 계측기 전문 엔지니어입니다. 특히 Pressure/Flow/Level Transmitter 매핑에 특화되어 있습니다."
+        return {nid: await self._resolve_generic(nid, prompt) for nid in node_ids}
+
+    async def extract_valves(self, node_ids):
+        prompt = "당신은 밸브 및 액추에이터 전문 엔지니어입니다. 밸브의 개폐 상태 및 제어 태그 매핑에 특화되어 있습니다."
+        return {nid: await self._resolve_generic(nid, prompt) for nid in node_ids}
+
+    async def extract_equipment(self, node_ids):
+        prompt = "당신은 공정 설비 전문 엔지니어입니다. 펌프, 탱크, 열교환기 등의 메인 설비 태그 매핑에 특화되어 있습니다."
+        return {nid: await self._resolve_generic(nid, prompt) for nid in node_ids}
+
+# 사용 예시
+async def main():
+    # 가상 데이터
+    graph = nx.Graph()
+    graph.add_node("node_1", value="PT-101", type="Pressure Transmitter")
+    graph.add_node("node_2", value="P-101", type="Pump")
+    graph.add_edge("node_1", "node_2")
+    
+    mapper = IntelligentMapper(graph, ["PT-101.PV", "PT-102.PV", "P-101.STATUS"])
+    
+    results = await asyncio.gather(
+        mapper.extract_transmitters(["node_1"]),
+        mapper.extract_equipment(["node_2"])
+    )
+    
+    final_mapping = {**results[0], **results[1]}
+    print(f"Parallel Resolved Mapping: {final_mapping}")
+
+asyncio.run(main())
+```
+
+### 3.2 검증 유틸리티: 속성 일치 확인 (강화 버전)
+```python
+def validate_mapping(resolved_tag, symbol_type, tag_metadata):
+    """심볼 타입과 실제 태그 메타데이터의 엄격한 일치 여부 검증"""
+    # 단순 키워드가 아닌 허용 단위(Unit) 정의
+    unit_map = {
+        "Pressure Transmitter": ["bar", "psi", "kPa", "Pa"],
+        "Flow Meter": ["m3/h", "lpm", "kg/h"],
+        "Temperature Sensor": ["°C", "C", "K", "°F"]
+    }
+    
+    actual_unit = tag_metadata.get('unit', '').strip()
+    allowed_units = unit_map.get(symbol_type, [])
+    
+    # 1. 단위 일치 확인 (최우선)
+    if actual_unit and actual_unit in allowed_units:
+        return True, "Unit Match"
+    
+    # 2. 단위가 없는 경우 설명(Description) 기반 2차 검증
+    actual_desc = tag_metadata.get('description', '').lower()
+    expected_keywords = {
+        "Pressure Transmitter": ["pressure", "press"],
+        "Flow Meter": ["flow", "flowrate"],
+        "Temperature Sensor": ["temp", "temperature"]
+    }
+    
+    keywords = expected_keywords.get(symbol_type, [])
+    if any(kw in actual_desc for kw in keywords):
+        return True, "Description Match (Unit Missing)"
+        
+    return False, "Mismatch: Symbol type and Tag metadata do not align"
+```
+
+---
+
+## 🚀 4. Phase 3 완료 기준 (Definition of Done)
+
+- [ ] 모든 도면 노드에 대해 **1차 후보군(Candidates)**이 자동으로 생성되는가?
+- [ ] `NetworkX` 그래프를 통해 **인접 노드 맥락(Context)**이 정확히 추출되는가?
+- [ ] LLM이 **JSON 형식**으로 최종 태그를 결정하고, 그 근거와 신뢰도를 제시하는가?
+- [ ] **Pydantic**을 통해 LLM 응답의 구조적 유효성이 검증되는가?
+- [ ] 매핑된 태그의 **엔지니어링 유닛(Unit)**과 도면 심볼 타입 간의 일치성이 엄격히 검증되는가?
+- [ ] 최종 매핑 결과가 `(도면노드ID, 시스템태그, 신뢰도, 검증결과, 매핑근거)` 형태로 저장되는가?
--- a/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase4.md
+++ b/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase4.md
@@ -0,0 +1,197 @@
+# 🎨 Graph Pipeline Phase 4: 활용 및 시각화 (Application & Visualization)
+
+이 문서는 P&ID Graph Pipeline의 최종 단계인 **활용 및 시각화**의 상세 구현 계획을 다룹니다. 앞선 단계에서 구축한 [기하학적 데이터 $\rightarrow$ 위상 그래프 $\rightarrow$ 시스템 태그 매핑] 결과물을 결합하여, 운영자가 도면 상에서 실시간 공정 상태를 파악하고 장애 영향도를 분석할 수 있는 인터페이스를 구현하는 것이 목표입니다.
+
+---
+
+## 🔍 [Supervisor Diagnosis] 프로그램 진단 및 개선 권고
+
+**진단 일자:** 2026-05-02
+**진단자:** Roo (Software Engineer / Supervisor)
+
+### 1. 종합 진단 결과
+현재 계획은 기본적인 데이터 흐름(C# $\rightarrow$ Python $\rightarrow$ Frontend)을 잘 정의하고 있으나, **실제 산업 현장의 대규모 P&ID 도면 적용 시 발생할 수 있는 성능 및 안정성 문제**에 대한 고려가 부족합니다. 특히 실시간 데이터 오버레이의 부하 관리와 분석 결과의 신뢰성 검증 단계가 누락되어 있습니다.
+
+### 2. 주요 진단 항목 및 수정 이유
+
+| 항목 | 진단 결과 | 위험도 | 수정 이유 및 개선 방향 |
+|---|---|---|---|
+| **데이터 전송 효율** | WebSocket/API 폴링 방식의 단순 나열 | MED | 수천 개의 태그가 포함된 도면에서 개별 폴링/전송 시 네트워크 부하 급증 $\rightarrow$ **태그 그룹화 및 변경분 기반(Delta) 전송** 도입 필요 |
+| **프론트엔드 렌더링** | SVG/Canvas 단순 오버레이 | HIGH | 노드 수가 많아질 경우 DOM 요소 증가로 인한 브라우저 랙 발생 $\rightarrow$ **Canvas 기반 렌더링 최적화 및 Viewport 기반 가시 영역 렌더링** 전략 필요 |
+| **분석 엔진 신뢰성** | `nx.descendants` 단순 활용 | MED | 단순 위상 전파는 실제 공정의 '흐름 방향(Flow Direction)'과 '밸브 개폐 상태'를 무시함 $\rightarrow$ **엣지 속성(방향성, 상태)을 반영한 가중치 경로 분석**으로 고도화 |
+| **에러 핸들링** | Python 브릿지 통신 시 예외 처리 미흡 | LOW | 분석 엔진 다운 시 C# 서버의 블로킹 가능성 $\rightarrow$ **Circuit Breaker 패턴 및 타임아웃 설정** 명시 필요 |
+| **사용자 경험(UX)** | 단순 하이라이트 표시 | LOW | 영향도 결과가 많을 경우 도면이 빨간색으로 도배됨 $\rightarrow$ **단계별 영향도(1차, 2차...) 색상 구분 및 필터링** 기능 추가 |
+
+---
+
+## 📦 1. 필수 패키지 및 기술 스택
+
+### 1.1 프론트엔드 (Visualization)
+| 기술/라이브러리 | 용도 | 비고 |
+|---|---|---|
+| `SVG / Canvas API` | P&ID 도면 렌더링 및 데이터 오버레이 | **Canvas API 우선 권장 (대규모 노드 성능 최적화)** |
+| `Cytoscape.js` / `D3.js` | 위상 그래프 시각화 및 인터랙티브 탐색 | 그래프 분석 뷰어 |
+| `Vue.js` / `React` | 전체 UI 프레임워크 및 상태 관리 | `src/Web` 구조와 통합 |
+| `Axios` / `WebSocket` | 실시간 OPC UA 데이터 수신 및 API 통신 | **SignalR (ASP.NET Core) 도입 권장 (실시간 양방향 통신 최적화)** |
+
+### 1.2 백엔드 (API & Analysis)
+| 기술/라이브러리 | 용도 | 비고 |
+|---|---|---|
+| `ASP.NET Core` | Graph API 및 분석 엔드포인트 제공 | `ExperionCrawler` 메인 서버 |
+| `NetworkX` (Python) | 영향도 분석 및 경로 추적 알고리즘 실행 | 분석 엔진 (Phase 2 활용) |
+| `FastAPI` / `Flask` | Python 분석 엔진과 C# 서버 간의 브릿지 | 분석 마이크로서비스 |
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 실시간 데이터 오버레이 (Real-time Overlay)
+도면의 좌표 정보와 매핑된 시스템 태그를 연결하여 실시간 값을 표시합니다.
+1.  **매핑 데이터 로드:** `(도면노드ID, 시스템태그, 좌표)` 리스트를 프론트엔드로 전달.
+2.  **실시간 스트리밍:** `OPC UA` $\rightarrow$ `C# Server` $\rightarrow$ `SignalR Hub` $\rightarrow$ `Frontend`. (**개선: 변경된 값만 전송하는 Delta Update 방식 적용**)
+3.  **동적 렌더링:** 태그 값이 변경되면 해당 좌표의 Canvas 요소를 업데이트하거나 툴팁에 현재 값을 표시. (**개선: Viewport 내 요소만 업데이트하여 CPU 부하 감소**)
+
+### 2.2 영향도 분석 엔진 (Impact Analysis Engine)
+특정 설비의 이상 발생 시 하류(Downstream) 영향을 계산합니다.
+1.  **분석 요청:** 사용자가 도면에서 특정 노드(예: 펌프 P-101)를 클릭.
+2.  **그래프 탐색:** Python 분석 엔진에서 `nx.descendants(G, 'P-101')` 실행. (**개선: 엣지의 `flow_direction` 속성을 확인하여 실제 유체 흐름 방향으로만 전파 계산**)
+3.  **결과 반환:** 영향받는 모든 노드 ID 리스트, 경로(Path), 그리고 **영향 단계(Depth)**를 반환.
+4.  **시각적 강조:** 도면 상에서 영향 경로를 단계별 색상(예: 1차-진한 빨강, 2차-연한 빨강)으로 하이라이트 처리.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 [Backend] 영향도 분석 API (C# $\rightarrow$ Python Bridge)
+```csharp
+// src/Web/Controllers/PidGraphController.cs
+
+// 1. 분석 상태 추적을 위한 DTO
+public record AnalysisStatus(string TaskId, double Progress, string Status, string Message);
+
+// 2. 실시간 진행 상태 조회 API (Phase 5 병렬 처리 반영)
+[HttpGet("status/{taskId}")]
+public async Task<IActionResult> GetAnalysisStatus(string taskId)
+{
+    // Orchestrator가 관리하는 작업 상태 저장소(Redis/MemoryCache)에서 조회
+    var status = await _statusService.GetStatusAsync(taskId);
+    if (status == null) return NotFound();
+    
+    return Ok(new {
+        taskId = status.TaskId,
+        progress = status.Progress, // 0.0 ~ 1.0
+        status = status.Status,     // "Processing", "Completed", "Failed"
+        message = status.Message
+    });
+}
+
+[HttpGet("impact/{nodeId}")]
+public async Task<IActionResult> GetImpactAnalysis(string nodeId)
+{
+    try 
+    {
+        // Python 분석 마이크로서비스에 요청 (Timeout 및 Circuit Breaker 적용 권장)
+        var response = await _httpClient.GetAsync($"http://python-analysis-api/impact/{nodeId}");
+        response.EnsureSuccessStatusCode();
+        var result = await response.Content.ReadFromJsonAsync<ImpactResult>();
+        
+        return Ok(result);
+    }
+    catch (HttpRequestException ex)
+    {
+        // 분석 엔진 연결 실패 시 적절한 에러 메시지 반환
+        return StatusCode(503, new { error = "Analysis Engine is currently unavailable", details = ex.Message });
+    }
+}
+```
+
+### 3.2 [Frontend] Canvas 기반 데이터 오버레이 및 진행률 표시 (JavaScript)
+```javascript
+// src/Web/wwwroot/js/pid-viewer.js
+
+// 1. 실시간 값 업데이트 (Canvas 최적화 버전)
+async function updateRealtimeValues(tagData) {
+    // tagData: { "TAG_01": { value: 10.5, status: "OK" }, ... }
+    
+    const ctx = canvas.getContext('2d');
+    
+    for (const [tag, data] of Object.entries(tagData)) {
+        const node = nodeMap.get(tag); // 좌표 정보 맵
+        if (node && isInViewport(node)) {
+            // 뷰포트 내에 있을 때만 렌더링
+            ctx.fillStyle = data.value > threshold ? 'red' : 'green';
+            ctx.beginPath();
+            ctx.arc(node.x, node.y, 5, 0, Math.PI * 2);
+            ctx.fill();
+            
+            // 툴팁 데이터 업데이트
+            updateTooltipData(tag, data.value);
+        }
+    }
+}
+
+// 2. 분석 진행 상태 표시 (Phase 5 병렬 처리 반영)
+async function trackAnalysisProgress(taskId) {
+    const progressBar = document.getElementById('analysis-progress-bar');
+    const statusText = document.getElementById('analysis-status-text');
+
+    const pollStatus = async () => {
+        try {
+            const response = await fetch(`/api/pid/status/${taskId}`);
+            const data = await response.json();
+
+            // 프로그레스 바 업데이트
+            progressBar.style.width = `${data.progress * 100}%`;
+            statusText.innerText = `분석 중... ${Math.round(data.progress * 100)}% (${data.message})`;
+
+            if (data.status !== 'Completed' && data.status !== 'Failed') {
+                setTimeout(pollStatus, 1000); // 1초 간격 폴링
+            } else {
+                statusText.innerText = data.status === 'Completed' ? '분석 완료!' : '분석 실패';
+            }
+        } catch (e) {
+            statusText.innerText = '상태 조회 중 오류 발생';
+        }
+    };
+
+    pollStatus();
+}
+```
+
+### 3.3 [Analysis] 흐름 방향 반영 경로 추적 (Python)
+```python
+import networkx as nx
+
+def get_propagation_path_with_flow(graph, start_node):
+    """
+    단순 descendants가 아닌, 엣지의 방향성(flow_direction)과 
+    상태(valve_status)를 고려한 실제 영향 전파 경로 추출
+    """
+    # 1. 유효한 엣지만 필터링 (방향이 맞고 밸브가 열려있는 경로)
+    valid_edges = [
+        (u, v, d) for u, v, d in graph.edges(data=True) 
+        if d.get('flow_direction') == 'forward' and d.get('valve_status') == 'open'
+    ]
+    filtered_graph = nx.DiGraph()
+    filtered_graph.add_edges_from(valid_edges)
+    
+    # 2. 전파 단계별 노드 추출 (BFS)
+    propagation_levels = nx.single_source_shortest_path_length(filtered_graph, start_node)
+    
+    # { node_id: distance } 형태로 반환하여 프론트엔드에서 색상 구분 가능하게 함
+    return propagation_levels
+
+# 예: P-101에서 시작되는 실제 유체 흐름 기반 영향도 분석
+impact_map = get_propagation_path_with_flow(topology_graph, "P-101")
+```
+
+---
+
+## 🚀 4. Phase 4 완료 기준 (Definition of Done)
+
+- [ ] P&ID 도면(Canvas) 위에 **실시간 OPC UA 값**이 정확한 좌표에 표시되며, 뷰포트 최적화가 적용되었는가?
+- [ ] **SignalR 또는 Delta Update**를 통해 네트워크 부하를 최소화하며 실시간 데이터를 수신하는가?
+- [ ] 병렬 처리 중인 분석 작업의 **진행 상태(Progress Bar)**가 UI에 실시간으로 반영되는가?
+- [ ] 특정 노드 클릭 시 **유체 흐름 방향이 반영된 영향도 분석** 결과가 단계별 색상으로 하이라이트 되는가?
+- [ ] C# 서버와 Python 엔진 간 통신에 **타임아웃 및 예외 처리**가 적용되어 시스템 안정성이 확보되었는가?
+- [ ] 전체 파이프라인(`추출 $\rightarrow$ 모델링 $\rightarrow$ 매핑 $\rightarrow$ 시각화`)이 통합되어 동작하는가?
--- a/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase5.md
+++ b/.rooBackup/2026-05-02_pipeline_sync/Graph_Pipeline_Phase5.md
@@ -0,0 +1,138 @@
+# 🔌 Graph Pipeline Phase 5: MCP 서버 통합 및 고성능 병렬 아키텍처 (MCP Integration & Parallel Processing)
+
+이 문서는 앞서 설계한 1~4단계의 Graph Pipeline을 현재 프로젝트의 **Unified MCP Server (`mcp-server/server.py`)**에 통합하는 방안을 다룹니다. 특히, 대용량 도면 처리 시 발생하는 지연과 버퍼 문제를 해결하기 위해 `PID_Parser_Plan_Revision.md`의 **분산 처리 기법**과 vLLM의 **Continuous Batching** 특성을 극대화한 병렬 아키텍처를 적용합니다.
+
+---
+
+## 🏗️ 1. 통합 아키텍처 설계
+
+### 1.1 고성능 병렬 데이터 흐름 (Parallel End-to-End Flow)
+단일 순차 요청 방식에서 벗어나, **[전처리 $\rightarrow$ 병렬 분산 추출 $\rightarrow$ 통합 후처리]** 구조로 전환합니다.
+
+`Frontend (UI)` $\rightarrow$ `C# Server (API)` $\rightarrow$ `MCP Server (Orchestrator)` $\rightarrow$ `Parallel Worker Tools (vLLM Batching)` $\rightarrow$ `Result Aggregator` $\rightarrow$ `C# Server`
+
+1.  **요청:** 사용자가 UI에서 도면 분석 시작 버튼 클릭.
+2.  **전처리 (Orchestrator):** MCP 서버가 DXF를 로드하여 기하학적 데이터를 추출하고, 분석 대상(Transmitter, Valve, Pump 등)별로 데이터를 분할합니다.
+3.  **병렬 호출 (Continuous Batching):**
+    *   분할된 데이터를 기반으로 여러 개의 MCP 툴(또는 동일 툴의 다중 요청)을 **동시에(Asynchronously)** 호출합니다.
+    *   vLLM 서버는 이 다수의 요청을 **Continuous Batching**으로 묶어 처리함으로써, 개별 요청 시보다 전체 처리량(Throughput)을 획기적으로 높입니다.
+4.  **통합 및 저장 (Aggregator):** 각 분산 툴이 반환한 결과를 취합하여 최종 위상 그래프를 구축하고 DB에 저장합니다.
+
+### 1.2 MCP 서버 내 역할 분담 (분산 처리 모델)
+`PID_Parser_Plan_Revision.md`를 반영하여, 기능을 세분화하고 병렬 실행 가능하게 설계합니다.
+
+| 구분 | MCP Tool / Module | 역할 | 병렬 처리 전략 |
+|---|---|---|---|
+| **Orchestrator** | `orchestrate_pid_pipeline` | 전체 공정 제어, 데이터 분할 및 결과 취합 | Asyncio 기반 비동기 제어 |
+| **Worker 1** | `extract_transmitters` | FIT, FT, LT, PT, TE 추출 | vLLM Batching 요청 |
+| **Worker 2** | `extract_valves` | FCV, LCV, TCV, PCV, XV 추출 | vLLM Batching 요청 |
+| **Worker 3** | `extract_gauges` | PG, TG, LG 추출 | vLLM Batching 요청 |
+| **Worker 4** | `extract_equipment` | Column, Tank, Filter, Drum, Heat Exchanger 등 추출 | vLLM Batching 요청 |
+| **Worker 5** | `extract_pumps` | P-xxxx, VP-xxxx 추출 | vLLM Batching 요청 |
+| **Analyzer** | `analyze_pid_impact` | 구축된 그래프 기반 영향도 분석 | Graph Algorithm (CPU) |
+
+---
+
+## 💻 2. MCP 서버 통합 구현 가이드
+
+### 2.1 비동기 병렬 처리 설계 (Asyncio + vLLM Batching)
+`FastMCP` 환경에서 `asyncio.gather`를 사용하여 여러 추출 툴을 동시에 호출함으로써 vLLM의 Continuous Batching 효율을 극대화합니다.
+
+```python
+# mcp-server/server.py 통합 설계 (개념 코드)
+import asyncio
+from typing import List
+
+async def run_parallel_extraction(geo_data):
+    """
+    분류별 추출 툴을 병렬로 호출하여 vLLM Batching 유도
+    """
+    # 각 분류별 프롬프트와 데이터 준비
+    tasks = [
+        extract_transmitters_async(geo_data),
+        extract_valves_async(geo_data),
+        extract_gauges_async(geo_data),
+        extract_equipment_async(geo_data),
+        extract_pumps_async(geo_data)
+    ]
+    
+    # 동시에 요청을 던져 vLLM이 내부적으로 Batch 처리하게 함
+    results = await asyncio.gather(*tasks)
+    return results
+
+@mcp.tool()
+async def build_pid_graph_parallel(filepath: str) -> str:
+    """
+    분산 처리 기법을 적용한 P&ID 그래프 생성 툴
+    """
+    # 1. 전처리 (Phase 1)
+    extractor = PidGeometricExtractor(filepath)
+    geo_data = extractor.extract_all()
+    
+    # 2. 병렬 분산 추출 (vLLM Batching 활용)
+    # 각 Worker 툴들이 LLM에 요청을 보낼 때 vLLM이 이를 묶어서 처리함
+    extracted_parts = await run_parallel_extraction(geo_data)
+    
+    # 3. 결과 통합 및 위상 모델링 (Phase 2)
+    all_tags = flatten_results(extracted_parts)
+    builder = PidTopologyBuilder(geo_data, all_tags)
+    builder.build_graph()
+    
+    # 4. 저장
+    graph_id = os.path.basename(filepath).replace(".dxf", "_graph.graphml")
+    nx.write_graphml(builder.G, f"storage/{graph_id}")
+    
+    return json.dumps({"success": True, "graph_id": graph_id, "nodes": builder.G.number_of_nodes()})
+```
+
+### 2.2 C# 서버와의 인터페이스 (`McpClient` 활용)
+C# 서버는 `src/Infrastructure/Mcp/McpClient.cs`를 통해 위 툴들을 호출합니다.
+
+아래 예시는 `analyze_pid_impact` 툴을 호출하는 C# 서비스 메서드입니다.
+호출 인자 이름(`graph_id`, `start_node_id`)은 MCP 툴에 전달되는 JSON 키로 그대로 사용됩니다.
+
+```csharp
+// src/Core/Application/Services/PidGraphService.cs (신규 서비스)
+public async Task<ImpactResult> GetImpactAnalysisAsync(string graphId, string nodeId)
+{
+    var request = new McpToolRequest {
+        ToolName = "analyze_pid_impact",
+        Arguments = new { graph_id = graphId, start_node_id = nodeId }
+    };
+    
+    var jsonResponse = await _mcpClient.CallToolAsync(request);
+    return JsonSerializer.Deserialize<ImpactResult>(jsonResponse);
+}
+```
+
+---
+
+## 🛠️ 3. 프로그램 구성 및 배포 전략
+
+### 3.1 디렉토리 구조 확장
+```text
+mcp-server/
+├── server.py              # MCP 메인 서버 (툴 정의)
+├── pipeline/              # Graph Pipeline 핵심 로직 (Phase 1~4)
+│   ├── __init__.py
+│   ├── extractor.py       # Phase 1: Geometric Extraction
+│   ├── topology.py        # Phase 2: Topology Modeling
+│   ├── mapper.py          # Phase 3: Intelligent Mapping
+│   └── analyzer.py        # Phase 4: Impact Analysis
+└── storage/               # 생성된 그래프 파일 (.graphml) 저장소
+```
+
+### 3.2 실행 프로세스
+1.  **MCP 서버 기동:** `python mcp-server/server.py --http` (포트 5001)
+2.  **C# 서버 기동:** `dotnet run` (포트 5000)
+3.  **통신:** C# 서버 $\xrightarrow{HTTP/JSON}$ MCP 서버 $\xrightarrow{Python\ Libs}$ 결과 반환.
+
+---
+
+## 🚀 4. 최종 완료 기준 (Definition of Done)
+
+- [ ] `mcp-server/server.py`에 `build_pid_graph`, `analyze_pid_impact` 등 핵심 툴이 정의되었는가?
+- [ ] Phase 1~4의 Python 로직이 `mcp-server/pipeline/` 모듈로 구조화되어 통합되었는가?
+- [ ] C# `McpClient`를 통해 MCP 서버의 그래프 분석 툴을 호출하고 결과를 수신할 수 있는가?
+- [ ] 도면 업로드 $\rightarrow$ 그래프 생성 $\rightarrow$ 태그 매핑 $\rightarrow$ 영향도 분석으로 이어지는 **End-to-End 파이프라인**이 완성되었는가?
+- [ ] 모든 과정이 `json_response=True` 및 `stateless_http=True` 설정 하에 안정적으로 동작하는가?
--- a/.rooBackup/20260502-084417/futurePlan/End-to-End
+++ b/.rooBackup/20260502-084417/futurePlan/End-to-End
@@ -0,0 +1,46 @@
+P&ID 도면 분석을 고도화하여 **Graph Pipeline**을 구축하는 것은 단순한 텍스트 추출을 넘어, 설비 간의 **연결성(Connectivity)**과 **위상(Topology)**을 이해하는 것을 의미합니다.
+
+제가 이 작업을 수행한다면, 다음과 같은 **4단계 전략**으로 접근하여 효율성을 극대화하겠습니다.
+
+---
+
+### 1. 데이터 추출 단계: "단순 텍스트 $\rightarrow$ 기하학적 객체"
+현재의 텍스트 기반 추출에서 벗어나, 객체의 **좌표(Coordinate)**와 **속성(Property)**을 보존하는 구조로 변경해야 합니다.
+
+*   **객체 중심 파싱:** DXF의 Entity(Line, Circle, Text, Polyline)를 개별 객체로 인식하고, 각 객체의 중심점과 경계 상자(Bounding Box)를 저장합니다.
+*   **심볼 라이브러리 구축:** 밸브, 펌프, 탱크 등 반복되는 심볼의 기하학적 패턴을 정의하여, 텍스트가 없어도 "이 모양은 밸브다"라고 인식하는 패턴 매칭 로직을 도입합니다.
+*   **OCR 고도화:** PDF의 경우, 단순 텍스트 추출이 아닌 영역 기반 OCR을 통해 텍스트의 물리적 위치를 정확히 파악하여 인접한 심볼과 연결합니다.
+
+### 2. 그래프 모델링 단계: "객체 $\rightarrow$ 노드 및 엣지"
+추출된 객체들을 기반으로 **Knowledge Graph**를 생성합니다.
+
+*   **노드(Node):** 설비(Equipment), 계기(Instrument), 태그(Tag)를 노드로 정의합니다.
+*   **엣지(Edge):** 배관(Line)을 엣지로 정의합니다.
+    *   **연결성 판단:** `Line`의 끝점이 `Equipment`의 경계 상자 내에 있거나 매우 근접해 있다면 두 노드를 연결된 것으로 간주합니다.
+    *   **방향성 부여:** 화살표 심볼이나 공정 흐름(Flow)을 분석하여 엣지에 방향성을 부여합니다.
+*   **계층 구조 생성:** `Unit $\rightarrow$ Equipment $\rightarrow$ Component $\rightarrow$ Tag` 순의 계층적 그래프 구조를 설계합니다.
+
+### 3. 지능형 매핑 및 검증 단계: "도면 $\rightarrow$ 실제 데이터"
+그래프 구조를 활용해 Experion 시스템의 실제 태그와 정밀하게 매핑합니다.
+
+*   **맥락 기반 매핑 (Contextual Mapping):** 단순히 이름이 비슷한 태그를 찾는 것이 아니라, "펌프 P-101 옆에 있는 PT-101은 P-101의 압력 전송기일 확률이 높다"는 그래프 상의 인접성을 활용합니다.
+*   **상호 검증 (Cross-Validation):** 
+    *   도면 상의 연결 관계(P-101 $\rightarrow$ V-101)와 실제 공정 데이터의 상관관계(P-101 가동 시 V-101 유량 변화)를 비교하여 매핑의 정확도를 검증합니다.
+*   **LLM 기반 추론:** 모호한 태그명이나 누락된 정보는 MCP 서버를 통해 LLM이 도면의 맥락과 R530 문서를 분석하여 최적의 매핑 후보를 추천하게 합니다.
+
+### 4. 활용 및 시각화 단계: "분석 $\rightarrow$ 인사이트"
+구축된 그래프를 통해 운영자에게 실질적인 가치를 제공합니다.
+
+*   **영향도 분석 (Impact Analysis):** 특정 밸브(V-101)가 고장 났을 때, 그래프 탐색(BFS/DFS)을 통해 하류(Downstream)에 영향을 받는 모든 설비와 태그를 즉시 식별합니다.
+*   **디지털 트윈 뷰:** P&ID 도면 위에 실시간 OPC UA 값을 오버레이하여, 도면을 보면서 현재 공정 상태를 한눈에 파악하는 인터페이스를 구현합니다.
+*   **이상 징후 전파 경로 추적:** 특정 태그에서 알람이 발생했을 때, 그래프를 역추적하여 근본 원인(Root Cause)이 될 가능성이 높은 상류 설비를 추천합니다.
+
+---
+
+### 🚀 효율적인 실행을 위한 로드맵 (Priority)
+
+1.  **Short-term (Quick Win):** DXF 파서 수정 $\rightarrow$ 객체 좌표 저장 $\rightarrow$ 단순 인접성 기반 태그-설비 매핑.
+2.  **Mid-term (Core):** 심볼 패턴 매칭 도입 $\rightarrow$ 배관(Line) 기반의 그래프 구조(NetworkX 등 활용) 구축.
+3.  **Long-term (Advanced):** LLM 기반의 도면-데이터 추론 엔진 통합 $\rightarrow$ 실시간 데이터 오버레이 UI 구현.
+
+이렇게 **[기하학적 추출 $\rightarrow$ 위상 모델링 $\rightarrow$ 맥락적 매핑 $\rightarrow$ 운영 인사이트]** 순으로 확장하는 것이 가장 리스크가 적고 효율적인 방법이라고 생각합니다.
--- a/.rooBackup/20260502-100801/futurePlan/End-to-End
+++ b/.rooBackup/20260502-100801/futurePlan/End-to-End
@@ -0,0 +1,122 @@
+# 🔌 Graph Pipeline Phase 5: MCP 서버 통합 및 시스템 아키텍처 (MCP Integration)
+
+이 문서는 앞서 설계한 1~4단계의 Graph Pipeline을 현재 프로젝트의 **Unified MCP Server (`mcp-server/server.py`)**에 통합하는 방안과 최종 프로그램 구조를 다룹니다. 이를 통해 C# 메인 서버와 LLM, 그리고 도면 분석 엔진이 하나의 생태계에서 유기적으로 동작하게 합니다.
+
+---
+
+## 🏗️ 1. 통합 아키텍처 설계
+
+### 1.1 전체 데이터 흐름 (End-to-End Flow)
+`Frontend (UI)` $\rightarrow$ `C# Server (API)` $\rightarrow$ `MCP Server (Python)` $\rightarrow$ `Graph Pipeline Engine` $\rightarrow$ `Experion DB/OPC UA`
+
+1.  **요청:** 사용자가 UI에서 "P-101 펌프의 영향도 분석" 요청.
+2.  **중계:** C# 서버가 `McpClient`를 통해 MCP 서버의 `analyze_pid_impact` 툴 호출.
+3.  **분석:** MCP 서버는 내부적으로 `NetworkX` 그래프를 로드하여 하류 노드를 계산.
+4.  **응답:** 분석 결과(노드 리스트)를 JSON으로 반환 $\rightarrow$ C# 서버 $\rightarrow$ UI 하이라이트.
+
+### 1.2 MCP 서버 내 역할 분담
+현재 `server.py`는 RAG, NL2SQL, 단순 태그 추출 기능을 가지고 있습니다. 여기에 **Graph Pipeline 전용 도구 세트**를 추가합니다.
+
+| 기존 기능 | 추가될 Graph Pipeline 기능 | 역할 |
+|---|---|---|
+| `parse_pid_dxf` | `build_pid_graph` | DXF $\rightarrow$ 기하 추출 $\rightarrow$ 위상 그래프 생성 및 저장 |
+| `match_pid_tags` | `resolve_graph_tags` | 그래프 맥락을 반영한 지능형 태그 매핑 |
+| (신규) | `analyze_pid_impact` | 특정 노드 기준 영향도 분석 (Downstream 탐색) |
+| (신규) | `get_graph_topology` | 시각화를 위한 노드-엣지 리스트 반환 |
+
+---
+
+## 💻 2. MCP 서버 통합 구현 가이드
+
+### 2.1 MCP Tool 캡슐화 설계
+`mcp-server/server.py`에 다음과 같은 형태로 툴을 추가합니다.
+
+```python
+# mcp-server/server.py 에 추가될 내용 (개념 코드)
+
+@mcp.tool()
+def build_pid_graph(filepath: str) -> str:
+    """
+    P&ID 도면을 분석하여 위상 그래프를 생성하고 저장합니다.
+    Phase 1(기하 추출) + Phase 2(위상 모델링) 통합 실행.
+    """
+    # 1. Phase 1: Geometric Extraction
+    extractor = PidGeometricExtractor(filepath)
+    geo_data = extractor.extract_all()
+    
+    # 2. Phase 2: Topology Modeling
+    builder = PidTopologyBuilder(geo_data)
+    builder.build_graph()
+    
+    # 3. 그래프 저장 (GraphML 또는 JSON)
+    graph_id = os.path.basename(filepath).replace(".dxf", "_graph.graphml")
+    nx.write_graphml(builder.G, f"storage/{graph_id}")
+    
+    return json.dumps({"success": True, "graph_id": graph_id, "nodes": builder.G.number_of_nodes()})
+
+@mcp.tool()
+def analyze_pid_impact(graph_id: str, start_node_id: str) -> str:
+    """
+    특정 설비의 장애 시 영향을 받는 하류 설비 리스트를 반환합니다.
+    """
+    # 그래프 로드
+    G = nx.read_graphml(f"storage/{graph_id}")
+    
+    # 영향도 분석 (Phase 4 로직)
+    impacted = nx.descendants(G, start_node_id)
+    
+    return json.dumps({
+        "success": True, 
+        "start_node": start_node_id, 
+        "impacted_nodes": list(impacted)
+    })
+```
+
+### 2.2 C# 서버와의 인터페이스 (`McpClient` 활용)
+C# 서버는 `src/Infrastructure/Mcp/McpClient.cs`를 통해 위 툴들을 호출합니다.
+
+```csharp
+// src/Core/Application/Services/PidGraphService.cs (신규 서비스)
+public async Task<ImpactResult> GetImpactAnalysisAsync(string graphId, string nodeId)
+{
+    var request = new McpToolRequest {
+        ToolName = "analyze_pid_impact",
+        Arguments = new { graph_id = graphId, start_node_id = nodeId }
+    };
+    
+    var jsonResponse = await _mcpClient.CallToolAsync(request);
+    return JsonSerializer.Deserialize<ImpactResult>(jsonResponse);
+}
+```
+
+---
+
+## 🛠️ 3. 프로그램 구성 및 배포 전략
+
+### 3.1 디렉토리 구조 확장
+```text
+mcp-server/
+├── server.py              # MCP 메인 서버 (툴 정의)
+├── pipeline/              # Graph Pipeline 핵심 로직 (Phase 1~4)
+│   ├── __init__.py
+│   ├── extractor.py       # Phase 1: Geometric Extraction
+│   ├── topology.py        # Phase 2: Topology Modeling
+│   ├── mapper.py          # Phase 3: Intelligent Mapping
+│   └── analyzer.py        # Phase 4: Impact Analysis
+└── storage/               # 생성된 그래프 파일 (.graphml) 저장소
+```
+
+### 3.2 실행 프로세스
+1.  **MCP 서버 기동:** `python mcp-server/server.py --http` (포트 5001)
+2.  **C# 서버 기동:** `dotnet run` (포트 5000)
+3.  **통신:** C# 서버 $\xrightarrow{HTTP/JSON}$ MCP 서버 $\xrightarrow{Python\ Libs}$ 결과 반환.
+
+---
+
+## 🚀 4. 최종 완료 기준 (Definition of Done)
+
+- [ ] `mcp-server/server.py`에 `build_pid_graph`, `analyze_pid_impact` 등 핵심 툴이 정의되었는가?
+- [ ] Phase 1~4의 Python 로직이 `mcp-server/pipeline/` 모듈로 구조화되어 통합되었는가?
+- [ ] C# `McpClient`를 통해 MCP 서버의 그래프 분석 툴을 호출하고 결과를 수신할 수 있는가?
+- [ ] 도면 업로드 $\rightarrow$ 그래프 생성 $\rightarrow$ 태그 매핑 $\rightarrow$ 영향도 분석으로 이어지는 **End-to-End 파이프라인**이 완성되었는가?
+- [ ] 모든 과정이 `json_response=True` 및 `stateless_http=True` 설정 하에 안정적으로 동작하는가?
--- a/Project-Intro/readme.md
+++ b/Project-Intro/readme.md
@@ -0,0 +1,117 @@
+# ExperionCrawler 프로젝트 소개
+
+ExperionCrawler는 Honeywell Experion HS R530 시스템의 데이터를 효율적으로 수집, 저장 및 분석하기 위한 통합 데이터 플랫폼입니다. OPC UA 통신을 통해 실시간 및 히스토리 데이터를 수집하고, LLM 기반의 Text-to-SQL 및 RAG 시스템을 통해 사용자가 자연어로 산업 데이터를 조회할 수 있는 환경을 제공합니다.
+
+## 🛠 개발 환경
+
+- **하드웨어 구성**
+  - **HC900 Controller**: 제어 로직 수행 (CPU만 있고 I/O 모듈은 없음)
+  - **Experion HS R530 서버**: 미니 PC(Kmtec k6플러스), Windows 10 LTSC 2021 IoT Enterprise. R530 데모 라이선스이므로 300분 후 종료됨
+  - **Nvidia DGX Spark**: 메인 서버 (Ubuntu 24.04), LLM
+  - **개발 PC**: Kmtech K8 Plus (Mini PC)
+
+- **기술 스택**
+  - **Backend**: C# / .NET 8.0 (ASP.NET Core)
+  - **Communication**: OPC UA (Client & Server)
+  - **Database**: PostgreSQL / TimescaleDB (시계열 데이터 최적화)
+  - **AI/LLM**: 
+    - **MCP Server**: Python 3 기반 (Model Context Protocol)
+    - **LLM**: Gemma4-32B-it (Vision 및 통합 지능 처리)
+  - **IDE**: VS Code + Roo Code + Local LLM (Gemma4, Qwen3 등)
+
+---
+
+## 🏗 System Architecture
+
+ExperionCrawler는 데이터 수집 계층, 저장 계층, 지능형 인터페이스 계층의 3단계 구조로 설계되었습니다.
+
+### 연결 환경 다이어그램
+
+```mermaid
+graph TD
+    subgraph "Field & Control Layer"
+        HC900[HC900 Controller] --> R530[Experion HS R530 Server]
+    end
+
+    subgraph "Data Collection Layer (ExperionCrawler)"
+        R530 -- "OPC UA (Client)" --> OPC_Client[ExperionOpcClient]
+        OPC_Client --> RT_Svc[Realtime Service]
+        OPC_Client --> Hist_Svc[History Service]
+        OPC_Client --> Fast_Svc[Fast Session Service]
+        
+        OPC_Server[ExperionOpcServer] -- "OPC UA (Server)" --> External_Client[External OPC UA Clients]
+    end
+
+    subgraph "Storage & Intelligence Layer"
+        RT_Svc --> DB[(TimescaleDB / PostgreSQL)]
+        Hist_Svc --> DB
+        Fast_Svc --> DB
+        
+        DB <--> MCP[MCP Server - Python]
+        MCP <--> LLM[Local LLM - Gemma4/Qwen3]
+        LLM <--> RAG[RAG System - Docs/Code]
+    end
+
+    subgraph "User Interface Layer"
+        WebUI[Web Dashboard] -- "REST API" --> WebAPI[ASP.NET Core API]
+        WebAPI --> RT_Svc
+        WebAPI --> Hist_Svc
+        WebAPI --> T2S[Text-to-SQL Service]
+        T2S <--> MCP
+    end
+```
+
+### 주요 구성 요소 설명
+
+1. **OPC UA Engine**: 
+   - `ExperionOpcClient`: R530 서버로부터 데이터를 읽어오는 클라이언트.
+   - `ExperionOpcServer`: 수집된 데이터를 가공한 결과를 외부 시스템에 다시 제공하는 서버 기능. (현재는 서버 기능만 구현되어 있으며, 가공 기능은 LLM 중심으로 개발 예정)
+
+2. **Data Pipeline**:
+   - **Realtime**: 실시간 태그 구독 및 DB 저장.(현재 약 1800개 포인트 등록)
+   - **History**: 과거 데이터 스냅샷 및 범위 조회. 저장 간격은 1분에 한 번.
+   - **Fast Session**: 고속 샘플링 데이터 수집 세션 관리. (현장에서 의심되는 포인트를 분석하기 위해 최대 8개까지 등록하여 최소 1초 간격으로 정해진 시간 동안 DB에 저장. 동시 3개까지 가능하며, 기본 수준의 그래프 기능 포함)
+
+3. **Intelligence (RAG & MCP)**:
+   - **MCP (Model Context Protocol)**: LLM이 DB 쿼리 실행, 파일 읽기 등 도구를 사용할 수 있게 하는 인터페이스.
+   - **Text-to-SQL**: 사용자의 자연어 질문을 분석하여 최적의 SQL 쿼리로 변환하고 실행.
+   - **RAG**: Experion HS R530 공식 문서 및 소스코드를 인덱싱하여 정확한 기술 답변 제공.
+
+---
+
+## 📈 프로젝트 진행 현황
+
+### ✅ 완료된 사항
+- [x] **OPC UA 통신 기반 구축**: R530 서버 연결 및 노드 브라우징 구현
+- [x] **데이터 수집 파이프라인**: 실시간 구독, 히스토리 조회, Fast Session 기능 구현
+- [x] **데이터베이스 설계**: TimescaleDB 기반 시계열 데이터 저장 구조 최적화
+- [x] **Text-to-SQL 엔진**: 한국어 자연어 → SQL 변환 및 실행 파이프라인 구축
+- [x] **MCP 서버 통합**: Python 기반 MCP 서버를 통한 LLM-DB 연결 환경 조성
+- [x] **인증서 관리**: OPC UA 보안 통신을 위한 인증서 생성 및 신뢰 관계 설정 자동화
+- [x] **RAG 기능 추가**: 현장 관련 지식 자료를 계속 추가할 수 있으며, LLM이 이를 활용하여 정보를 제공
+
+
+### 🚀 향후 계획 (Roadmap)
+- [ ] **P&ID 도면 분석 자동화**: DXF/PDF 도면에서 태그 정보를 추출하고 DB와 매핑하는 파이프라인 구축 (현재 구현되어 있으나 품질이 매우 미흡하여 개선 필요)
+- [ ] **지능형 태그 매핑**: P&ID 태그 ↔ Experion 시스템 태그 간의 AI 기반 자동 매핑
+- [ ] **고도화된 RAG 시스템**: 제품 문서 및 도면 정보를 결합한 하이브리드 RAG 구현
+- [ ] **UI/UX 개선**: 시계열 데이터 시각화(uPlot) 및 자연어 질의 인터페이스 고도화
+- [ ] **시스템 안정화**: 대량 데이터 수집 시의 성능 최적화 및 예외 처리 강화
+
+
+내부 ip address
+Internet router :    192.168.0.1
+개발pc               192.168.0.7
+DGX Spark :          192.168.0.132
+Experion 서버 :      192.168.0.50
+HC900 :              192.168.0.20
+
+외부 접속 방법
+WireGuard로 내부 IP를 할당받아 접속하거나, Tailgate를 이용해서 접속 가능 (WireGuard 쪽이 편함)
+
+DGX Spark : ssh windpacer@192.168.0.132 (비밀번호는 저장소에 기록하지 않음 — 관리자에게 별도 문의)
+              내부IP로 Nvidia Sync프로그램 다운받아서 연결하면 편함
+              Tailgate로도 직접 액세스 가능함
+UI 접속 : http://192.168.0.132:5000
+
+
--- a/Project-Intro/readme2.md
+++ b/Project-Intro/readme2.md
@@ -0,0 +1,113 @@
+# ExperionCrawler 프로젝트 소개
+
+ExperionCrawler는 Honeywell Experion HS R530 시스템의 데이터를 효율적으로 수집, 저장 및 분석하기 위한 통합 데이터 플랫폼입니다. OPC UA 통신을 통해 실시간 및 히스토리 데이터를 수집하고, LLM 기반의 Text-to-SQL 및 RAG 시스템을 통해 사용자가 자연어로 산업 데이터를 조회할 수 있는 환경을 제공합니다.
+
+## 🛠 개발 환경
+
+- **하드웨어 구성**
+  - **HC900 Controller**: 제어 로직 수행 (CPU 중심)
+  - **Experion HS R530 서버**: Windows 10 LTSC 2021 IoT Enterprise, R530 라이선스 기반 데이터 소스
+  - **Nvidia DGX Spark**: 메인 서버 (Ubuntu 24.04), LLM 및 고성능 연산 처리
+  - **개발 PC**: Kmtech K8 Plus (Mini PC)
+
+- **기술 스택**
+  - **Backend**: C# / .NET 8.0 (ASP.NET Core)
+  - **Communication**: OPC UA (Client & Server)
+  - **Database**: PostgreSQL / TimescaleDB (시계열 데이터 최적화)
+  - **AI/LLM**: 
+    - **MCP Server**: Python 3 기반 (Model Context Protocol)
+    - **LLM**: Gemma4-32B-it (Vision 및 통합 지능 처리)
+  - **IDE**: VS Code + Roo Code + Local LLM (Gemma4, Qwen3 등)
+
+---
+
+## 🏗 System Architecture
+
+ExperionCrawler는 데이터 수집 계층, 저장 계층, 지능형 인터페이스 계층의 3단계 구조로 설계되었습니다.
+
+### 연결 환경 다이어그램
+
+```mermaid
+graph TD
+    subgraph "Field & Control Layer"
+        HC900[HC900 Controller] --> R530[Experion HS R530 Server]
+    end
+
+    subgraph "Data Collection Layer (ExperionCrawler)"
+        R530 -- "OPC UA (Client)" --> OPC_Client[ExperionOpcClient]
+        OPC_Client --> RT_Svc[Realtime Service]
+        OPC_Client --> Hist_Svc[History Service]
+        OPC_Client --> Fast_Svc[Fast Session Service]
+        
+        OPC_Server[ExperionOpcServer] -- "OPC UA (Server)" --> External_Client[External OPC UA Clients]
+    end
+
+    subgraph "Storage & Intelligence Layer"
+        RT_Svc --> DB[(TimescaleDB / PostgreSQL)]
+        Hist_Svc --> DB
+        Fast_Svc --> DB
+        
+        DB <--> MCP[MCP Server - Python]
+        MCP <--> LLM[Local LLM - Gemma4/Qwen3]
+        LLM <--> RAG[RAG System - Docs/Code]
+    end
+
+    subgraph "User Interface Layer"
+        WebUI[Web Dashboard] -- "REST API" --> WebAPI[ASP.NET Core API]
+        WebAPI --> RT_Svc
+        WebAPI --> Hist_Svc
+        WebAPI --> T2S[Text-to-SQL Service]
+        T2S <--> MCP
+    end
+```
+
+### 주요 구성 요소 설명
+
+1. **OPC UA Engine**: 
+   - `ExperionOpcClient`: R530 서버로부터 데이터를 읽어오는 클라이언트.
+   - `ExperionOpcServer`: 수집된 데이터를 가공한 결과를 외부 시스템에 다시 제공하는 서버 기능. (현재는 서버 기능만 구현, 가공 기능은 미탑재)
+2. **Data Pipeline**:
+   - **Realtime**: 실시간 태그 구독 및 DB 저장.
+   - **History**: 과거 데이터 스냅샷 및 범위 조회.
+   - **Fast Session**: 고속 샘플링 데이터 수집 세션 관리.
+3. **Intelligence (RAG & MCP)**:
+   - **MCP (Model Context Protocol)**: LLM이 DB 쿼리 실행, 파일 읽기 등 도구를 사용할 수 있게 하는 인터페이스.
+   - **Text-to-SQL**: 사용자의 자연어 질문을 분석하여 최적의 SQL 쿼리로 변환하고 실행.
+   - **RAG**: Experion HS R530 공식 문서 및 소스코드를 인덱싱하여 정확한 기술 답변 제공.
+
+---
+
+## 📈 프로젝트 진행 현황
+
+### ✅ 완료된 사항
+- [x] **OPC UA 통신 기반 구축**: R530 서버 연결 및 노드 브라우징 구현
+- [x] **데이터 수집 파이프라인**: 실시간 구독, 히스토리 조회, Fast Session 기능 구현
+- [x] **데이터베이스 설계**: TimescaleDB 기반 시계열 데이터 저장 구조 최적화
+- [x] **Text-to-SQL 엔진**: 한국어 자연어 $\rightarrow$ SQL 변환 및 실행 파이프라인 구축
+- [x] **MCP 서버 통합**: Python 기반 MCP 서버를 통한 LLM-DB 연결 환경 조성
+- [x] **인증서 관리**: OPC UA 보안 통신을 위한 인증서 생성 및 신뢰 관계 설정 자동화
+
+### 🚀 향후 계획 (Roadmap)
+- [ ] **P&ID 도면 분석 자동화**: DXF/PDF 도면에서 태그 정보를 추출하고 DB와 매핑하는 파이프라인 구축
+- [ ] **지능형 태그 매핑**: P&ID 태그 $\leftrightarrow$ Experion 시스템 태그 간의 AI 기반 자동 매핑
+- [ ] **고도화된 RAG 시스템**: 제품 문서 및 도면 정보를 결합한 하이브리드 RAG 구현
+- [ ] **UI/UX 개선**: 시계열 데이터 시각화(uPlot) 및 자연어 질의 인터페이스 고도화
+- [ ] **시스템 안정화**: 대량 데이터 수집 시의 성능 최적화 및 예외 처리 강화
+
+
+내부 ip address
+Internet router :    192.168.0.1
+개발pc               192.168.0.7
+DGX Spark :          192.168.0.132
+Experion 서버 :      192.168.0.50
+HC900 :              192.168.0.20
+
+외부 접속 방법
+WireGuard로 내부 IP를 할당받아 접속하거나, Tailgate를 이용해서 접속 가능 (WireGuard 쪽이 편함)
+
+DGX Spark : ssh windpacer@192.168.0.132 (비밀번호는 저장소에 기록하지 않음 — 관리자에게 별도 문의)
+              내부IP로 Nvidia Sync프로그램 다운받아서 연결하면 편함
+              Tailgate로도 직접 액세스 가능함
+UI 접속 : http://192.168.0.132:5000
+
+
--- a/pycache/extract_pid_tags_direct.cpython-312.pyc
+++ b/pycache/extract_pid_tags_direct.cpython-312.pyc
--- a/pycache/test_dxf_extract_pid2.cpython-312.pyc
+++ b/pycache/test_dxf_extract_pid2.cpython-312.pyc
--- a/pycache/test_dxf_extract_pid3.cpython-312.pyc
+++ b/pycache/test_dxf_extract_pid3.cpython-312.pyc
--- a/actual_parser_coding_byGemma426b.md
+++ b/actual_parser_coding_byGemma426b.md
@@ -0,0 +1,372 @@
+# Actual Implementation: P&ID Parser (Distributed Processing)
+
+This document contains the actual implementation of the P&ID Parser based on the design plan.
+
+## 1. Python Implementation
+
+### 1.1 `dxf_preprocessor.py`
+```python
+import ezdxf
+import json
+from datetime import datetime
+import os
+
+class DXFPreprocessor:
+    """
+    DXF 파일을 로드하여 핵심 엔티티를 추출하고 중간 JSON 포맷으로 저장합니다.
+    """
+    def __init__(self):
+        self.entities = []
+
+    def load_and_parse(self, file_path):
+        try:
+            if not os.path.exists(file_path):
+                print(f"Error: File not found {file_path}")
+                return False
+                
+            doc = ezdxf.readfile(file_path)
+            msp = doc.modelspace()
+            
+            for entity in msp:
+                # 추출 대상 엔티티 타입 정의
+                if entity.dxftype() in ['TEXT', 'MTEXT', 'LINE', 'CIRCLE', 'LWPOLYLINE']:
+                    data = {
+                        "type": entity.dxftype(),
+                        "layer": entity.dxf.layer,
+                        "content": "",
+                        "coordinates": {"x": 0.0, "y": 0.0, "z": 0.0},
+                        "attributes": {"color": entity.dxf.color, "lineweight": entity.dxf.lineweight}
+                    }
+                    
+                    # 텍스트 내용 추출
+                    if entity.dxftype() in ['TEXT', 'MTEXT']:
+                        data["content"] = entity.dxf.text if entity.dxftype() == 'TEXT' else entity.text
+                    
+                    # 좌표 정보 추출 (단순화)
+                    try:
+                        if entity.dxftype() == 'LINE':
+                            data["coordinates"] = {"x": entity.dxf.start.x, "y": entity.dxf.start.y, "z": entity.dxf.start.z}
+                        elif entity.dxftype() == 'CIRCLE':
+                            data["coordinates"] = {"x": entity.dxf.center.x, "y": entity.dxf.center.y, "z": entity.dxf.center.z}
+                        elif entity.dxftype() == 'LWPOLYLINE':
+                            data["coordinates"] = {"x": entity.dxf.vertices[0].x, "y": entity.dxf.vertices[0].y, "z": 0.0}
+                    except Exception:
+                        pass # 좌표 추출 실패 시 기본값 유지
+                            
+                    self.entities.append(data)
+            return True
+        except Exception as e:
+            print(f"Error parsing DXF: {e}")
+            return False
+
+    def generate_intermediate_json(self, output_path, filename):
+        data = {
+            "metadata": {
+                "filename": filename,
+                "timestamp": datetime.now().isoformat()
+            },
+            "entities": self.entities
+        }
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, indent=2, ensure_ascii=False)
+        print(f"Intermediate JSON saved to: {output_path}")
+
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) < 2:
+        print("Usage: python dxf_preprocessor.py <input_dxf_path>")
+    else:
+        input_path = sys.argv[1]
+        output_path = input_path.replace(".dxf", "_intermediate.json")
+        preprocessor = DXFPreprocessor()
+        if preprocessor.load_and_parse(input_path):
+            preprocessor.generate_intermediate_json(output_path, os.path.basename(input_path))
+```
+
+### 1.2 `extractors/base_extractor.py`
+```python
+import json
+import re
+import sys
+import os
+
+class BaseExtractor:
+    """
+    모든 특화된 추출기(Specialized Extractors)의 기본 클래스입니다.
+    """
+    def __init__(self, input_json_path):
+        self.input_json_path = input_json_path
+        self.data = None
+        self.results = []
+
+    def load_input_json(self):
+        try:
+            with open(self.input_json_path, 'r', encoding='utf-8') as f:
+                self.data = json.load(f)
+            return True
+        except Exception as e:
+            print(f"Error loading JSON: {e}")
+            return False
+
+    def apply_regex_pattern(self, pattern):
+        if not self.data:
+            return
+        
+        regex = re.compile(pattern)
+        for entity in self.data.get("entities", []):
+            content = entity.get("content", "")
+            if content:
+                match = regex.search(content)
+                if match:
+                    # 매칭된 정보를 결과 리스트에 추가
+                    self.results.append({
+                        "tag": match.group(0),
+                        "type": entity["type"],
+                        "layer": entity["layer"],
+                        "content": content,
+                        "coordinates": entity["coordinates"]
+                    })
+
+    def save_output_json(self, output_path):
+        output_data = {
+            "source_file": self.data["metadata"]["filename"],
+            "extracted_count": len(self.results),
+            "results": self.results
+        }
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(output_data, f, indent=2, ensure_ascii=False)
+        print(f"Extraction results saved to: {output_path}")
+
+if __name__ == "__main__":
+    # Intentionally a no-op: each specialized extractor script provides its own entry point.
+    pass
+```
+
+### 1.3 `extractors/transmitter_extractor.py` (Example of Specialized Extractor)
+```python
+import sys
+from base_extractor import BaseExtractor
+
+class TransmitterExtractor(BaseExtractor):
+    def run(self):
+        # Pattern: (FIT|FT|LT|PT|TE) - 123
+        pattern = r"(FIT|FT|LT|PT|TE)\s?-\s?\d+"
+        self.apply_regex_pattern(pattern)
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python transmitter_extractor.py <input_json_path>")
+    else:
+        input_path = sys.argv[1]
+        output_path = input_path.replace(".json", "_transmitter.json")
+        extractor = TransmitterExtractor(input_path)
+        if extractor.load_input_json():
+            extractor.run()
+            extractor.save_output_json(output_path)
+```
+
+### 1.4 `extraction_orchestrator.py`
+```python
+import subprocess
+import json
+import os
+import glob
+
+class ExtractionOrchestrator:
+    """
+    서브 프로세스들을 병렬로 실행하고 결과를 통합합니다.
+    """
+    def __init__(self, extractor_scripts):
+        self.extractor_scripts = extractor_scripts
+        self.processes = []
+
+    def run_parallel_extractors(self, input_json_path):
+        print(f"Starting parallel extraction for: {input_json_path}")
+        for script in self.extractor_scripts:
+            # 각 스크립트를 별도 프로세스로 실행
+            p = subprocess.Popen(['python', script, input_json_path])
+            self.processes.append(p)
+        
+        for p in self.processes:
+            p.wait()
+        print("All extraction processes completed.")
+
+    def aggregate_results(self, input_json_path, output_master_path):
+        master_results = []
+        # 모든 _*.json 파일들을 찾아 병합
+        pattern = os.path.dirname(input_json_path) + "/*_*.json"
+        result_files = glob.glob(pattern)
+        
+        for file_path in result_files:
+            if os.path.basename(file_path).startswith("intermediate"):
+                continue # 원본 중간 파일은 제외
+                
+            try:
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    res_data = json.load(f)
+                    master_results.extend(res_data["results"])
+            except Exception as e:
+                print(
+                    f"Error aggregating {file_path}: {e}"
+                )
+
+        final_output = {
+            "source_file": os.path.basename(input_json_path),
+            "total_extracted": len(master_results),
+            "results": master_results
+        }
+        
+        with open(output_master_path, 'w', encoding='utf-8') as f:
+            json.dump(final_output, f, indent=2, ensure_ascii=False)
+        print(f"Master extraction result saved to: {output_master_path}")
+
+if __name__ == "__main__":
+    # Example usage
+    scripts = [
+        'extractors/transmitter_extractor.py',
+        # 'extractors/valve_extractor.py', ...
+    ]
+    orchestrator = ExtractionOrchestrator(scripts)
+    input_json = "path/to/intermediate.json"
+    orchestrator.run_parallel_extractors(input_json)
+    orchestrator.aggregate_results(input_json, "master_result.json")
+```
+
+### 1.5 `database_integrator.py`
+```python
+import json
+import requests
+
+class DatabaseIntegrator:
+    """
+    병합된 결과를 .NET Backend API로 전송합니다.
+    """
+    def __init__(self, api_url):
+        self.api_url = api_url
+
+    def send_to_backend(self, master_json_path):
+        try:
+            with open(master_json_append_path, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            
+            response = requests.post(self.api_url, json=data)
+            if response.status_code == 200:
+                print("Successfully sent data to backend.")
+            else:
+                print(f"Failed to send data. Status: {response.status_code}, Error: {response.text}")
+        except Exception as e:
+            print(f"Error during integration: {e}")
+
+if __name__ == "__main__":
+    API_ENDPOINT = "http://localhost:5000/api/pid/extraction"
+    integrator = DatabaseIntegrator(API_ENDPOINT)
+    integrator.send_to_backend("master_result.json")
+```
+
+## 2. C# Backend Implementation
+
+### 2.1 `PidExtractionController.cs`
+```csharp
+using Microsoft.AspNetCore.Mvc;
+using ExperionCrawler.Core.Application.DTOs;
+using ExperionCrawler.Core.Application.Services;
+
+namespace ExperionCrawler.Web.Controllers
+{
+    /// <summary>
+    /// Receives P&amp;ID extraction results and hands them to <see cref="IPidProcessingService"/>.
+    /// With the route template below the action is reachable at
+    /// <c>POST api/PidExtraction/extraction</c>.
+    /// </summary>
+    [ApiController]
+    [Route("api/[controller]")]
+    public class PidExtractionController : ControllerBase
+    {
+        private readonly IPidProcessingService _pidService;
+
+        public PidExtractionController(IPidProcessingService pidService)
+        {
+            _pidService = pidService;
+        }
+
+        /// <summary>Processes and stores one extraction result.</summary>
+        /// <param name="dto">Extraction payload read from the request body.</param>
+        /// <returns>
+        /// 200 on success, 400 when the payload is missing or fails validation,
+        /// 500 for any other failure.
+        /// </returns>
+        [HttpPost("extraction")]
+        public async Task<IActionResult> PostExtractionResult([FromBody] ExtractionDto dto)
+        {
+            if (dto == null) return BadRequest("Invalid data.");
+
+            try
+            {
+                await _pidService.ProcessAndSave(dto);
+                return Ok(new { message = "Extraction data processed successfully." });
+            }
+            catch (ArgumentException ex)
+            {
+                // Validation failures raised by the service are client errors, not server faults.
+                return BadRequest(ex.Message);
+            }
+            catch (Exception ex)
+            {
+                return StatusCode(500, $"Internal server error: {ex.Message}");
+            }
+        }
+    }
+}
+```
+
+### 2.2 `PidProcessingService.cs`
+```csharp
+using ExperionCrawler.Core.Application.DTOs;
+using ExperionCrawler.Core.Application.Interfaces;
+using ExperionCrawler.Core.Domain.Entities;
+
+namespace ExperionCrawler.Core.Application.Services
+{
+    /// <summary>
+    /// Validates an extraction result, maps each item to a <see cref="PidEquipment"/>
+    /// and stores it through <see cref="IPidRepository"/>.
+    /// </summary>
+    public class PidProcessingService : IPidProcessingService
+    {
+        private readonly IPidRepository _repository;
+
+        // The original declaration was truncated ("public PidProcessing") and did not compile.
+        public PidProcessingService(IPidRepository repository)
+        {
+            _repository = repository;
+        }
+
+        /// <summary>Maps every result item of <paramref name="dto"/> to an entity and saves it.</summary>
+        /// <param name="dto">Extraction payload; its source file name must not be empty.</param>
+        /// <exception cref="ArgumentException">The source file name is null or empty.</exception>
+        public async Task ProcessAndSave(ExtractionDto dto)
+        {
+            // 1. Validate DTO
+            if (string.IsNullOrEmpty(dto.SourceFile)) throw new ArgumentException("Source file name is required.");
+
+            // 2. Map DTO to Domain Entity
+            foreach (var item in dto.Results)
+            {
+                var equipment = new PidEquipment
+                {
+                    TagName = item.Tag,
+                    Layer = item.Layer,
+                    Description = item.Content,
+                    SourceFile = dto.SourceFile,
+                    CreatedAt = DateTime.UtcNow
+                };
+
+                // 3. Save to Database (one repository call per item)
+                await _repository.SaveAsync(equipment);
+            }
+        }
+    }
+}
+```
+
+### 2.3 `PidRepository.cs`
+```csharp
+using ExperionCrawler.Core.Application.Interfaces;
+using ExperionCrawler.Core.Domain.Entities;
+using ExperionCrawler.Infrastructure.Database;
+
+namespace ExperionCrawler.Infrastructure.Repositories
+{
+    /// <summary>Implementation of <see cref="IPidRepository"/> on top of <see cref="ExperionDbContext"/>.</summary>
+    public class PidRepository : IPidRepository
+    {
+        private readonly ExperionDbContext _context;
+
+        public PidRepository(ExperionDbContext context) => _context = context;
+
+        /// <summary>Adds <paramref name="entity"/> to the PidEquipments set and saves the context right away.</summary>
+        public async Task SaveAsync(PidEquipment entity)
+        {
+            var equipments = _context.PidEquipments;
+            await equipments.AddAsync(entity);
+            await _context.SaveChangesAsync();
+        }
+    }
+}
+```
--- a/extract_pid_tags_direct.py
+++ b/extract_pid_tags_direct.py
@@ -0,0 +1,375 @@
+#!/usr/bin/env python3
+"""
+DXF 파일에서 P&ID 태그를 추출하는 스크립트
+- MCP 서버를 거치지 않고 LLM에 직접 요청
+- 전처리 과정에서 의미 없는 텍스트는 필터링
+- CSV 형식으로 LLM에 전달
+"""
+
+import sys
+import json
+import re
+import csv
+import io
+from dataclasses import dataclass
+from typing import List, Optional
+import requests
+
+
+@dataclass
+class TextEntity:
+    """A text-bearing DXF entity (TEXT, MTEXT or ATTRIB) with its position and styling."""
+    entity_type: str  # entity keyword found in the file: 'TEXT', 'MTEXT' or 'ATTRIB'
+    text: str  # text content (group code 1)
+    x: float  # group code 10
+    y: float  # group code 20
+    z: float  # group code 30
+    layer: str  # layer name (group code 8)
+    height: float  # group code 40
+    style: str  # group code 7
+
+
+def parse_dxf_text_entities(file_path: str) -> List[TextEntity]:
+    """DXF 파일에서 TEXT, MTEXT, ATTRIB 엔티티를 파싱"""
+    entities = []
+    
+    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
+        lines = f.readlines()
+    
+    i = 0
+    while i < len(lines):
+        line = lines[i].strip()
+        
+        if line in ('TEXT', 'MTEXT', 'ATTRIB'):
+            entity_type = line
+            entity = {
+                'entity_type': entity_type,
+                'text': '',
+                'x': 0.0,
+                'y': 0.0,
+                'z': 0.0,
+                'layer': '',
+                'height': 0.0,
+                'style': ''
+            }
+            
+            i += 1
+            while i < len(lines):
+                code = lines[i].strip()
+                if code == '0':
+                    break
+                
+                if i + 1 < len(lines):
+                    value = lines[i + 1].strip()
+                    
+                    if code == '1':
+                        if entity['text']:
+                            entity['text'] += ' ' + value
+                        else:
+                            entity['text'] = value
+                    elif code == '10':
+                        entity['x'] = float(value)
+                    elif code == '20':
+                        entity['y'] = float(value)
+                    elif code == '30':
+                        entity['z'] = float(value)
+                    elif code == '8':
+                        entity['layer'] = value
+                    elif code == '40':
+                        entity['height'] = float(value)
+                    elif code == '7':
+                        entity['style'] = value
+                    
+                    i += 1
+                
+                i += 1
+            
+            if entity['text']:
+                entities.append(TextEntity(
+                    entity_type=entity['entity_type'],
+                    text=entity['text'],
+                    x=entity['x'],
+                    y=entity['y'],
+                    z=entity['z'],
+                    layer=entity['layer'],
+                    height=entity['height'],
+                    style=entity['style']
+                ))
+        else:
+            i += 1
+    
+    return entities
+
+
+def filter_meaningful_text(entities: List[TextEntity]) -> List[TextEntity]:
+    """
+    의미 있는 텍스트만 필터링
+    """
+    meaningful = []
+    
+    remove_patterns = [
+        r'^\$[A-Z]+$',  # DXV 시스템 변수
+        r'^[0-9]+$',    # 숫자만 있는 텍스트
+        r'^[0-9.]+$',   # 숫자와 점만 있는 텍스트
+        r'^[a-zA-Z0-9_]{1}$',  # 1자 알파벳/숫자/언더스코어
+        r'^[ \t]+$',    # 공백만 있는 텍스트
+        r'^[a-zA-Z0-9]{1,2}$',  # 2자 이하의 알파벳/숫자 조합
+    ]
+    
+    for entity in entities:
+        text = entity.text.strip()
+        
+        if not text:
+            continue
+        
+        is_system_var = False
+        for pattern in remove_patterns:
+            if re.match(pattern, text):
+                is_system_var = True
+                break
+        
+        if is_system_var:
+            continue
+        
+        is_meaningful = False
+        
+        # 태그명 패턴 확인 (예: P-101, PIC-6211, T-10101)
+        if re.match(r'^[A-Z]+[-_][A-Z0-9]+$', text):
+            is_meaningful = True
+        # 3자 이상이고 알파벳/숫자/한글이 포함된 경우
+        elif len(text) >= 3 and (re.search(r'[A-Z]', text) or re.search(r'[0-9]', text)):
+            is_meaningful = True
+        # 한글 포함
+        elif re.search(r'[가-힣]', text):
+            is_meaningful = True
+        
+        if is_meaningful:
+            meaningful.append(TextEntity(
+                entity_type=entity.entity_type,
+                text=text,
+                x=entity.x,
+                y=entity.y,
+                z=entity.z,
+                layer=entity.layer,
+                height=entity.height,
+                style=entity.style
+            ))
+    
+    return meaningful
+
+
+def filter_tag_candidates_strict(entities: List[TextEntity]) -> List[TextEntity]:
+    """
+    P&ID 태그 후보만 필터링 (엄격한 기준 - 실제 태그 패턴에만 매칭)
+    """
+    tag_candidates = []
+    
+    for entity in entities:
+        text = entity.text.strip()
+        
+        if not text:
+            continue
+        
+        # 태그 패턴: P-101, PIC-6211, T-10101, FT-201 등
+        # 첫 글자는 대문자(1-4자), 뒤에 하이픈 또는 언더스코어, 그리고 알파벳/숫자가 옴
+        if re.match(r'^[A-Z]{1,4}[-_][A-Z0-9]+$', text):
+            tag_candidates.append(entity)
+    
+    return tag_candidates
+
+
+def export_to_csv(entities: List[TextEntity]) -> str:
+    """CSV 형식으로 변환 (LLM 파싱 용이)"""
+    lines = []
+    # 헤더 추가
+    lines.append("entity_type,text,x,y,z,layer,height,style")
+    for entity in entities:
+        # CSV 이스케이프: 쉼표, 따옴표, 줄바꿈이 포함된 경우 따옴표로 감싸기
+        text = entity.text.replace('"', '""')
+        if ',' in text or '"' in text or '\n' in text:
+            text = f'"{text}"'
+        lines.append(f"{entity.entity_type},{text},{entity.x},{entity.y},{entity.z},{entity.layer},{entity.height},{entity.style}")
+    return "\n".join(lines)
+
+
+def export_to_simple_text(entities: List[TextEntity]) -> str:
+    """간단한 텍스트 형식으로 변환 (LLM 파싱 용이)"""
+    lines = []
+    for entity in entities:
+        lines.append(f"TEXT: {entity.text}")
+    return "\n".join(lines)
+
+
+def extract_pid_tags_with_llm_simple(text_data: str, model_url: str = "http://localhost:8000/v1/chat/completions") -> dict:
+    """
+    LLM을 사용하여 P&ID 태그 추출 (간단한 텍스트 형식)
+    MCP 서버를 거치지 않고 vLLM 직접 요청
+    """
+    prompt = f"""당신은 P&ID(Piping and Instrumentation Diagram) 도면에서 태그 정보를 추출하는 전문가입니다.
+
+주어진 텍스트는 DXF 파일에서 추출한 P&ID 태그 후보입니다. 이 데이터에서 실제 P&ID 태그를 추출해주세요.
+
+**태그 형식 (예시):**
+- P-101: Pump (펌프)
+- PIC-6211: Pressure Indicating Controller (압력 측정 및 제어)
+- T-10101: Tank (탱크)
+- FT-201: Flow Transmitter (유량 측정)
+- PT-101: Pressure Transmitter (압력 측정)
+- LIC-6201: Level Indicating Controller (유량 측정 및 제어)
+- FIC-6113: Flow Indicating Controller (유량 측정 및 제어)
+- DP-10101: Differential Pressure (차압)
+- VP-10117: Valve Positioner (밸브 포지셔너)
+- SP-10601: Switch Pressure (압력 스위치)
+
+**태그 패턴:**
+- 첫 글자는 장비/계기 유형을 나타냅니다 (P, T, F, L, P, V, S, C, E, D 등)
+- 뒤에 숫자가 붙어 고유 식별자를 만듭니다
+- 계기 유형은 PIC, FIC, LIC, TIC 등으로 확장될 수 있습니다
+
+**추출할 필드:**
+- tagNo: 태그 번호 (예: P-101, PIC-6211)
+- equipmentName: 장비 이름 (예: Pump, Tank, Pressure Transmitter)
+- instrumentType: 계기 유형 (P, T, FT, PT, PIC, LIC, FIC, LV, MV 등)
+- lineNumber: 파이프 라인 번호 (있는 경우)
+- pidDrawingNo: 도면 번호 (있는 경우)
+- confidence: 추출 신뢰도 (0.0 ~ 1.0)
+
+**텍스트 데이터:**
+{text_data}
+
+**요청:**
+1. 텍스트 데이터에서 실제 P&ID 태그만 추출하세요 (의미 없는 텍스트는 제외)
+2. JSON 배열 형식으로 응답하세요
+3. 각 태그는 위의 필드를 포함해야 합니다
+4. 알 수 없는 정보는 null로 설정하세요
+5. 신뢰도 점수를 부여하세요
+
+**응답 형식 (JSON만, 추가 설명 없이):**
+[
+  {{"tagNo": "P-101", "equipmentName": "Pump", "instrumentType": "P", "lineNumber": null, "pidDrawingNo": null, "confidence": 0.95}},
+  {{"tagNo": "PIC-6211", "equipmentName": "Pressure Indicating Controller", "instrumentType": "PIC", "lineNumber": null, "pidDrawingNo": null, "confidence": 0.90}}
+]
+"""
+
+    payload = {
+        "model": "Qwen/Qwen3-Coder-Next-FP8",
+        "messages": [
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ],
+        "temperature": 0.1,
+        "max_tokens": 8192,
+        "stream": False
+    }
+
+    try:
+        response = requests.post(model_url, json=payload, timeout=300)
+        response.raise_for_status()
+        
+        result = response.json()
+        content = result.get('choices', [{}])[0].get('message', {}).get('content', '')
+        
+        # JSON 파싱
+        try:
+            # 코드 블록으로 감싸진 JSON 제거
+            json_match = re.search(r'\[.*\]', content, re.DOTALL)
+            if json_match:
+                json_str = json_match.group()
+                return json.loads(json_str)
+            else:
+                return json.loads(content)
+        except json.JSONDecodeError as e:
+            # 원본 응답을 파일로 저장
+            error_output_path = dxf_path.replace('.dxf', '_error_response.txt')
+            with open(error_output_path, 'w', encoding='utf-8') as f:
+                f.write(content)
+            return {"error": f"JSON 파싱 실패: {str(e)}", "raw_response": content, "error_output_path": error_output_path}
+            
+    except requests.exceptions.RequestException as e:
+        return {"error": f"LLM 요청 실패: {str(e)}"}
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("사용법: python extract_pid_tags.py <dxf_file_path> [model_url]")
+        sys.exit(1)
+    
+    dxf_path = sys.argv[1]
+    model_url = sys.argv[2] if len(sys.argv) > 2 else "http://localhost:8000/v1/chat/completions"
+    
+    print(f"DXF 파일 파싱 중: {dxf_path}")
+    entities = parse_dxf_text_entities(dxf_path)
+    print(f"총 {len(entities)}개 텍스트 엔티티 found")
+    
+    print("의미 있는 텍스트 필터링 중...")
+    meaningful = filter_meaningful_text(entities)
+    print(f"의미 있는 텍스트: {len(meaningful)}개")
+    
+    # P&ID 태그 후보만 필터링 (엄격한 기준)
+    tag_candidates = filter_tag_candidates_strict(meaningful)
+    print(f"P&ID 태그 후보 (엄격한 기준): {len(tag_candidates)}개")
+    
+    # 상위 200개만 전달 (토큰 제한 대응)
+    top_meaningful = tag_candidates[:200]
+    print(f"LLM에 전달할 텍스트 수: {len(top_meaningful)}개")
+    
+    # 간단한 텍스트 형식으로 변환
+    simple_text = export_to_simple_text(top_meaningful)
+    
+    print("\n" + "="*80)
+    print("LLM에 전달할 텍스트 데이터 (첫 50줄):")
+    print("="*80)
+    lines = simple_text.split('\n')
+    for line in lines[:50]:
+        print(line)
+    if len(lines) > 50:
+        print(f"... (총 {len(lines)}줄)")
+    
+    print("\n" + "="*80)
+    print("LLM에 P&ID 태그 추출 요청 중...")
+    print("="*80)
+    
+    result = extract_pid_tags_with_llm_simple(simple_text, model_url)
+    
+    if 'error' in result:
+        print(f"오류: {result['error']}")
+        if 'raw_response' in result:
+            print(f"원본 응답: {result['raw_response'][:500]}")
+        if 'error_output_path' in result:
+            print(f"오류 응답 저장 경로: {result['error_output_path']}")
+    else:
+        print(f"\n성공적으로 추출된 태그: {len(result)}개")
+        print("\n추출 결과:")
+        for i, tag in enumerate(result[:20], 1):
+            print(f"{i}. {tag.get('tagNo', 'N/A')} - {tag.get('equipmentName', 'N/A')} ({tag.get('instrumentType', 'N/A')}) - confidence: {tag.get('confidence', 0)}")
+        
+        if len(result) > 20:
+            print(f"... (총 {len(result)}개)")
+        
+        # 결과를 JSON 파일로 저장
+        json_output_path = dxf_path.replace('.dxf', '_extracted.json')
+        with open(json_output_path, 'w', encoding='utf-8') as f:
+            json.dump(result, f, ensure_ascii=False, indent=2)
+        print(f"\nJSON 결과가 저장되었습니다: {json_output_path}")
+        
+        # 결과를 CSV 파일로 저장
+        csv_output_path = dxf_path.replace('.dxf', '_extracted.csv')
+        with open(csv_output_path, 'w', encoding='utf-8', newline='') as f:
+            writer = csv.writer(f)
+            writer.writerow(['tagNo', 'equipmentName', 'instrumentType', 'lineNumber', 'pidDrawingNo', 'confidence'])
+            for tag in result:
+                writer.writerow([
+                    tag.get('tagNo', ''),
+                    tag.get('equipmentName', ''),
+                    tag.get('instrumentType', ''),
+                    tag.get('lineNumber', ''),
+                    tag.get('pidDrawingNo', ''),
+                    tag.get('confidence', 0)
+                ])
+        print(f"CSV 결과가 저장되었습니다: {csv_output_path}")
+
+
+# Run the extraction only when the file is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    main()
--- a/Pipeline/Concept-P&ID
+++ b/Pipeline/Concept-P&ID
@@ -0,0 +1,205 @@
+✔ 🎯 End-to-End P&ID Graph Pipeline (실전 구조)
+
+                ┌──────────────────────┐
+                │   P&ID PDF Input     │
+                └─────────┬────────────┘
+                          ↓
+        ┌─────────────────────────────────┐
+        │  1. Document Parsing Layer      │
+        │  (layout + text + tables)      │
+        └─────────┬──────────────────────┘
+                  ↓
+        ┌─────────────────────────────────┐
+        │  2. Spatial Element Extraction  │
+        │  (symbols + coordinates)        │
+        └─────────┬──────────────────────┘
+                  ↓
+        ┌─────────────────────────────────┐
+        │  3. Entity Extraction (LLM)     │
+        │  FIC-101, Pump-01, Valve...    │
+        └─────────┬──────────────────────┘
+                  ↓
+        ┌─────────────────────────────────┐
+        │  4. Relationship Inference      │
+        │  (rules + LLM hybrid)          │
+        └─────────┬──────────────────────┘
+                  ↓
+        ┌─────────────────────────────────┐
+        │  5. Graph Builder               │
+        │  nodes + edges                 │
+        └─────────┬──────────────────────┘
+                  ↓
+        ┌─────────────────────────────────┐
+        │  6. DB Integration Layer        │
+        │  (existing OPC + SQL system)    │
+        └─────────────────────────────────┘
+
+
+
+        ✔ 1️⃣ Document Parsing Layer (PDF → 구조화)
+기술
+
+Unstructured
+
+역할
+텍스트 추출
+표 추출
+블록 segmentation
+page coordinate 유지
+출력 예시
+
+{
+  "page": 12,
+  "elements": [
+    {
+      "text": "FIC-101",
+      "bbox": [120, 300, 160, 320]
+    }
+  ]
+}
+
+👉 핵심: 좌표 반드시 유지
+
+✔ 2️⃣ Spatial Element Extraction (핵심 단계)
+
+여기서 P&ID가 살아난다.
+
+해야 할 것
+symbol detection
+line detection
+proximity mapping
+결과
+JSON
+{
+  "FIC-101": { "x": 120, "y": 300 },
+  "FT-101": { "x": 110, "y": 220 },
+  "Valve-203": { "x": 300, "y": 310 }
+}
+
+
+
+✔ 3️⃣ Entity Extraction (LLM)
+모델
+
+Qwen (via vLLM)
+
+입력
+Extract all P&ID entities:
+- controller
+- sensor
+- valve
+- pump
+출력
+[
+  {"name":"FIC-101","type":"controller"},
+  {"name":"FT-101","type":"sensor"},
+  {"name":"Valve-203","type":"valve"}
+]
+
+
+
+✔ 4️⃣ Relationship Inference (가장 중요)
+
+여기서 2개를 섞는다:
+
+A. Rule Engine (필수)
+if sensor near controller:
+    relation = "signal"
+
+if controller connected to valve:
+    relation = "control"
+
+if pump → tank:
+    relation = "flow"
+
+    
+B. LLM 보조 판단
+Determine relationship based on P&ID context:
+Entities + coordinates
+최종 출력
+[
+  {
+    "source": "FT-101",
+    "target": "FIC-101",
+    "relation": "signal",
+    "confidence": 0.93
+  },
+  {
+    "source": "FIC-101",
+    "target": "Valve-203",
+    "relation": "control",
+    "confidence": 0.91
+  }
+]
+✔ 5️⃣ Graph Builder
+nodes = set()
+edges = []
+
+for r in relations:
+    nodes.add(r["source"])
+    nodes.add(r["target"])
+    edges.append(r)
+Graph 구조
+{
+  "nodes": [...],
+  "edges": [...]
+}
+✔ 6️⃣ DB Integration (기존 시스템 연동)
+
+기존 시스템이 이미 갖춘 강점:
+
+OPC UA 있음 ✔
+NL2SQL 있음 ✔
+DB 있음 ✔
+저장 전략
+P&ID Graph DB
+   ↓
+Mapping Layer
+   ↓
+OPC Tags / SQL semantic layer
+
+
+✔ 🔥 핵심 설계 포인트 (진짜 중요)
+❗ 1. LLM은 “결정자”가 아니다
+
+👉 “해석 보조” 역할
+
+❗ 2. 좌표가 핵심이다
+
+P&ID는 텍스트가 아니라:
+
+공간 + 연결 구조
+
+❗ 3. Rule Engine 없으면 실패한다
+
+LLM 단독 = hallucination
+
+
+✔ 📌 최종 아키텍처 (완성형)
+P&ID PDF
+   ↓
+Unstructured Parser
+   ↓
+Spatial Engine
+   ↓
+Qwen (Entity Extraction)
+   ↓
+Rule Engine (relationship)
+   ↓
+Graph Builder
+   ↓
+DB + OPC Integration
+✔ 🚀 이 구조의 의미
+
+이건 단순 기능이 아니라:
+
+👉 “Engineering Knowledge Graph 생성 엔진”
+
+✔ 다음 단계 (진짜 실전)
+
+이후 이어서 진행할 수 있는 후속 작업:
+
+1️⃣ P&ID 실제 PDF 기준 코드
+2️⃣ Rule engine 상세 설계 (ISA 기반)
+3️⃣ Qwen prompt 최적화 (산업용)
+4️⃣ Graph DB vs SQL 구조 선택
--- a/Pipeline/Graph_Pipeline_Phase1.md
+++ b/Pipeline/Graph_Pipeline_Phase1.md
@@ -0,0 +1,220 @@
+# 🛠️ Graph Pipeline Phase 1: 기하학적 데이터 추출 (Geometric Extraction)
+
+이 문서는 P&ID Graph Pipeline의 첫 번째 단계인 **기하학적 데이터 추출**의 상세 구현 계획을 다룹니다. 목표는 단순한 텍스트 추출을 넘어, 도면 내 모든 객체의 **물리적 위치(좌표)**와 **기하학적 속성**을 보존하여 이후 위상 모델링(Topology Modeling)이 가능하도록 하는 것입니다.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `ezdxf` | DXF 파일 파싱 및 엔티티 추출 | 핵심 라이브러리 |
+| `shapely` | 기하학적 연산 (Intersection, Distance, Bounding Box) | 좌표 기반 분석 필수 |
+| `numpy` | 대량의 좌표 데이터 계산 및 행렬 연산 | 성능 최적화 |
+| `pandas` | 추출된 객체 데이터의 구조화 및 CSV/JSON 저장 | 데이터 관리 |
+| `pydantic` | 추출 데이터의 스키마 정의 및 유효성 검증 | 데이터 무결성 보장 |
+| `pytesseract` / `pdf2image` | PDF 도면의 영역 기반 OCR 추출 | PDF 처리 시 필요 |
+
+### 1.2 설치 명령어
+```bash
+pip install ezdxf shapely numpy pandas pydantic pytesseract pdf2image
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 데이터 모델 (Schema)
+모든 추출 객체는 다음과 같은 공통 속성을 갖는 `GeometricEntity` 모델을 따릅니다.
+
+```python
+from pydantic import BaseModel
+from typing import List, Optional, Union, Tuple
+
+class BoundingBox(BaseModel):
+    """Axis-aligned bounding box of an entity together with its center point."""
+    min_x: float
+    min_y: float
+    max_x: float
+    max_y: float
+    center: Tuple[float, float]
+
+class GeometricEntity(BaseModel):
+    """Common schema shared by every extracted drawing object."""
+    entity_id: str
+    entity_type: str  # TEXT, LINE, CIRCLE, POLYLINE, ARC
+    layer: str
+    bbox: BoundingBox
+    properties: dict  # text value, color, line weight, etc.
+    coordinates: List[Tuple[float, float]]  # start/end points or the list of vertices
+```
+
+### 2.2 처리 파이프라인 흐름
+1.  **DXF Load:** `ezdxf.readfile()`을 통해 도면 로드.
+2.  **Entity Iteration:** 모든 레이어의 엔티티를 순회하며 타입별 분류.
+3.  **Coordinate Extraction:** 
+    *   `TEXT`: 삽입점(Insertion Point) 및 텍스트 길이를 이용한 BBox 계산.
+    *   `LINE`: 시작점(Start)과 끝점(End) 추출.
+    *   `POLYLINE`: 모든 정점(Vertices) 리스트 추출.
+    *   `CIRCLE/ARC`: 중심점(Center)과 반지름(Radius) 추출.
+4.  **Spatial Normalization:** 도면 좌표계를 분석 시스템 좌표계로 정규화.
+5.  **Structured Export:** JSON 또는 DB(PostgreSQL/PostGIS)에 저장.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 DXF 기하학적 추출 핵심 코드
+```python
+import ezdxf
+import re
+import json
+from shapely.geometry import box, LineString, Point
+from typing import List, Optional, Tuple
+
+class PidGeometricExtractor:
+    def __init__(self, file_path: str):
+        """Load the DXF document at ``file_path`` and keep a handle to its modelspace."""
+        self.doc = ezdxf.readfile(file_path)
+        self.msp = self.doc.modelspace()
+
+    def clean_text(self, text: str) -> str:
+        """DXF 특수 제어 문자 및 MTEXT 포맷팅을 최대한 제거하여 LLM 토큰 부하 감소"""
+        if not text:
+            return ""
+        
+        # 1. MTEXT 포맷팅 및 제어 문자 제거
+        # \P(줄바꿈), \W(너비), \L(밑줄), \A(정렬), \C(색상), \H(높이), \S(스택), \T(탭) 및 관련 인자 제거
+        text = re.sub(r'\\([P|W|L|A|C|H|S|T])\d*;?', ' ', text)
+        
+        # 2. 중괄호 { } 제거 (MTEXT에서 서식 지정 시 사용됨)
+        text = re.sub(r'[\{\}]', ' ', text)
+        
+        # 3. DXF 특수 제어 문자 제거 (%%U: Underline, %%O: Overline, %%S: Strikethrough, %%R: Registered)
+        text = re.sub(r'%%[U|O|S|R]', ' ', text)
+        
+        # 4. 불필요한 특수 기호 및 반복되는 공백 정제
+        # - 연속된 공백을 하나로 통합
+        # - 텍스트 양 끝의 공백 제거
+        text = re.sub(r'\s+', ' ', text).strip()
+        
+        return text
+
+    def get_bbox(self, entity) -> Optional[box]:
+        """엔티티의 Bounding Box를 계산하여 shapely box 객체로 반환"""
+        try:
+            if entity.dxftype() == 'TEXT':
+                p = entity.dxf.insert
+                h = entity.dxf.height
+                # 텍스트 길이에 따른 대략적인 너비 계산 (글자수 * 높이 * 0.6)
+                width = len(entity.dxf.text) * h * 0.6
+                return box(p.x, p.y, p.x + width, p.y + h)
+            
+            elif entity.dxftype() == 'MTEXT':
+                p = entity.dxf.insert
+                h = entity.dxf.char_height if hasattr(entity.dxf, 'char_height') else 2.5
+                # MTEXT는 보통 width 속성이 정의되어 있음
+                w = entity.dxf.width if entity.dxf.width > 0 else len(entity.text) * h * 0.6
+                return box(p.x, p.y, p.x + w, p.y + h)
+
+            elif entity.dxftype() == 'LINE':
+                start = entity.dxf.start
+                end = entity.dxf.end
+                return box(min(start.x, end.x), min(start.y, end.y),
+                          max(start.x, end.x), max(start.y, end.y))
+            
+            elif entity.dxftype() == 'LWPOLYLINE':
+                points = entity.get_points()
+                xs = [p[0] for p in points]
+                ys = [p[1] for p in points]
+                return box(min(xs), min(ys), max(xs), max(ys))
+                
+        except Exception as e:
+            print(f"Error calculating bbox for {entity.dxftype()}: {e}")
+        return None
+
+    def extract_and_save(self, output_path: str):
+        """
+        추출된 기하학적 데이터를 파일로 저장하여 Phase 3 Worker들이
+        공유 메모리/파일 시스템을 통해 참조할 수 있도록 함 (Phase 5 병렬 아키텍처 반영)
+        """
+        results = []
+        for entity in self.msp:
+            bbox_obj = self.get_bbox(entity)
+            if bbox_obj:
+                # 텍스트 값 추출 및 정제
+                raw_text = ""
+                if entity.dxftype() == 'TEXT':
+                    raw_text = entity.dxf.text
+                elif entity.dxftype() == 'MTEXT':
+                    raw_text = entity.text
+                
+                results.append({
+                    "id": entity.dxf.handle,
+                    "type": entity.dxftype(),
+                    "layer": entity.dxf.layer,
+                    "bbox": {
+                        "min_x": bbox_obj.bounds[0],
+                        "min_y": bbox_obj.bounds[1],
+                        "max_x": bbox_obj.bounds[2],
+                        "max_y": bbox_obj.bounds[3]
+                    },
+                    "raw_value": raw_text,
+                    "clean_value": self.clean_text(raw_text) if raw_text else None,
+                    "coordinates": entity.get_points() if hasattr(entity, 'get_points') else []
+                })
+        
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(results, f, ensure_ascii=False, indent=4)
+        
+        return output_path
+
+# Usage example (from the Phase 5 orchestrator's point of view)
+extractor = PidGeometricExtractor("plant_drawing.dxf")
+# The data is not returned directly; it is written to shared storage (a file)
+geo_data_path = extractor.extract_and_save("shared_geo_data.json")
+```
+
+### 3.2 유틸리티 함수: 인접성 체크 (Proximity Utility)
+추후 2단계(위상 모델링)에서 사용할 핵심 유틸리티입니다.
+
+```python
+from shapely.geometry import Point
+
+def is_near(entity_a_bbox, entity_b_bbox, threshold=5.0):
+    """두 객체의 Bounding Box 간의 최단 거리가 임계값 이내인지 확인"""
+    return entity_a_bbox.distance(entity_b_bbox) <= threshold
+
+def is_inside(point, bbox):
+    """특정 점이 Bounding Box 내부에 있는지 확인"""
+    return bbox.contains(Point(point))
+```
+
+---
+
+## 🚀 4. Phase 1 완료 기준 (Definition of Done)
+
+- [ ] DXF 파일 내 모든 `TEXT`, `LINE`, `POLYLINE`의 좌표 데이터가 누락 없이 추출되는가?
+- [ ] 각 객체별로 정확한 `Bounding Box`가 계산되어 저장되는가?
+- [ ] 추출된 데이터가 `GeometricEntity` 스키마에 맞게 JSON 파일로 저장되어 Worker들이 공유 참조 가능한가? (Phase 5 반영)
+- [ ] (선택 사항) PDF 도면의 경우 OCR을 통해 텍스트의 좌표값이 추출되는가?
+
+---
+
+## 🧐 감독자 진단 결과 (2026-05-02)
+
+### 1. 프로그램 설계 점검
+- **강점**: `ezdxf`와 `shapely`를 조합하여 기하학적 데이터(BBox, 좌표)를 보존하려는 접근 방식이 매우 적절함. 특히 Phase 5의 병렬 아키텍처를 고려하여 데이터를 파일/공유 저장소에 적재하는 구조는 확장성 면에서 우수함.
+- **보완 필요 사항**:
+    - **MTEXT 처리**: 진단 시점의 예시 코드(`3.1`)는 `TEXT` 엔티티만 처리하고 있었으나, 실제 DXF 파일 분석 결과 `MTEXT` 엔티티가 다수 존재함. `MTEXT`는 내부 포맷팅 코드(예: `\P`, `\W`)가 포함되어 있어 단순 텍스트 추출 시 정제가 필요함. (현재 `3.1` 코드에는 `MTEXT` 분기와 `clean_text()`가 반영되어 있음)
+    - **BBox 계산 정밀도**: 진단 시점에는 `TEXT` 엔티티의 BBox를 `p.x + 10, p.y + 5`와 같이 상수로 처리하고 있었음. 현재 `3.1` 코드는 폰트 크기(`height`)를 반영하여 계산하지만, 정렬 방식(`align`)은 아직 반영되지 않았으므로 이를 고려한 동적 계산 로직이 추가되어야 함.
+
+### 2. 실제 도면(`No-10_Plant_PID.dxf`) 분석 기반 차이점
+- **엔티티 규모**: 총 28,819개의 엔티티가 존재하여 데이터 양이 상당함. 단순 리스트 저장보다는 인덱싱 전략이 필요할 수 있음.
+- **텍스트 복잡도**:
+    - `MTEXT` 내에 `\P` (줄바꿈), `\L` (밑줄) 등 제어 문자가 포함된 수정 사항(Revision) 텍스트가 많음. 이를 그대로 추출하면 위상 분석 시 노이즈가 될 가능성이 높음.
+    - `%%U` (Underline)와 같은 DXF 특수 제어 문자가 텍스트 값에 포함되어 있어, 이를 제거하는 전처리 과정이 필수적임.
+- **데이터 특성**: `IA-10922-25A-F1A-n`와 같은 복합 파이프라인 번호(Pipe Line Number) 형식이 확인됨. 이를 일반 태그(Tag Name)와 명확히 구분하여 추출하고 관리하는 로직이 Phase 2/3에서 중요하게 작용할 것으로 보임.
+
+### 3. 최종 권고 사항
+1. **MTEXT 지원 추가**: `PidGeometricExtractor`에 `MTEXT` 처리 로직을 추가하고, 제어 문자를 제거하는 `clean_text()` 유틸리티 함수를 구현할 것.
+2. **동적 BBox 구현**: `entity.dxf.height`를 활용하여 텍스트 크기에 맞는 정확한 Bounding Box를 계산하도록 수정할 것.
+3. **전처리 파이프라인 강화**: 추출 단계에서 `%%U` 등의 특수 문자를 제거하는 정제 단계를 추가하여 데이터 품질을 높일 것.
--- a/Pipeline/Graph_Pipeline_Phase2.md
+++ b/Pipeline/Graph_Pipeline_Phase2.md
@@ -0,0 +1,184 @@
+# 🕸️ Graph Pipeline Phase 2: 위상 모델링 (Topology Modeling)
+
+이 문서는 P&ID Graph Pipeline의 두 번째 단계인 **위상 모델링**의 상세 구현 계획을 다룹니다. 1단계에서 추출한 기하학적 객체(좌표, BBox)를 기반으로, 설비 간의 **연결성(Connectivity)**과 **흐름(Flow)**을 정의하는 지식 그래프(Knowledge Graph)를 구축하는 것이 목표입니다.
+
+---
+
+## 🚩 [Supervisor's Audit] 진단 결과 및 개선 권고
+
+**감독자 진단 일자:** 2026-05-02
+**진단 결과:** ⚠️ **부분적 보완 필요 (Partial Improvement Required)**
+
+### 🔍 주요 진단 내용
+1.  **연결성 추론의 단순성 (Critical):** 현재 `_find_connected_nodes`가 단순 BBox 교차(`intersects`)만 확인하고 있습니다. 실제 P&ID에서 배관(Line)은 설비의 외곽선에 닿거나 매우 근접한 형태로 나타나며, 단순 BBox 교차는 오탐(False Positive) 확률이 매우 높습니다.
+2.  **방향성 정의 부재 (Medium):** `DiGraph`를 사용하지만, 실제 엣지에 방향성을 부여하는 구체적인 로직(화살표 인식, 공정 흐름 규칙)이 예시 코드에 누락되어 있습니다.
+3.  **임계값 하드코딩 (Low):** `min_dist < 50.0`과 같은 임계값이 하드코딩되어 있어, 도면 스케일(Scale)이 변경될 경우 대응이 불가능합니다.
+4.  **데이터 무결성 검증 부족 (Medium):** 그래프 생성 후 고립된 노드(Isolated Nodes)나 비정상적인 루프에 대한 검증 단계가 없습니다.
+
+### 🛠️ 수정 및 반영 사항
+- **연결성 로직 고도화:** BBox 교차 방식에서 $\rightarrow$ **Line End-point 기반 근접 분석** 방식으로 변경.
+- **방향성 추론 단계 명시:** 화살표 심볼 및 공정 흐름 기반의 `source` $\rightarrow$ `target` 결정 로직 추가.
+- **설정의 외부화:** 임계값($\epsilon$)을 설정 파일이나 파라미터로 관리하도록 구조 변경.
+- **검증 단계 추가:** 그래프 구축 후 위상 무결성 검사(Topology Validation) 단계 도입.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `networkx` | 그래프 데이터 구조 생성 및 알고리즘 분석 | 핵심 라이브러리 |
+| `shapely` | 객체 간 거리 계산 및 포함 관계 분석 | 1단계와 연계 |
+| `scikit-learn` | (선택) KD-Tree를 이용한 고속 근접 이웃 검색 | 대규모 도면 최적화 |
+| `matplotlib` | 생성된 그래프의 위상 구조 시각화 검증 | 디버깅용 |
+
+### 1.2 설치 명령어
+```bash
+pip install networkx shapely scikit-learn matplotlib
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 그래프 정의 (Graph Definition)
+*   **노드 (Nodes):**
+    *   `Equipment`: 펌프, 탱크, 열교환기 등 (속성: ID, 타입, BBox, CenterPoint)
+    *   `Instrument`: 전송기, 밸브, 게이지 등 (속성: ID, 타입, BBox, CenterPoint)
+    *   `Tag`: 텍스트 기반 태그 (속성: TagName, Value, BBox)
+*   **엣지 (Edges):**
+    *   `Pipe`: 설비-설비, 설비-계기 간의 물리적 연결 (속성: LineNumber, 방향성, 연결타입)
+    *   `Association`: 태그-설비 간의 논리적 연결 (속성: 관계 타입 - 예: 'belongs_to')
+
+### 2.2 위상 추론 로직 (Topology Inference)
+1.  **태그-설비 결합 (Tag-to-Entity Binding):**
+    *   태그 텍스트의 BBox와 가장 가까운 심볼(Equipment/Instrument)을 찾아 `Association` 엣지를 생성합니다.
+2.  **배관 연결성 분석 (Line Connectivity) [개선]:**
+    *   `LINE` 또는 `POLYLINE`의 **시작점과 끝점(End-points)**을 추출합니다.
+    *   각 끝점이 특정 설비의 BBox 내부에 있거나, 설정된 임계 거리($\epsilon$) 이내에 있을 때만 `Pipe` 엣지로 연결합니다. (단순 BBox 교차 방식 지양)
+3.  **흐름 방향성 부여 (Flow Direction) [추가]:**
+    *   배관 상의 화살표 심볼 위치와 방향을 분석하여 `source` $\rightarrow$ `target`을 결정합니다.
+    *   화살표가 없는 경우, 공정 표준(예: 탱크 $\rightarrow$ 펌프 $\rightarrow$ 밸브)에 따른 기본 방향을 부여합니다.
+4.  **위상 무결성 검증 (Topology Validation) [추가]:**
+    *   연결되지 않은 고립 노드 탐색 및 리포팅.
+    *   비정상적인 사이클(Cycle) 또는 단절 구간 확인.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 그래프 구축 핵심 코드
+```python
+import networkx as nx
+from shapely.geometry import box, Point, LineString
+
+class PidTopologyBuilder:
+    def __init__(self, geometric_data, all_extracted_tags=None, config=None):
+        """
+        - geometric_data: Phase 1에서 추출된 기하학적 데이터
+        - all_extracted_tags: 통합된 태그 리스트
+        - config: {'dist_threshold': 50.0, 'tag_threshold': 100.0} 등 설정값
+        """
+        self.data = geometric_data
+        self.all_tags = all_extracted_tags if all_extracted_tags else []
+        self.config = config if config else {'dist_threshold': 50.0, 'tag_threshold': 100.0}
+        self.G = nx.DiGraph()      # 방향성 그래프 생성
+
+    def build_graph(self):
+        # 1. 모든 객체를 노드로 추가
+        for item in self.data:
+            # Phase 1에서 'clean_value'로 저장했으므로 이를 value로 사용
+            self.G.add_node(item['id'],
+                           type=item['type'],
+                           bbox=box(*item['bbox'].values()),
+                           value=item.get('clean_value'))
+
+        # 2. 분산 추출된 태그 통합 및 노드 추가
+        for tag in self.all_tags:
+            self.G.add_node(tag['id'],
+                           type='TEXT',
+                           bbox=box(*tag['bbox'].values()),
+                           value=tag.get('tagName'))
+
+        # 3. 태그-설비 논리적 연결 (Association)
+        tags = [n for n, d in self.G.nodes(data=True) if d['type'] == 'TEXT']
+        equipments = [n for n, d in self.G.nodes(data=True) if d['type'] != 'TEXT']
+
+        for tag in tags:
+            best_match = self._find_nearest_equipment(tag, equipments)
+            if best_match:
+                self.G.add_edge(tag, best_match, relation='associated_with')
+
+        # 4. 배관 기반 물리적 연결 (Pipe) [개선됨]
+        lines = [n for n, d in self.G.nodes(data=True) if d['type'] in ['LINE', 'POLYLINE']]
+        for line_id in lines:
+            # Phase 1에서 추출한 coordinates를 사용하여 LineString 생성
+            coords = self.G.nodes[line_id].get('coordinates', [])
+            if not coords:
+                continue
+            line_geom = LineString(coords)
+            endpoints = [line_geom.coords[0], line_geom.coords[-1]]
+            
+            connected_nodes = []
+            for pt in endpoints:
+                p = Point(pt)
+                for eq_id in equipments:
+                    if self.G.nodes[eq_id]['bbox'].distance(p) < self.config['dist_threshold']:
+                        connected_nodes.append(eq_id)
+            
+            if len(connected_nodes) >= 2:
+                # 방향성 추론 로직 (단순화: 시작점 -> 끝점)
+                self.G.add_edge(connected_nodes[0], connected_nodes[1], relation='pipe')
+
+    def _find_nearest_equipment(self, tag_id, equipment_ids):
+        tag_bbox = self.G.nodes[tag_id]['bbox']
+        min_dist = float('inf')
+        nearest = None
+        for eq_id in equipment_ids:
+            eq_bbox = self.G.nodes[eq_id]['bbox']
+            dist = tag_bbox.distance(eq_bbox)
+            if dist < min_dist:
+                min_dist = dist
+                nearest = eq_id
+        return nearest if min_dist < self.config['tag_threshold'] else None
+
+    def validate_topology(self):
+        """위상 무결성 검증"""
+        isolated = list(nx.isolates(self.G))
+        return {"isolated_nodes": isolated, "node_count": self.G.number_of_nodes(), "edge_count": self.G.number_of_edges()}
+
+# Usage example
+all_tags = flatten_results([worker1_res, worker2_res])
+config = {'dist_threshold': 30.0, 'tag_threshold': 80.0}
+builder = PidTopologyBuilder(geometric_data, all_extracted_tags=all_tags, config=config)
+builder.build_graph()
+validation_res = builder.validate_topology()
+print(f"Validation Result: {validation_res}")
+```
+
+### 3.2 위상 분석 유틸리티: 영향도 분석 (Impact Analysis)
+```python
+def analyze_impact(graph, start_node):
+    """특정 설비 장애 시 하류(Downstream)에 영향을 받는 모든 노드 추출"""
+    # BFS를 통해 도달 가능한 모든 노드 탐색
+    impacted_nodes = nx.descendants(graph, start_node)
+    return list(impacted_nodes)
+
+# Example: impact analysis for a failure of pump P-101
+affected = analyze_impact(graph, "node_P101")
+print(f"Impacted Equipment: {affected}")
+```
+
+---
+
+## 🚀 4. Phase 2 완료 기준 (Definition of Done)
+
+- [ ] 모든 설비와 계기가 그래프의 **노드(Node)**로 변환되었는가?
+- [ ] 분산 추출된 태그 리스트가 `flatten_results`를 통해 통합되어 그래프에 반영되었는가?
+- [ ] 태그와 설비 간의 **논리적 연결(Association)**이 정확하게 매핑되었는가?
+- [ ] 배관(Line)의 **끝점 분석**을 통해 설비 간의 **물리적 연결(Pipe Edge)**이 생성되었는가? (BBox 교차 방식 배제)
+- [ ] 화살표 및 공정 규칙에 기반한 **방향성(Directionality)**이 엣지에 부여되었는가?
+- [ ] `validate_topology`를 통해 고립 노드 및 위상 오류가 검토되었는가?
+- [ ] `nx.descendants` 등을 통해 특정 노드로부터의 **흐름 추적(Flow Tracing)**이 가능한가?
+- [ ] 생성된 그래프 구조가 JSON(GraphML 등) 형태로 저장되어 Phase 3로 전달 가능한가?
--- a/Pipeline/Graph_Pipeline_Phase3.md
+++ b/Pipeline/Graph_Pipeline_Phase3.md
@@ -0,0 +1,212 @@
+# 🧠 Graph Pipeline Phase 3: 지능형 매핑 및 검증 (Intelligent Mapping & Validation)
+
+이 문서는 P&ID Graph Pipeline의 세 번째 단계인 **지능형 매핑 및 검증**의 상세 구현 계획을 다룹니다. 2단계에서 구축한 위상 그래프(Topology Graph)를 활용하여, 도면 상의 가상 노드들을 실제 Experion 시스템의 **실시간 태그(Real-time Tags)**와 정밀하게 연결하고 그 타당성을 검증하는 것이 목표입니다.
+
+---
+
+## 🚩 [Supervisor's Audit] 감독자 진단 결과 및 수정 사항
+
+본 프로그램 설계에 대해 감독자 관점에서 정밀 진단을 수행하였으며, 다음과 같은 취약점과 개선 사항을 발견하여 반영하였습니다.
+
+### 1. 진단 결과 (Audit Findings)
+
+| 항목 | 진단 내용 | 심각도 | 수정 방향 |
+|---|---|---|---|
+| **에러 처리** | LLM 응답이 JSON 형식이 아니거나 `UNKNOWN`일 때의 예외 처리 로직 부족 | HIGH | 구조화된 출력(JSON) 강제 및 Fallback 전략 추가 |
+| **성능/비용** | 모든 노드에 대해 개별 LLM 호출 시 API 비용 급증 및 속도 저하 | MED | 배치(Batch) 처리 및 1차 필터링 강화 |
+| **검증 정밀도** | 단순 키워드 매칭 기반 검증은 오탐(False Positive) 가능성이 높음 | MED | 데이터 타입 및 엔지니어링 유닛(EU)의 엄격한 비교 로직 추가 |
+| **데이터 정합성** | 매핑 결과의 이력 관리 및 사람이 수동으로 수정할 수 있는 피드백 루프 부재 | LOW | 매핑 결과 저장 스키마에 `confidence` 및 `manual_override` 필드 추가 |
+
+### 2. 수정 이유 (Rationale)
+- **안정성 확보:** LLM은 비결정론적 특성이 있으므로, 프로그램이 런타임에 중단되지 않도록 Pydantic을 이용한 엄격한 스키마 검증이 필수적입니다.
+- **효율성 최적화:** 수천 개의 태그를 개별 호출하는 것은 비효율적입니다. 유사도 기반으로 후보군을 좁히고, 유사 그룹을 묶어 배치 처리함으로써 비용을 절감합니다.
+- **신뢰도 향상:** 단순 텍스트 매칭을 넘어 실제 시스템의 메타데이터(Unit, Range 등)를 교차 검증해야 엔지니어링 관점에서 신뢰할 수 있는 결과가 됩니다.
+
+---
+
+## 📦 1. 필수 패키지 및 환경 설정
+
+### 1.1 Python 패키지
+| 패키지 | 용도 | 비고 |
+|---|---|---|
+| `openai` / `langchain` | LLM API 연동 및 프롬프트 체이닝 | 매핑 추론 및 검증 핵심 |
+| `fuzzywuzzy` / `rapidfuzz` | 태그 이름 간의 문자열 유사도 계산 | 1차 후보군 추출용 |
+| `networkx` | 그래프 기반 인접 노드(Context) 추출 | 2단계 그래프 활용 |
+| `pydantic` | 매핑 결과의 구조화 및 유효성 검사 | **[강화]** 데이터 정규화 및 런타임 타입 체크 |
+| `requests` | ExperionCrawler API (C#)와 통신 | 실제 태그 리스트 조회 |
+
+### 1.2 설치 명령어
+```bash
+pip install openai langchain rapidfuzz networkx pydantic requests
+```
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 매핑 파이프라인 (Mapping Pipeline)
+단순 이름 매칭의 한계를 극복하기 위해 **[후보 추출 $\rightarrow$ 맥락 분석 $\rightarrow$ LLM 확정 $\rightarrow$ 스키마 검증]**의 4단계 프로세스를 거칩니다.
+
+1.  **1차 후보 추출 (Candidate Generation):**
+    *   도면의 태그 텍스트와 Experion 시스템의 전체 태그 리스트를 `RapidFuzz`로 비교하여 유사도 상위 N개를 추출합니다.
+2.  **맥락 정보 수집 (Context Gathering):**
+    *   해당 노드의 그래프 상 인접 노드(1-hop, 2-hop) 정보를 수집합니다.
+    *   예: "현재 노드는 `PT-101`이며, 상류에 `P-101(Pump)`이 있고 하류에 `V-101(Valve)`이 있음."
+3.  **LLM 기반 최종 매핑 (LLM-based Resolution):**
+    *   후보 태그 리스트와 위상 맥락을 LLM에게 전달하여 가장 타당한 태그를 선택하게 합니다.
+    *   **[개선]** JSON Mode를 사용하여 `{"tag": "...", "reason": "...", "confidence": 0.9}` 형태로 응답을 강제합니다.
+4.  **구조적 검증 (Structural Validation):**
+    *   Pydantic 모델을 통해 LLM 응답의 형식을 검증하고, 실패 시 `UNKNOWN` 처리 및 로그를 남깁니다.
+
+### 2.2 상호 검증 로직 (Cross-Validation)
+매핑된 결과가 실제 공정 데이터와 일치하는지 검증합니다.
+*   **위상적 일관성:** 도면에서 `A $\rightarrow$ B` 순서라면, 실제 데이터에서도 `A`의 변화가 `B`에 영향을 주는지 상관관계 분석.
+*   **속성 일치성:** 도면의 심볼 타입(예: Pressure Transmitter)과 실제 태그의 속성(예: Engineering Unit = 'bar' 또는 'psi')이 일치하는지 확인. **[강화]** 단순 키워드가 아닌 Unit 매핑 테이블을 통한 엄격한 비교.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 맥락 기반 매핑 엔진
+```python
+import networkx as nx
+import asyncio
+import json
+from typing import List, Optional
+from pydantic import BaseModel, Field
+from rapidfuzz import process, fuzz
+from openai import AsyncOpenAI
+
+# --- [추가] 응답 구조화를 위한 Pydantic 모델 ---
+# Structured answer expected from the LLM for one node; all three fields are required.
+class MappingResult(BaseModel):
+    # System tag chosen for the node
+    resolved_tag: str = Field(..., description="The final mapped system tag")
+    # Explanation for the choice
+    reason: str = Field(..., description="Reason for this mapping based on context")
+    # Constrained to the closed range [0.0, 1.0] by ge/le
+    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score from 0 to 1")
+
+client = AsyncOpenAI(api_key="your-api-key")
+
+class IntelligentMapper:
+    def __init__(self, graph, system_tags):
+        self.graph = graph          # Phase 2에서 생성된 NetworkX 그래프
+        self.system_tags = system_tags # Experion 시스템의 전체 태그 리스트
+
+    def get_node_context(self, node_id):
+        """노드의 주변 위상 정보를 텍스트로 변환"""
+        neighbors = list(self.graph.neighbors(node_id))
+        context = []
+        for n in neighbors:
+            attr = self.graph.nodes[n]
+            context.append(f"Connected to {attr.get('value', n)} (Type: {attr.get('type')})")
+        return ", ".join(context)
+
+    async def _resolve_generic(self, node_id, category_prompt):
+        """공통 매핑 로직 (비동기 + 구조화 응답)"""
+        # Phase 2에서 'value'에 clean_value가 저장됨
+        tag_text = self.graph.nodes[node_id].get('value', '')
+        candidates = process.extract(tag_text, self.system_tags, scorer=fuzz.WRatio, limit=5)
+        context = self.get_node_context(node_id)
+        
+        prompt = f"""
+        {category_prompt}
+        P&ID 도면의 태그 '{tag_text}'를 실제 시스템 태그와 매핑해야 합니다.
+        위상 맥락: {context}
+        후보 리스트: {candidates}
+        
+        반드시 다음 JSON 형식으로만 응답하세요:
+        {{
+            "resolved_tag": "태그명 또는 UNKNOWN",
+            "reason": "매핑 이유",
+            "confidence": 0.0~1.0
+        }}
+        """
+        
+        try:
+            response = await client.chat.completions.create(
+                model="gpt-4-turbo",
+                messages=[{"role": "user", "content": prompt}],
+                response_format={ "type": "json_object" } # JSON 모드 강제
+            )
+            raw_content = response.choices[0].message.content
+            # Pydantic을 통한 유효성 검사
+            return MappingResult.model_validate_json(raw_content)
+        except Exception as e:
+            print(f"Error resolving node {node_id}: {e}")
+            return MappingResult(resolved_tag="UNKNOWN", reason=f"Error: {str(e)}", confidence=0.0)
+
+    # --- 전문화된 Worker 함수들 ---
+    async def extract_transmitters(self, node_ids):
+        prompt = "당신은 계측기 전문 엔지니어입니다. 특히 Pressure/Flow/Level Transmitter 매핑에 특화되어 있습니다."
+        return {nid: await self._resolve_generic(nid, prompt) for nid in node_ids}
+
+    async def extract_valves(self, node_ids):
+        prompt = "당신은 밸브 및 액추에이터 전문 엔지니어입니다. 밸브의 개폐 상태 및 제어 태그 매핑에 특화되어 있습니다."
+        return {nid: await self._resolve_generic(nid, prompt) for nid in node_ids}
+
+    async def extract_equipment(self, node_ids):
+        prompt = "당신은 공정 설비 전문 엔지니어입니다. 펌프, 탱크, 열교환기 등의 메인 설비 태그 매핑에 특화되어 있습니다."
+        return {nid: await self._resolve_generic(nid, prompt) for nid in node_ids}
+
+# Usage example
+async def main():
+    # Mock data
+    graph = nx.Graph()
+    graph.add_node("node_1", value="PT-101", type="Pressure Transmitter")
+    graph.add_node("node_2", value="P-101", type="Pump")
+    graph.add_edge("node_1", "node_2")
+    
+    mapper = IntelligentMapper(graph, ["PT-101.PV", "PT-102.PV", "P-101.STATUS"])
+    
+    results = await asyncio.gather(
+        mapper.extract_transmitters(["node_1"]),
+        mapper.extract_equipment(["node_2"])
+    )
+    
+    final_mapping = {**results[0], **results[1]}
+    print(f"Parallel Resolved Mapping: {final_mapping}")
+
+asyncio.run(main())
+```
+
+### 3.2 검증 유틸리티: 속성 일치 확인 (강화 버전)
+```python
+def validate_mapping(resolved_tag, symbol_type, tag_metadata):
+    """심볼 타입과 실제 태그 메타데이터의 엄격한 일치 여부 검증"""
+    # 단순 키워드가 아닌 허용 단위(Unit) 정의
+    unit_map = {
+        "Pressure Transmitter": ["bar", "psi", "kPa", "Pa"],
+        "Flow Meter": ["m3/h", "lpm", "kg/h"],
+        "Temperature Sensor": ["°C", "C", "K", "°F"]
+    }
+    
+    actual_unit = tag_metadata.get('unit', '').strip()
+    allowed_units = unit_map.get(symbol_type, [])
+    
+    # 1. 단위 일치 확인 (최우선)
+    if actual_unit and actual_unit in allowed_units:
+        return True, "Unit Match"
+    
+    # 2. 단위가 없는 경우 설명(Description) 기반 2차 검증
+    actual_desc = tag_metadata.get('description', '').lower()
+    expected_keywords = {
+        "Pressure Transmitter": ["pressure", "press"],
+        "Flow Meter": ["flow", "flowrate"],
+        "Temperature Sensor": ["temp", "temperature"]
+    }
+    
+    keywords = expected_keywords.get(symbol_type, [])
+    if any(kw in actual_desc for kw in keywords):
+        return True, "Description Match (Unit Missing)"
+        
+    return False, "Mismatch: Symbol type and Tag metadata do not align"
+```
+
+---
+
+## 🚀 4. Phase 3 완료 기준 (Definition of Done)
+
+- [ ] 모든 도면 노드에 대해 **1차 후보군(Candidates)**이 자동으로 생성되는가?
+- [ ] `NetworkX` 그래프를 통해 **인접 노드 맥락(Context)**이 정확히 추출되는가?
+- [ ] LLM이 **JSON 형식**으로 최종 태그를 결정하고, 그 근거와 신뢰도를 제시하는가?
+- [ ] **Pydantic**을 통해 LLM 응답의 구조적 유효성이 검증되는가?
+- [ ] 매핑된 태그의 **엔지니어링 유닛(Unit)**과 도면 심볼 타입 간의 일치성이 엄격히 검증되는가?
+- [ ] 최종 매핑 결과가 `(도면노드ID, 시스템태그, 신뢰도, 검증결과, 매핑근거)` 형태로 저장되는가?
--- a/Pipeline/Graph_Pipeline_Phase4.md
+++ b/Pipeline/Graph_Pipeline_Phase4.md
@@ -0,0 +1,197 @@
+# 🎨 Graph Pipeline Phase 4: 활용 및 시각화 (Application & Visualization)
+
+이 문서는 P&ID Graph Pipeline의 최종 단계인 **활용 및 시각화**의 상세 구현 계획을 다룹니다. 앞선 단계에서 구축한 [기하학적 데이터 $\rightarrow$ 위상 그래프 $\rightarrow$ 시스템 태그 매핑] 결과물을 결합하여, 운영자가 도면 상에서 실시간 공정 상태를 파악하고 장애 영향도를 분석할 수 있는 인터페이스를 구현하는 것이 목표입니다.
+
+---
+
+## 🔍 [Supervisor Diagnosis] 프로그램 진단 및 개선 권고
+
+**진단 일자:** 2026-05-02
+**진단자:** Roo (Software Engineer / Supervisor)
+
+### 1. 종합 진단 결과
+현재 계획은 기본적인 데이터 흐름(C# $\rightarrow$ Python $\rightarrow$ Frontend)을 잘 정의하고 있으나, **실제 산업 현장의 대규모 P&ID 도면 적용 시 발생할 수 있는 성능 및 안정성 문제**에 대한 고려가 부족합니다. 특히 실시간 데이터 오버레이의 부하 관리와 분석 결과의 신뢰성 검증 단계가 누락되어 있습니다.
+
+### 2. 주요 진단 항목 및 수정 이유
+
+| 항목 | 진단 결과 | 위험도 | 수정 이유 및 개선 방향 |
+|---|---|---|---|
+| **데이터 전송 효율** | WebSocket/API 폴링 방식의 단순 나열 | MED | 수천 개의 태그가 포함된 도면에서 개별 폴링/전송 시 네트워크 부하 급증 $\rightarrow$ **태그 그룹화 및 변경분 기반(Delta) 전송** 도입 필요 |
+| **프론트엔드 렌더링** | SVG/Canvas 단순 오버레이 | HIGH | 노드 수가 많아질 경우 DOM 요소 증가로 인한 브라우저 랙 발생 $\rightarrow$ **Canvas 기반 렌더링 최적화 및 Viewport 기반 가시 영역 렌더링** 전략 필요 |
+| **분석 엔진 신뢰성** | `nx.descendants` 단순 활용 | MED | 단순 위상 전파는 실제 공정의 '흐름 방향(Flow Direction)'과 '밸브 개폐 상태'를 무시함 $\rightarrow$ **엣지 속성(방향성, 상태)을 반영한 가중치 경로 분석**으로 고도화 |
+| **에러 핸들링** | Python 브릿지 통신 시 예외 처리 미흡 | LOW | 분석 엔진 다운 시 C# 서버의 블로킹 가능성 $\rightarrow$ **Circuit Breaker 패턴 및 타임아웃 설정** 명시 필요 |
+| **사용자 경험(UX)** | 단순 하이라이트 표시 | LOW | 영향도 결과가 많을 경우 도면이 빨간색으로 도배됨 $\rightarrow$ **단계별 영향도(1차, 2차...) 색상 구분 및 필터링** 기능 추가 |
+
+---
+
+## 📦 1. 필수 패키지 및 기술 스택
+
+### 1.1 프론트엔드 (Visualization)
+| 기술/라이브러리 | 용도 | 비고 |
+|---|---|---|
+| `SVG / Canvas API` | P&ID 도면 렌더링 및 데이터 오버레이 | **Canvas API 우선 권장 (대규모 노드 성능 최적화)** |
+| `Cytoscape.js` / `D3.js` | 위상 그래프 시각화 및 인터랙티브 탐색 | 그래프 분석 뷰어 |
+| `Vue.js` / `React` | 전체 UI 프레임워크 및 상태 관리 | `src/Web` 구조와 통합 |
+| `Axios` / `WebSocket` | 실시간 OPC UA 데이터 수신 및 API 통신 | **SignalR (ASP.NET Core) 도입 권장 (실시간 양방향 통신 최적화)** |
+
+### 1.2 백엔드 (API & Analysis)
+| 기술/라이브러리 | 용도 | 비고 |
+|---|---|---|
+| `ASP.NET Core` | Graph API 및 분석 엔드포인트 제공 | `ExperionCrawler` 메인 서버 |
+| `NetworkX` (Python) | 영향도 분석 및 경로 추적 알고리즘 실행 | 분석 엔진 (Phase 2 활용) |
+| `FastAPI` / `Flask` | Python 분석 엔진과 C# 서버 간의 브릿지 | 분석 마이크로서비스 |
+
+---
+
+## 📐 2. 상세 설계 구조
+
+### 2.1 실시간 데이터 오버레이 (Real-time Overlay)
+도면의 좌표 정보와 매핑된 시스템 태그를 연결하여 실시간 값을 표시합니다.
+1.  **매핑 데이터 로드:** `(도면노드ID, 시스템태그, 좌표)` 리스트를 프론트엔드로 전달.
+2.  **실시간 스트리밍:** `OPC UA` $\rightarrow$ `C# Server` $\rightarrow$ `SignalR Hub` $\rightarrow$ `Frontend`. (**개선: 변경된 값만 전송하는 Delta Update 방식 적용**)
+3.  **동적 렌더링:** 태그 값이 변경되면 해당 좌표의 Canvas 요소를 업데이트하거나 툴팁에 현재 값을 표시. (**개선: Viewport 내 요소만 업데이트하여 CPU 부하 감소**)
+
+### 2.2 영향도 분석 엔진 (Impact Analysis Engine)
+특정 설비의 이상 발생 시 하류(Downstream) 영향을 계산합니다.
+1.  **분석 요청:** 사용자가 도면에서 특정 노드(예: 펌프 P-101)를 클릭.
+2.  **그래프 탐색:** Python 분석 엔진에서 `nx.descendants(G, 'P-101')` 실행. (**개선: 엣지의 `flow_direction` 속성을 확인하여 실제 유체 흐름 방향으로만 전파 계산**)
+3.  **결과 반환:** 영향받는 모든 노드 ID 리스트, 경로(Path), 그리고 **영향 단계(Depth)**를 반환.
+4.  **시각적 강조:** 도면 상에서 영향 경로를 단계별 색상(예: 1차-진한 빨강, 2차-연한 빨강)으로 하이라이트 처리.
+
+---
+
+## 💻 3. 실제 구현 코딩 가이드 (Example)
+
+### 3.1 [Backend] 영향도 분석 API (C# $\rightarrow$ Python Bridge)
+```csharp
+// src/Web/Controllers/PidGraphController.cs
+
+// 1. 분석 상태 추적을 위한 DTO
+/// <summary>Progress snapshot of one analysis task (progress is expected in the 0.0 to 1.0 range).</summary>
+public record AnalysisStatus(string TaskId, double Progress, string Status, string Message);
+
+// 2. 실시간 진행 상태 조회 API (Phase 5 병렬 처리 반영)
+[HttpGet("status/{taskId}")]
+public async Task<IActionResult> GetAnalysisStatus(string taskId)
+{
+    // Look the task up in the status store (per the design note, managed by the orchestrator).
+    var taskStatus = await _statusService.GetStatusAsync(taskId);
+    if (taskStatus == null)
+    {
+        return NotFound();
+    }
+
+    // Project onto the response shape consumed by the front end.
+    var payload = new
+    {
+        taskId = taskStatus.TaskId,
+        progress = taskStatus.Progress, // 0.0 ~ 1.0
+        status = taskStatus.Status,     // "Processing", "Completed", "Failed"
+        message = taskStatus.Message
+    };
+    return Ok(payload);
+}
+
+/// <summary>
+/// Forwards an impact-analysis request for <paramref name="nodeId"/> to the Python analysis service.
+/// </summary>
+/// <returns>
+/// 200 with the analysis result, 503 when the service cannot be reached or answers with an
+/// error status, 504 when the call times out.
+/// </returns>
+[HttpGet("impact/{nodeId}")]
+public async Task<IActionResult> GetImpactAnalysis(string nodeId)
+{
+    // Abort the outbound call when the client disconnects.
+    var cancellationToken = HttpContext.RequestAborted;
+
+    try
+    {
+        // Escape the id so characters such as '/' or '#' cannot alter the request path.
+        var requestUri = $"http://python-analysis-api/impact/{Uri.EscapeDataString(nodeId)}";
+        using var response = await _httpClient.GetAsync(requestUri, cancellationToken);
+        response.EnsureSuccessStatusCode();
+        var result = await response.Content.ReadFromJsonAsync<ImpactResult>(cancellationToken: cancellationToken);
+
+        return Ok(result);
+    }
+    catch (HttpRequestException ex)
+    {
+        // Connection failure or non-success status from the analysis engine
+        return StatusCode(503, new { error = "Analysis Engine is currently unavailable", details = ex.Message });
+    }
+    catch (TaskCanceledException) when (!cancellationToken.IsCancellationRequested)
+    {
+        // HttpClient reports its own timeout as a cancellation that the caller did not request.
+        return StatusCode(504, new { error = "Analysis Engine timed out" });
+    }
+}
+```
+
+### 3.2 [Frontend] Canvas 기반 데이터 오버레이 및 진행률 표시 (JavaScript)
+```javascript
+// src/Web/wwwroot/js/pid-viewer.js
+
+// 1. 실시간 값 업데이트 (Canvas 최적화 버전)
+async function updateRealtimeValues(tagData) {
+    // tagData: { "TAG_01": { value: 10.5, status: "OK" }, ... }
+    
+    const ctx = canvas.getContext('2d');
+    
+    for (const [tag, data] of Object.entries(tagData)) {
+        const node = nodeMap.get(tag); // 좌표 정보 맵
+        if (node && isInViewport(node)) {
+            // 뷰포트 내에 있을 때만 렌더링
+            ctx.fillStyle = data.value > threshold ? 'red' : 'green';
+            ctx.beginPath();
+            ctx.arc(node.x, node.y, 5, 0, Math.PI * 2);
+            ctx.fill();
+            
+            // 툴팁 데이터 업데이트
+            updateTooltipData(tag, data.value);
+        }
+    }
+}
+
+// 2. 분석 진행 상태 표시 (Phase 5 병렬 처리 반영)
+async function trackAnalysisProgress(taskId) {
+    const progressBar = document.getElementById('analysis-progress-bar');
+    const statusText = document.getElementById('analysis-status-text');
+
+    const pollStatus = async () => {
+        try {
+            const response = await fetch(`/api/pid/status/${taskId}`);
+            const data = await response.json();
+
+            // 프로그레스 바 업데이트
+            progressBar.style.width = `${data.progress * 100}%`;
+            statusText.innerText = `분석 중... ${Math.round(data.progress * 100)}% (${data.message})`;
+
+            if (data.status !== 'Completed' && data.status !== 'Failed') {
+                setTimeout(pollStatus, 1000); // 1초 간격 폴링
+            } else {
+                statusText.innerText = data.status === 'Completed' ? '분석 완료!' : '분석 실패';
+            }
+        } catch (e) {
+            statusText.innerText = '상태 조회 중 오류 발생';
+        }
+    };
+
+    pollStatus();
+}
+```
+
+### 3.3 [Analysis] 흐름 방향 반영 경로 추적 (Python)
+```python
+import networkx as nx
+
+def get_propagation_path_with_flow(graph, start_node):
+    """
+    단순 descendants가 아닌, 엣지의 방향성(flow_direction)과 
+    상태(valve_open)를 고려한 실제 영향 전파 경로 추출
+    """
+    # 1. 유효한 엣지만 필터링 (방향이 맞고 밸브가 열려있는 경로)
+    valid_edges = [
+        (u, v, d) for u, v, d in graph.edges(data=True) 
+        if d.get('flow_direction') == 'forward' and d.get('valve_status') == 'open'
+    ]
+    filtered_graph = nx.DiGraph()
+    filtered_graph.add_edges_from(valid_edges)
+    
+    # 2. 전파 단계별 노드 추출 (BFS)
+    propagation_levels = nx.single_source_shortest_path_length(filtered_graph, start_node)
+    
+    # { node_id: distance } 형태로 반환하여 프론트엔드에서 색상 구분 가능하게 함
+    return propagation_levels
+
+# Example: flow-based impact analysis starting from P-101
+impact_map = get_propagation_path_with_flow(topology_graph, "P-101")
+```
+
+---
+
+## 🚀 4. Phase 4 완료 기준 (Definition of Done)
+
+- [ ] P&ID 도면(Canvas) 위에 **실시간 OPC UA 값**이 정확한 좌표에 표시되며, 뷰포트 최적화가 적용되었는가?
+- [ ] **SignalR 또는 Delta Update**를 통해 네트워크 부하를 최소화하며 실시간 데이터를 수신하는가?
+- [ ] 병렬 처리 중인 분석 작업의 **진행 상태(Progress Bar)**가 UI에 실시간으로 반영되는가?
+- [ ] 특정 노드 클릭 시 **유체 흐름 방향이 반영된 영향도 분석** 결과가 단계별 색상으로 하이라이트 되는가?
+- [ ] C# 서버와 Python 엔진 간 통신에 **타임아웃 및 예외 처리**가 적용되어 시스템 안정성이 확보되었는가?
+- [ ] 전체 파이프라인(`추출 $\rightarrow$ 모델링 $\rightarrow$ 매핑 $\rightarrow$ 시각화`)이 통합되어 동작하는가?
--- a/Pipeline/Graph_Pipeline_Phase5.md
+++ b/Pipeline/Graph_Pipeline_Phase5.md
@@ -0,0 +1,140 @@
+# 🔌 Graph Pipeline Phase 5: MCP 서버 통합 및 고성능 병렬 아키텍처 (MCP Integration & Parallel Processing)
+
+이 문서는 앞서 설계한 1~4단계의 Graph Pipeline을 현재 프로젝트의 **Unified MCP Server (`mcp-server/server.py`)**에 통합하는 방안을 다룹니다. 특히, 대용량 도면 처리 시 발생하는 지연과 버퍼 문제를 해결하기 위해 `PID_Parser_Plan_Revision.md`의 **분산 처리 기법**과 vLLM의 **Continuous Batching** 특성을 극대화한 병렬 아키텍처를 적용합니다.
+
+---
+
+## 🏗️ 1. 통합 아키텍처 설계
+
+### 1.1 고성능 병렬 데이터 흐름 (Parallel End-to-End Flow)
+단일 순차 요청 방식에서 벗어나, **[전처리 $\rightarrow$ 병렬 분산 추출 $\rightarrow$ 통합 후처리]** 구조로 전환합니다.
+
+`Frontend (UI)` $\rightarrow$ `C# Server (API)` $\rightarrow$ `MCP Server (Orchestrator)` $\rightarrow$ `Parallel Worker Tools (vLLM Batching)` $\rightarrow$ `Result Aggregator` $\rightarrow$ `C# Server`
+
+1.  **요청:** 사용자가 UI에서 도면 분석 시작 버튼 클릭.
+2.  **전처리 (Orchestrator):** MCP 서버가 DXF를 로드하여 기하학적 데이터를 추출하고, 분석 대상(Transmitter, Valve, Pump 등)별로 데이터를 분할합니다.
+3.  **병렬 호출 (Continuous Batching):**
+    *   분할된 데이터를 기반으로 여러 개의 MCP 툴(또는 동일 툴의 다중 요청)을 **동시에(Asynchronously)** 호출합니다.
+    *   vLLM 서버는 이 다수의 요청을 **Continuous Batching**으로 묶어 처리함으로써, 개별 요청 시보다 전체 처리량(Throughput)을 획기적으로 높입니다.
+4.  **통합 및 저장 (Aggregator):** 각 분산 툴이 반환한 결과를 취합하여 최종 위상 그래프를 구축하고 DB에 저장합니다.
+
+### 1.2 MCP 서버 내 역할 분담 (분산 처리 모델)
+`PID_Parser_Plan_Revision.md`를 반영하여, 기능을 세분화하고 병렬 실행 가능하게 설계합니다.
+
+| 구분 | MCP Tool / Module | 역할 | 병렬 처리 전략 |
+|---|---|---|---|
+| **Orchestrator** | `orchestrate_pid_pipeline` | 전체 공정 제어, 데이터 분할 및 결과 취합 | Asyncio 기반 비동기 제어 |
+| **Worker 1** | `extract_transmitters` | FIT, FT, LT, PT, TE 추출 | vLLM Batching 요청 |
+| **Worker 2** | `extract_valves` | FCV, LCV, TCV, PCV, XV 추출 | vLLM Batching 요청 |
+| **Worker 3** | `extract_gauges` | PG, TG, LG 추출 | vLLM Batching 요청 |
+| **Worker 4** | `extract_equipment` | Column, Tank, Filter, Drum, Heat Exchanger 등 추출 | vLLM Batching 요청 |
+| **Worker 5** | `extract_pumps` | P-xxxx, VP-xxxx 추출 | vLLM Batching 요청 |
+| **Analyzer** | `analyze_pid_impact` | 구축된 그래프 기반 영향도 분석 | Graph Algorithm (CPU) |
+
+---
+
+## 💻 2. MCP 서버 통합 구현 가이드
+
+### 2.1 비동기 병렬 처리 설계 (Asyncio + vLLM Batching)
+`FastMCP` 환경에서 `asyncio.gather`를 사용하여 여러 추출 툴을 동시에 호출함으로써 vLLM의 Continuous Batching 효율을 극대화합니다.
+
+```python
+# mcp-server/server.py 통합 설계 (개념 코드)
+import asyncio
+from typing import List
+
+async def run_parallel_extraction(geo_data):
+    """
+    분류별 추출 툴을 병렬로 호출하여 vLLM Batching 유도
+    """
+    # 각 분류별 프롬프트와 데이터 준비
+    tasks = [
+        extract_transmitters_async(geo_data),
+        extract_valves_async(geo_data),
+        extract_gauges_async(geo_data),
+        extract_equipment_async(geo_data),
+        extract_pumps_async(geo_data)
+    ]
+    
+    # 동시에 요청을 던져 vLLM이 내부적으로 Batch 처리하게 함
+    results = await asyncio.gather(*tasks)
+    return results
+
+@mcp.tool()
+async def build_pid_graph_parallel(filepath: str) -> str:
+    """
+    분산 처리 기법을 적용한 P&ID 그래프 생성 툴
+    """
+    # 1. 전처리 (Phase 1)
+    extractor = PidGeometricExtractor(filepath)
+    geo_data = extractor.extract_and_save("shared_geo_data.json") # 파일 기반 공유 저장소 활용
+    # 실제 구현 시 geo_data는 파일 경로 또는 로드된 JSON 리스트
+    
+    # 2. 병렬 분산 추출 (vLLM Batching 활용)
+    # 각 Worker 툴들이 LLM에 요청을 보낼 때 vLLM이 이를 묶어서 처리함
+    extracted_parts = await run_parallel_extraction(geo_data)
+    
+    # 3. 결과 통합 및 위상 모델링 (Phase 2)
+    # extracted_parts는 각 Worker(Transmitter, Valve 등)가 반환한 매핑 결과 리스트
+    all_tags = flatten_results(extracted_parts)
+    builder = PidTopologyBuilder(geo_data, all_extracted_tags=all_tags)
+    builder.build_graph()
+    
+    # 4. 저장
+    graph_id = os.path.basename(filepath).replace(".dxf", "_graph.json")
+    nx.write_graphml(builder.G, f"storage/{graph_id}")
+    
+    return json.dumps({"success": True, "graph_id": graph_id, "nodes": builder.G.number_of_nodes()})
+```
+
+### 2.2 C# 서버와의 인터페이스 (`McpClient` 활용)
+C# 서버는 `src/Infrastructure/Mcp/McpClient.cs`를 통해 위 툴들을 호출합니다.
+
+다음은 `PidGraphService`에서 `McpClient`로 `analyze_pid_impact` 툴을 호출하여
+영향도 분석 결과를 받아오는 예시입니다.
+
+```csharp
+// src/Core/Application/Services/PidGraphService.cs (신규 서비스)
+/// <summary>
+/// Calls the <c>analyze_pid_impact</c> MCP tool for the given graph and start node.
+/// </summary>
+/// <exception cref="InvalidOperationException">The tool response deserializes to null.</exception>
+public async Task<ImpactResult> GetImpactAnalysisAsync(string graphId, string nodeId)
+{
+    var request = new McpToolRequest {
+        ToolName = "analyze_pid_impact",
+        Arguments = new { graph_id = graphId, start_node_id = nodeId }
+    };
+
+    var jsonResponse = await _mcpClient.CallToolAsync(request);
+
+    // Deserialize returns null for a JSON "null" payload; fail here instead of
+    // handing a null result to callers that expect a value.
+    return JsonSerializer.Deserialize<ImpactResult>(jsonResponse)
+        ?? throw new InvalidOperationException("analyze_pid_impact returned an empty result.");
+}
+```
+
+---
+
+## 🛠️ 3. 프로그램 구성 및 배포 전략
+
+### 3.1 디렉토리 구조 확장
+```text
+mcp-server/
+├── server.py              # MCP 메인 서버 (툴 정의)
+├── pipeline/              # Graph Pipeline 핵심 로직 (Phase 1~4)
+│   ├── __init__.py
+│   ├── extractor.py       # Phase 1: Geometric Extraction
+│   ├── topology.py        # Phase 2: Topology Modeling
+│   ├── mapper.py          # Phase 3: Intelligent Mapping
+│   └── analyzer.py        # Phase 4: Impact Analysis
+└── storage/               # 생성된 그래프 파일 (.graphml) 저장소
+```
+
+### 3.2 실행 프로세스
+1.  **MCP 서버 기동:** `python mcp-server/server.py --http` (포트 5001)
+2.  **C# 서버 기동:** `dotnet run` (포트 5000)
+3.  **통신:** C# 서버 $\xrightarrow{HTTP/JSON}$ MCP 서버 $\xrightarrow{Python\ Libs}$ 결과 반환.
+
+---
+
+## 🚀 4. 최종 완료 기준 (Definition of Done)
+
+- [ ] `mcp-server/server.py`에 `build_pid_graph_parallel`, `analyze_pid_impact` 등 핵심 툴이 정의되었는가?
+- [ ] Phase 1~4의 Python 로직이 `mcp-server/pipeline/` 모듈로 구조화되어 통합되었는가?
+- [ ] C# `McpClient`를 통해 MCP 서버의 그래프 분석 툴을 호출하고 결과를 수신할 수 있는가?
+- [ ] 도면 업로드 $\rightarrow$ 그래프 생성 $\rightarrow$ 태그 매핑 $\rightarrow$ 영향도 분석으로 이어지는 **End-to-End 파이프라인**이 완성되었는가?
+- [ ] 모든 과정이 `json_response=True` 및 `stateless_http=True` 설정 하에 안정적으로 동작하는가?
--- a/Pipeline/No-10_Plant_PID.dxf
+++ b/Pipeline/No-10_Plant_PID.dxf
--- a/futurePlan/End-to-End
+++ b/futurePlan/End-to-End
@@ -0,0 +1,83 @@
+P&ID 도면 분석을 고도화하여 **Graph Pipeline**을 구축하는 것은 단순한 텍스트 추출을 넘어, 설비 간의 **연결성(Connectivity)**과 **위상(Topology)**을 이해하는 것을 의미합니다.
+
+제가 이 작업을 수행한다면, 다음과 같은 **4단계 전략**으로 접근하여 효율성을 극대화하겠습니다.
+
+---
+
+### 1. 데이터 추출 단계: "단순 텍스트 $\rightarrow$ 기하학적 객체"
+현재의 텍스트 기반 추출에서 벗어나, 객체의 **좌표(Coordinate)**와 **속성(Property)**을 보존하는 구조로 변경해야 합니다.
+
+*   **객체 중심 파싱:** DXF의 Entity(Line, Circle, Text, Polyline)를 개별 객체로 인식하고, 각 객체의 중심점과 경계 상자(Bounding Box)를 저장합니다.
+*   **심볼 라이브러리 구축:** 밸브, 펌프, 탱크 등 반복되는 심볼의 기하학적 패턴을 정의하여, 텍스트가 없어도 "이 모양은 밸브다"라고 인식하는 패턴 매칭 로직을 도입합니다.
+*   **OCR 고도화:** PDF의 경우, 단순 텍스트 추출이 아닌 영역 기반 OCR을 통해 텍스트의 물리적 위치를 정확히 파악하여 인접한 심볼과 연결합니다.
+
+### 2. 그래프 모델링 단계: "객체 $\rightarrow$ 노드 및 엣지"
+추출된 객체들을 기반으로 **Knowledge Graph**를 생성합니다.
+
+*   **노드(Node):** 설비(Equipment), 계기(Instrument), 태그(Tag)를 노드로 정의합니다.
+*   **엣지(Edge):** 배관(Line)을 엣지로 정의합니다.
+    *   **연결성 판단:** `Line`의 끝점이 `Equipment`의 경계 상자 내에 있거나 매우 근접해 있다면 두 노드를 연결된 것으로 간주합니다.
+    *   **방향성 부여:** 화살표 심볼이나 공정 흐름(Flow)을 분석하여 엣지에 방향성을 부여합니다.
+*   **계층 구조 생성:** `Unit $\rightarrow$ Equipment $\rightarrow$ Component $\rightarrow$ Tag` 순의 계층적 그래프 구조를 설계합니다.
+
+### 3. 지능형 매핑 및 검증 단계: "도면 $\rightarrow$ 실제 데이터"
+그래프 구조를 활용해 Experion 시스템의 실제 태그와 정밀하게 매핑합니다.
+
+*   **맥락 기반 매핑 (Contextual Mapping):** 단순히 이름이 비슷한 태그를 찾는 것이 아니라, "펌프 P-101 옆에 있는 PT-101은 P-101의 압력 전송기일 확률이 높다"는 그래프 상의 인접성을 활용합니다.
+*   **상호 검증 (Cross-Validation):** 
+    *   도면 상의 연결 관계(P-101 $\rightarrow$ V-101)와 실제 공정 데이터의 상관관계(P-101 가동 시 V-101 유량 변화)를 비교하여 매핑의 정확도를 검증합니다.
+*   **LLM 기반 추론:** 모호한 태그명이나 누락된 정보는 MCP 서버를 통해 LLM이 도면의 맥락과 R530 문서를 분석하여 최적의 매핑 후보를 추천하게 합니다.
+
+### 4. 활용 및 시각화 단계: "분석 $\rightarrow$ 인사이트"
+구축된 그래프를 통해 운영자에게 실질적인 가치를 제공합니다.
+
+*   **영향도 분석 (Impact Analysis):** 특정 밸브(V-101)가 고장 났을 때, 그래프 탐색(BFS/DFS)을 통해 하류(Downstream)에 영향을 받는 모든 설비와 태그를 즉시 식별합니다.
+*   **디지털 트윈 뷰:** P&ID 도면 위에 실시간 OPC UA 값을 오버레이하여, 도면을 보면서 현재 공정 상태를 한눈에 파악하는 인터페이스를 구현합니다.
+*   **이상 징후 전파 경로 추적:** 특정 태그에서 알람이 발생했을 때, 그래프를 역추적하여 근본 원인(Root Cause)이 될 가능성이 높은 상류 설비를 추천합니다.
+
+---
+
+### 🚀 효율적인 실행을 위한 로드맵 (Priority)
+
+1.  **Short-term (Quick Win):** DXF 파서 수정 $\rightarrow$ 객체 좌표 저장 $\rightarrow$ 단순 인접성 기반 태그-설비 매핑.
+2.  **Mid-term (Core):** 심볼 패턴 매칭 도입 $\rightarrow$ 배관(Line) 기반의 그래프 구조(NetworkX 등 활용) 구축.
+3.  **Long-term (Advanced):** LLM 기반의 도면-데이터 추론 엔진 통합 $\rightarrow$ 실시간 데이터 오버레이 UI 구현.
+
+이렇게 **[기하학적 추출 $\rightarrow$ 위상 모델링 $\rightarrow$ 맥락적 매핑 $\rightarrow$ 운영 인사이트]** 순으로 확장하는 것이 가장 리스크가 적고 효율적인 방법이라고 생각합니다.
+
+---
+
+### 🛠️ 단계별 설계 구조 및 실행 코딩 가이드
+
+로드맵의 각 단계를 실제로 구현하기 위한 기술적 설계 구조와 핵심 코딩 전략입니다.
+
+#### 1단계: 기하학적 데이터 추출 (Geometric Extraction)
+*   **설계 구조:** `DXF Parser` $\rightarrow$ `Entity Mapper` $\rightarrow$ `Spatial Database (JSON/SQL)`
+*   **실행 코딩 전략:**
+    *   **좌표 보존:** `ezdxf`를 사용하여 모든 `TEXT`, `LINE`, `CIRCLE`, `LWPOLYLINE`의 시작/끝점 및 중심 좌표를 추출하여 저장합니다.
+    *   **Bounding Box 계산:** 각 텍스트와 심볼의 최소/최대 X, Y 좌표를 계산하여 `Rect` 객체로 관리합니다.
+    *   **데이터 구조:**
+        ```json
+        { "id": "entity_1", "type": "TEXT", "value": "P-101", "bbox": {"x1": 10, "y1": 20, "x2": 15, "y2": 25} }
+        ```
+
+#### 2단계: 위상 모델링 (Topology Modeling)
+*   **설계 구조:** `Spatial Join` $\rightarrow$ `Graph Constructor` $\rightarrow$ `NetworkX Graph`
+*   **실행 코딩 전략:**
+    *   **인접성 판단 (Proximity Search):** 텍스트 노드와 가장 가까운 심볼/라인을 찾기 위해 `KD-Tree` 또는 `R-Tree` 알고리즘을 사용합니다.
+    *   **연결성 추론:** `Line`의 끝점이 `Equipment`의 Bounding Box 내에 포함되는지 확인하여 엣지(Edge)를 생성합니다.
+    *   **그래프 구축:** Python의 `NetworkX` 라이브러리를 사용하여 `G.add_node(equipment)` 및 `G.add_edge(eq1, eq2, relation='pipe')` 형태로 모델링합니다.
+
+#### 3단계: 맥락적 매핑 (Contextual Mapping)
+*   **설계 구조:** `Graph Traversal` $\rightarrow$ `Tag Candidate Search` $\rightarrow$ `LLM Validator`
+*   **실행 코딩 전략:**
+    *   **인접 태그 탐색:** 특정 설비 노드에서 1-hop 또는 2-hop 이내에 존재하는 모든 태그 노드를 수집합니다.
+    *   **매핑 스코어링:** `(이름 유사도 * 0.4) + (위상적 인접도 * 0.6)`와 같은 가중치 모델을 적용하여 최적의 Experion 태그를 매핑합니다.
+    *   **LLM 검증:** 매핑 결과와 도면의 맥락을 LLM에게 전달하여 "P-101 펌프의 토출측에 PT-101이 있는 것이 공정상 타당한가?"를 검증합니다.
+
+#### 4단계: 운영 인사이트 구현 (Operational Insight)
+*   **설계 구조:** `Real-time Data Stream` $\rightarrow$ `Graph Overlay` $\rightarrow$ `Impact Analysis Engine`
+*   **실행 코딩 전략:**
+    *   **실시간 오버레이:** `OPC UA`로 수집된 실시간 값을 그래프 노드의 속성으로 업데이트하고, 이를 프론트엔드(Canvas/SVG)에 렌더링합니다.
+    *   **영향도 분석:** `nx.single_source_shortest_path` 또는 `BFS`를 사용하여 특정 노드 장애 시 영향을 받는 하류(Downstream) 노드 리스트를 즉시 추출합니다.
+    *   **루트 코즈 추적:** 알람 발생 노드로부터 상류(Upstream) 방향으로 역추적하여 이상 징후의 시작점을 식별합니다.
--- a/Pipeline/PID_Parser_Plan_Revision.md
+++ b/Pipeline/PID_Parser_Plan_Revision.md
@@ -0,0 +1,19 @@
+# 현재 문제점 분석
+한정된 자원의 하드웨어로 대용량의 일을 한번에 처리하려고 복잡한 프롬프트를 LLM 에게 주어 처리 시간의 지연과, 전달 및 응답 버퍼의 수신 문제발생
+
+## 분산처리 기법 적용 및 로직 플로우
+
+Reference Program : test_dxf_extract_pid1.py
+
+1. Reference Program과 같은 형태의 파일을 아래 5가지 항목으로 작성하고,
+        - dxf_extract_transmitter.py    : FIT, FT, LT, PT, TE
+        - dxf_extract_valve.py          : FCV, LCV, TCV, PCV, XV 
+        - dxf_extract_gague.py          : PG, TG, LG
+        - dxf_extract_equipment.py      : C-?????(Distillation Column), T-????(Tank), F-?????(Filter), D-?????(Drum, Condenser), E-?????(Heat Exchanger), B-?????(BOILER), CT-?????(COOLING TOWER), F-?????(COOLING FAN), CH-??????(CHILLER), K-?????(COMPRESSOR)
+        - dxf_extract_pump.py           : P-10106,  VP-10117
+2. UI 추출시작 버튼 클릭 -> 
+3. 메인 프로그램 시작 -> 파일 전처리(ezdxf) - 전달 받은 데이터 보유 후
+
+4. 1항에서 작성한 프로그램들에 전처리된 데이터를 전달하여, 5개 프로그램 모두 실행
+
+5. 처리량에 따라 실행이 끝난 서브 프로그램들은 각각의 파일에 결과를 저장하게 프로그램 되어 있으니, 메인프로그램은 서브 프로그램들이 종료 상태가 되면 각각 후처리 과정(데이터베이스 저장 절차)을 진행
--- a/Pipeline/PID_Parser_작업지시서_v3.md
+++ b/Pipeline/PID_Parser_작업지시서_v3.md
--- a/Pipeline/pycache/pid_geometric_extractor.cpython-312.pyc
+++ b/Pipeline/pycache/pid_geometric_extractor.cpython-312.pyc
--- a/Pipeline/pycache/pid_intelligent_mapper.cpython-312.pyc
+++ b/Pipeline/pycache/pid_intelligent_mapper.cpython-312.pyc
--- a/Pipeline/pycache/pid_topology_builder.cpython-312.pyc
+++ b/Pipeline/pycache/pid_topology_builder.cpython-312.pyc
--- a/Pipeline/dxf_extract_plan_revised.md
+++ b/Pipeline/dxf_extract_plan_revised.md
@@ -0,0 +1,84 @@
+# 현재 문제점 분석
+한정된 자원의 하드웨어로 대용량의 일을 한번에 처리하려고 복잡한 프롬프트를 LLM 에게 주어 처리 시간의 지연과, 전달 및 응답 버퍼의 수신 문제발생
+
+## 분산처리 기법 적용 및 로직 플로우
+
+1. 추출시작 버튼 클릭 -> 
+2. 메인 프로그램 시작 -> 파일 전처리(ezdxf)- 전달 받은 데이터 보유 후
+3. 미리 작성된 참조 파이썬 프로그램: test_dxf_extract_pid1.py, ~pid2.py, ~pid3.py (for loop를 없애고 단일 chunk 실행으로 변경). 이 프로그램과 같은 형태의 파일을 아래 5가지 프로그램으로 작성하고, 전달 받은 데이터를 각 프로그램에 전달하여 모두 실행시킴
+     - INSTRUMENTS 
+        - dxf_extract_transmitter.py    : FIT, FT, LT, PT, TE
+        - dxf_extract_valve.py          : FCV, LCV, TCV, PCV, XV 
+        - dxf_extract_gague.py          : PG, TG, LG
+        - dxf_extract_equipment.py      : C-?????(Distillation Column), T-????(Tank), F-?????(Filter), D-?????(Drum, Condenser), E-?????(Heat Exchanger), B-?????(BOILER), CT-?????(COOLING TOWER), F-?????(COOLING FAN), CH-??????(CHILLER), K-?????(COMPRESSOR)
+        - dxf_extract_pump.py           : P-10106,  VP-10117
+3. 비동기로 실행이 끝난 서브 프로그램들은 각각의 파일에 결과가 저장될 것이고, 이것을 메인프로그램이 서브 프로그램들의 종료에 대하여 각각 후처리 과정(데이터베이스 저장 절차)을 진행
+
+4. 위의 실증예 3개 프로그램 동시 실행시 , KV Cache 최대 사용량 30% 미만, 최대 95 token/sec, 실증됨.
+5. 각각의 max context length = 65536으로 설정할것
+
+
+   
+
+    
+3. INSTRUMENTS 와 SYSTEM TAG (tagname)과의 관계설정
+    - 예1) FICQ-10101.PV = FT-10101, FICQ-10101.OP = FCV-10101, FIQ-6115.PV = FT-6115, TI-6117.PV = TE-6117 
+    - LATER -->예2) P-10101.PV, P-10101.OP , XV-10111.PV, XV-10111.OP(LATER : INT 2BIT, 4BIT, 8BIT ENCODER OUTPUT)
+
+### PLANT RESOURCE , FIELD INSTRUMENTS, EQUIPMENTS MANAGEMENT TABLE 신설
+4. PLANT RESOURCE : FIELD INSTRUMENTS용 데이터베이스 테이블 신설
+    - Instruments Table Column: 
+        TagName, 
+        Type:
+            - Flow Transmitter
+                - (Type_sub) : Coriolis Mass Flowmeter
+                - (Type_sub) : Variable Area Flowmeter
+                - (Type_sub) : Rotameter
+                - (Type_sub) : Magnetic Flowmeter
+                - (Type_sub) : 
+                - (Type_sub) : 
+                - (Type_sub) : 
+                - (Type_sub) : 
+            - Pressure Transmitter
+                - (Type_sub) : Absolute Pressure Transmitter (Vacuum)
+                - (Type_sub) : Gauge Pressure Transmitter
+                - (Type_sub) : DP Transmitter
+            - Level Transmitter 
+                - (Type_sub) : Remote Sealed DP Transmitter
+                - (Type_sub) : Float Level Transmitter
+                - (Type_sub) : 
+            - Temperature Sensor
+                - (Type_sub) : R.T.D (pt100)
+                - (Type_sub) : Thermocouple Type K
+                - (Type_sub) : 
+                - (Type_sub) : 
+
+            - Control Valve
+                - (Type_sub)    : Globe 2 Way
+                - (Size)        : 25A
+                - (Action)  : Fail Close (Air to Open) / Fail Open (Air to Close)  
+                - (Type_sub) : 
+                - (Type_sub) : 
+            - On-Off Valve
+                - (Type_sub) : 
+                - (Type_sub) : 
+            - Pressure Safety Valve
+                - (Type_sub) : 
+            - Pressure Relief Valve,
+                - (Type_sub) : 
+        Type_sub, 
+        Range_Max, 
+        Range_Calibrated,
+        Model_No, 
+        Installed_at, 
+        Repaired_at, 
+        Repair_history,
+        Last_Calibrated_at, 
+        Recommended_Spare_parts
+        Doc_No,
+        TagName, 
+        DataSheet,
+        Drawings, 
+        NamePlate, : (photo), 
+        Manual_No : (pdf file, numbering rule needed)
+        
--- a/Pipeline/pid_analysis_engine.py
+++ b/Pipeline/pid_analysis_engine.py
@@ -0,0 +1,104 @@
+import networkx as nx
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import Dict, List, Optional
+import uvicorn
+import json
+import os
+
+# FastAPI application exposing the impact-analysis endpoints defined below.
+app = FastAPI(title="P&ID Analysis Engine")
+
+# Module-level state: input file locations plus the graph and tag mapping that load_data() fills in.
+# The paths are relative, so they resolve against the process's current working directory.
+TOPOLOGY_FILE = "futurePlan/End-to-End P&ID Graph Pipeline/pid_graph_topology.json"
+MAPPING_FILE = "futurePlan/End-to-End P&ID Graph Pipeline/pid_final_mapping.json"
+
+topology_graph = nx.DiGraph()
+tag_mapping = {}
+
+def load_data():
+    global topology_graph, tag_mapping
+    try:
+        if os.path.exists(TOPOLOGY_FILE):
+            with open(TOPOLOGY_FILE, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+                # NetworkX 그래프 생성
+                for node in data.get('nodes', []):
+                    topology_graph.add_node(node['id'], **node)
+                for edge in data.get('edges', []):
+                    topology_graph.add_edge(edge['source'], edge['target'], **edge)
+            print(f"Successfully loaded topology from {TOPOLOGY_FILE}")
+        
+        if os.path.exists(MAPPING_FILE):
+            with open(MAPPING_FILE, 'r', encoding='utf-8') as f:
+                tag_mapping = json.load(f)
+            print(f"Successfully loaded mapping from {MAPPING_FILE}")
+            
+    except Exception as e:
+        print(f"Error loading data: {e}")
+
+@app.on_event("startup")
+async def startup_event():
+    load_data()
+
+class ImpactRequest(BaseModel):
+    # Request model carrying the node id to analyse.
+    # NOTE(review): not referenced by the endpoints visible in this file — confirm it is still needed.
+    nodeId: str
+
+class ImpactResult(BaseModel):
+    # Result model for an impact analysis.
+    # NOTE(review): the /impact endpoint in this file returns a plain dict whose key is "paths",
+    # while this model declares "path"; it is not used as a response_model here — confirm the intended name.
+    startNode: str
+    impactedNodes: Dict[str, int] # { nodeId: depth }
+    path: List[List[str]]
+
+def get_propagation_path_with_flow(graph, start_node):
+    """
+    엣지의 방향성(flow_direction)과 상태(valve_status)를 고려한 실제 영향 전파 경로 추출
+    """
+    if start_node not in graph:
+        return {}
+
+    # 1. 유효한 엣지만 필터링 (방향이 forward이고 밸브가 open인 경로)
+    # 실제 데이터에 flow_direction이나 valve_status가 없을 경우를 대비해 기본값 설정
+    valid_edges = [
+        (u, v) for u, v, d in graph.edges(data=True) 
+        if d.get('flow_direction', 'forward') == 'forward' 
+        and d.get('valve_status', 'open') == 'open'
+    ]
+    
+    filtered_graph = nx.DiGraph()
+    filtered_graph.add_edges_from(valid_edges)
+    
+    # 2. 전파 단계별 노드 추출 (BFS)
+    try:
+        propagation_levels = nx.single_source_shortest_path_length(filtered_graph, start_node)
+        return propagation_levels
+    except Exception:
+        return {}
+
+@app.get("/impact/{nodeId}")
+async def analyze_impact(nodeId: str):
+    if nodeId not in topology_graph:
+        raise HTTPException(status_code=404, detail=f"Node {nodeId} not found in topology")
+    
+    impact_map = get_propagation_path_with_flow(topology_graph, nodeId)
+    
+    # 경로 추출 (시각화를 위해 간단하게 모든 영향 노드로의 최단 경로 포함)
+    paths = []
+    for target in impact_map.keys():
+        if target != nodeId:
+            try:
+                path = nx.shortest_path(topology_graph, source=nodeId, target=target)
+                paths.append(path)
+            except nx.NetworkXNoPath:
+                continue
+
+    return {
+        "startNode": nodeId,
+        "impactedNodes": impact_map,
+        "paths": paths
+    }
+
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy", "nodes": topology_graph.number_of_nodes(), "edges": topology_graph.number_of_edges()}
+
+# Run the API directly with uvicorn when executed as a script.
+# NOTE(review): host "0.0.0.0" listens on all interfaces — confirm this is intended outside development.
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
--- a/Pipeline/pid_final_mapping.json
+++ b/Pipeline/pid_final_mapping.json
@@ -0,0 +1 @@
+[]
--- a/Pipeline/pid_geometric_extractor.py
+++ b/Pipeline/pid_geometric_extractor.py
@@ -0,0 +1,188 @@
+import ezdxf
+import re
+import json
+from typing import List, Optional, Tuple, Union
+from pydantic import BaseModel, Field
+from shapely.geometry import box, Point
+
+# --- Data Models ---
+
+class BoundingBox(BaseModel):
+    # Axis-aligned rectangle given by its min/max corners, plus its midpoint.
+    min_x: float
+    min_y: float
+    max_x: float
+    max_y: float
+    # (x, y) midpoint of the rectangle; computed in PidGeometricExtractor._create_bbox.
+    center: Tuple[float, float]
+
+class GeometricEntity(BaseModel):
+    # One DXF entity as exported by PidGeometricExtractor.extract_and_save.
+    entity_id: str  # DXF handle of the entity
+    entity_type: str  # TEXT, MTEXT, LINE, LWPOLYLINE, CIRCLE, ARC
+    layer: str
+    bbox: BoundingBox
+    raw_value: Optional[str] = None  # text content as stored in the DXF (TEXT/MTEXT only)
+    clean_value: Optional[str] = None  # raw_value after PidGeometricExtractor.clean_text
+    # 2D points: polyline vertices, LINE start/end, or the centre for CIRCLE/ARC.
+    coordinates: List[Union[Tuple[float, float], List[float]]] = Field(default_factory=list)
+    # Extra attributes; the extractor stores "color" and "lineweight" here.
+    properties: dict = Field(default_factory=dict)
+
+# --- Extractor Implementation ---
+
+class PidGeometricExtractor:
+    def __init__(self, file_path: str):
+        try:
+            self.doc = ezdxf.readfile(file_path)
+            self.msp = self.doc.modelspace()
+        except Exception as e:
+            raise IOError(f"Failed to load DXF file: {e}")
+
+    def clean_text(self, text: str) -> str:
+        """
+        DXF 특수 제어 문자 및 MTEXT 포맷팅을 제거하여 정제된 텍스트 반환.
+        """
+        if not text:
+            return ""
+        
+        # 1. MTEXT 포맷팅 및 제어 문자 제거 (\P, \W, \L, \A, \C, \H, \S, \T 등)
+        text = re.sub(r'\\([P|W|L|A|C|H|S|T])\d*;?', ' ', text)
+        
+        # 2. 중괄호 { } 제거
+        text = re.sub(r'[\{\}]', ' ', text)
+        
+        # 3. DXF 특수 제어 문자 제거 (%%U: Underline, %%O: Overline, %%S: Strikethrough, %%R: Registered)
+        text = re.sub(r'%%[U|O|S|R]', ' ', text)
+        
+        # 4. 불필요한 특수 기호 및 반복되는 공백 정제
+        text = re.sub(r'\s+', ' ', text).strip()
+        
+        return text
+
+    def get_bbox(self, entity) -> Optional[BoundingBox]:
+        """
+        엔티티 타입별로 동적인 Bounding Box를 계산하여 반환.
+        """
+        try:
+            if entity.dxftype() == 'TEXT':
+                p = entity.dxf.insert
+                h = entity.dxf.height
+                # 텍스트 길이에 따른 대략적인 너비 계산 (글자수 * 높이 * 0.6)
+                width = len(entity.dxf.text) * h * 0.6
+                return self._create_bbox(p.x, p.y, p.x + width, p.y + h)
+            
+            elif entity.dxftype() == 'MTEXT':
+                p = entity.dxf.insert
+                h = entity.dxf.char_height if hasattr(entity.dxf, 'char_height') else 2.5
+                w = entity.dxf.width if entity.dxf.width > 0 else len(entity.text) * h * 0.6
+                return self._create_bbox(p.x, p.y, p.x + w, p.y + h)
+            
+            elif entity.dxftype() == 'LINE':
+                start = entity.dxf.start
+                end = entity.dxf.end
+                return self._create_bbox(
+                    min(start.x, end.x), min(start.y, end.y),
+                    max(start.x, end.x), max(start.y, end.y)
+                )
+            
+            elif entity.dxftype() == 'LWPOLYLINE':
+                points = entity.get_points()
+                if not points: return None
+                xs = [p[0] for p in points]
+                ys = [p[1] for p in points]
+                return self._create_bbox(min(xs), min(ys), max(xs), max(ys))
+            
+            elif entity.dxftype() in ('CIRCLE', 'ARC'):
+                center = entity.dxf.center
+                radius = entity.dxf.radius
+                return self._create_bbox(
+                    center.x - radius, center.y - radius,
+                    center.x + radius, center.y + radius
+                )
+                
+        except Exception as e:
+            print(f"Error calculating bbox for {entity.dxftype()} ({entity.dxf.handle}): {e}")
+        return None
+
+    def _create_bbox(self, min_x, min_y, max_x, max_y) -> BoundingBox:
+        return BoundingBox(
+            min_x=min_x,
+            min_y=min_y,
+            max_x=max_x,
+            max_y=max_y,
+            center=((min_x + max_x) / 2, (min_y + max_y) / 2)
+        )
+
+    def extract_and_save(self, output_path: str):
+        """
+        기하학적 데이터를 추출하여 JSON 파일로 저장.
+        """
+        results = []
+        for entity in self.msp:
+            bbox_obj = self.get_bbox(entity)
+            if not bbox_obj:
+                continue
+            
+            raw_text = ""
+            if entity.dxftype() == 'TEXT':
+                raw_text = entity.dxf.text
+            elif entity.dxftype() == 'MTEXT':
+                raw_text = entity.text
+            
+            # 좌표 추출 (3D 좌표를 2D로 변환)
+            coords = []
+            if hasattr(entity, 'get_points'):
+                # ezdxf의 get_points()는 (x, y, z) 튜플 리스트를 반환함
+                coords = [(p[0], p[1]) for p in entity.get_points()]
+            elif entity.dxftype() == 'LINE':
+                coords = [(entity.dxf.start.x, entity.dxf.start.y), (entity.dxf.end.x, entity.dxf.end.y)]
+            elif entity.dxftype() in ('CIRCLE', 'ARC'):
+                coords = [(entity.dxf.center.x, entity.dxf.center.y)]
+
+            entity_data = GeometricEntity(
+                entity_id=entity.dxf.handle,
+                entity_type=entity.dxftype(),
+                layer=entity.dxf.layer,
+                bbox=bbox_obj,
+                raw_value=raw_text if raw_text else None,
+                clean_value=self.clean_text(raw_text) if raw_text else None,
+                coordinates=coords,
+                properties={
+                    "color": entity.dxf.color,
+                    "lineweight": entity.dxf.lineweight if hasattr(entity.dxf, 'lineweight') else None,
+                }
+            )
+            results.append(entity_data.model_dump())
+        
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(results, f, ensure_ascii=False, indent=4)
+        
+        return output_path
+
+# --- Proximity Utilities ---
+
+def is_near(bbox_a: BoundingBox, bbox_b: BoundingBox, threshold=5.0) -> bool:
+    """
+    두 Bounding Box 간의 최단 거리가 임계값 이내인지 확인.
+    shapely box를 사용하여 거리 계산.
+    """
+    box_a = box(bbox_a.min_x, bbox_a.min_y, bbox_a.max_x, bbox_a.max_y)
+    box_b = box(bbox_b.min_x, bbox_b.min_y, bbox_b.max_x, bbox_b.max_y)
+    return box_a.distance(box_b) <= threshold
+
+def is_inside(point: Tuple[float, float], bbox: BoundingBox) -> bool:
+    """
+    특정 점이 Bounding Box 내부에 있는지 확인.
+    """
+    return (bbox.min_x <= point[0] <= bbox.max_x) and (bbox.min_y <= point[1] <= bbox.max_y)
+
+# --- Execution Block ---
+
+# Manual run: extract the sample DXF and write the shared geometry JSON.
+if __name__ == "__main__":
+    # Test file paths (adjust to your environment); relative to the current working directory.
+    input_dxf = "futurePlan/End-to-End P&ID Graph Pipeline/No-10_Plant_PID.dxf"
+    output_json = "futurePlan/End-to-End P&ID Graph Pipeline/shared_geo_data.json"
+    
+    print(f"Starting extraction from {input_dxf}...")
+    try:
+        extractor = PidGeometricExtractor(input_dxf)
+        saved_path = extractor.extract_and_save(output_json)
+        print(f"Successfully saved geometric data to {saved_path}")
+    except Exception as e:
+        print(f"Extraction failed: {e}")
--- a/Pipeline/pid_graph_topology.json
+++ b/Pipeline/pid_graph_topology.json
--- a/Pipeline/pid_intelligent_mapper.py
+++ b/Pipeline/pid_intelligent_mapper.py
@@ -0,0 +1,126 @@
+import networkx as nx
+import asyncio
+import json
+from typing import List, Optional, Dict, Any, Tuple
+from pydantic import BaseModel, Field
+from rapidfuzz import process, fuzz
+from openai import AsyncOpenAI
+
+# --- 응답 구조화를 위한 Pydantic 모델 ---
+class MappingResult(BaseModel):
+    # Structured mapping answer; IntelligentMapper validates the LLM's JSON reply against it.
+    resolved_tag: str = Field(..., description="The final mapped system tag")
+    reason: str = Field(..., description="Reason for this mapping based on context")
+    # Constrained to the closed interval [0.0, 1.0] by the ge/le bounds.
+    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score from 0 to 1")
+
+class IntelligentMapper:
+    def __init__(self, graph: nx.Graph, system_tags: List[str], api_key: str = None):
+        self.graph = graph          # Phase 2에서 생성된 NetworkX 그래프
+        self.system_tags = system_tags # Experion 시스템의 전체 태그 리스트
+        self.client = AsyncOpenAI(api_key=api_key) if api_key else None
+
+    def get_node_context(self, node_id: str) -> str:
+        """노드의 주변 위상 정보를 텍스트로 변환"""
+        if not self.graph.has_node(node_id):
+            return "Node not found in graph"
+            
+        neighbors = list(self.graph.neighbors(node_id))
+        context = []
+        for n in neighbors:
+            attr = self.graph.nodes[n]
+            val = attr.get('value', n)
+            typ = attr.get('type', 'Unknown')
+            context.append(f"Connected to {val} (Type: {typ})")
+        
+        return ", ".join(context) if context else "No connected neighbors"
+
+    async def _resolve_generic(self, node_id: str, category_prompt: str) -> MappingResult:
+        """공통 매핑 로직 (비동기 + 구조화 응답)"""
+        if not self.client:
+            return MappingResult(resolved_tag="UNKNOWN", reason="API Key not provided", confidence=0.0)
+
+        # Phase 2에서 'value'에 clean_value가 저장됨
+        node_data = self.graph.nodes.get(node_id, {})
+        tag_text = node_data.get('value', '')
+        
+        # 1차 후보 추출 (RapidFuzz)
+        candidates = process.extract(tag_text, self.system_tags, scorer=fuzz.WRatio, limit=5)
+        context = self.get_node_context(node_id)
+        
+        prompt = f"""
+        {category_prompt}
+        P&ID 도면의 태그 '{tag_text}'를 실제 시스템 태그와 매핑해야 합니다.
+        위상 맥락: {context}
+        후보 리스트: {candidates}
+        
+        반드시 다음 JSON 형식으로만 응답하세요:
+        {{
+            "resolved_tag": "태그명 또는 UNKNOWN",
+            "reason": "매핑 이유",
+            "confidence": 0.0~1.0
+        }}
+        """
+        
+        try:
+            response = await self.client.chat.completions.create(
+                model="gpt-4-turbo",
+                messages=[{"role": "user", "content": prompt}],
+                response_format={ "type": "json_object" } # JSON 모드 강제
+            )
+            raw_content = response.choices[0].message.content
+            # Pydantic을 통한 유효성 검사
+            return MappingResult.model_validate_json(raw_content)
+        except Exception as e:
+            print(f"Error resolving node {node_id}: {e}")
+            return MappingResult(resolved_tag="UNKNOWN", reason=f"Error: {str(e)}", confidence=0.0)
+
+    # --- 전문화된 Worker 함수들 ---
+    async def extract_transmitters(self, node_ids: List[str]) -> Dict[str, MappingResult]:
+        prompt = "당신은 계측기 전문 엔지니어입니다. 특히 Pressure/Flow/Level Transmitter 매핑에 특화되어 있습니다."
+        tasks = [self._resolve_generic(nid, prompt) for nid in node_ids]
+        results = await asyncio.gather(*tasks)
+        return dict(zip(node_ids, results))
+
+    async def extract_valves(self, node_ids: List[str]) -> Dict[str, MappingResult]:
+        prompt = "당신은 밸브 및 액추에이터 전문 엔지니어입니다. 밸브의 개폐 상태 및 제어 태그 매핑에 특화되어 있습니다."
+        tasks = [self._resolve_generic(nid, prompt) for nid in node_ids]
+        results = await asyncio.gather(*tasks)
+        return dict(zip(node_ids, results))
+
+    async def extract_equipment(self, node_ids: List[str]) -> Dict[str, MappingResult]:
+        prompt = "당신은 공정 설비 전문 엔지니어입니다. 펌프, 탱크, 열교환기 등의 메인 설비 태그 매핑에 특화되어 있습니다."
+        tasks = [self._resolve_generic(nid, prompt) for nid in node_ids]
+        results = await asyncio.gather(*tasks)
+        return dict(zip(node_ids, results))
+
+def validate_mapping(resolved_tag: str, symbol_type: str, tag_metadata: Dict[str, Any]) -> Tuple[bool, str]:
+    """심볼 타입과 실제 태그 메타데이터의 엄격한 일치 여부 검증"""
+    if resolved_tag == "UNKNOWN":
+        return False, "Tag not resolved"
+
+    # 단순 키워드가 아닌 허용 단위(Unit) 정의
+    unit_map = {
+        "Pressure Transmitter": ["bar", "psi", "kPa", "Pa"],
+        "Flow Meter": ["m3/h", "lpm", "kg/h"],
+        "Temperature Sensor": ["°C", "C", "K", "°F"]
+    }
+    
+    actual_unit = tag_metadata.get('unit', '').strip()
+    allowed_units = unit_map.get(symbol_type, [])
+    
+    # 1. 단위 일치 확인 (최우선)
+    if actual_unit and actual_unit in allowed_units:
+        return True, "Unit Match"
+    
+    # 2. 단위가 없는 경우 설명(Description) 기반 2차 검증
+    actual_desc = tag_metadata.get('description', '').lower()
+    expected_keywords = {
+        "Pressure Transmitter": ["pressure", "press"],
+        "Flow Meter": ["flow", "flowrate"],
+        "Temperature Sensor": ["temp", "temperature"]
+    }
+    
+    keywords = expected_keywords.get(symbol_type, [])
+    if any(kw in actual_desc for kw in keywords):
+        return True, "Description Match (Unit Missing)"
+        
+    return False, "Mismatch: Symbol type and Tag metadata do not align"
--- a/Pipeline/pid_topology_builder.py
+++ b/Pipeline/pid_topology_builder.py
@@ -0,0 +1,190 @@
+import networkx as nx
+from shapely.geometry import box, Point, LineString
+import json
+from typing import List, Dict, Any, Optional, Tuple
+
+class PidTopologyBuilder:
+    def __init__(self, geometric_data: List[Dict[str, Any]], all_extracted_tags: Optional[List[Dict[str, Any]]] = None, config: Optional[Dict[str, float]] = None):
+        """
+        - geometric_data: Phase 1에서 추출된 기하학적 데이터 (List of dicts)
+        - all_extracted_tags: 통합된 태그 리스트
+        - config: {'dist_threshold': 50.0, 'tag_threshold': 100.0} 등 설정값
+        """
+        self.data = geometric_data
+        self.all_tags = all_extracted_tags if all_extracted_tags else []
+        
+        if config:
+            self.config = config
+        else:
+            try:
+                with open('futurePlan/End-to-End P&ID Graph Pipeline/topology_config.json', 'r') as f:
+                    self.config = json.load(f)
+            except Exception:
+                self.config = {'dist_threshold': 50.0, 'tag_threshold': 100.0, 'merge_threshold': 2.0}
+        
+        self.G = nx.DiGraph()      # 방향성 그래프 생성
+
+    def build_graph(self):
+        # 1. 노드 병합 및 추가 (Merging)
+        self.merged_data = self._merge_nodes()
+        for item in self.merged_data:
+            bbox_vals = item['bbox']
+            bbox_geom = box(bbox_vals['min_x'], bbox_vals['min_y'], bbox_vals['max_x'], bbox_vals['max_y'])
+            
+            self.G.add_node(item['entity_id'],
+                           type=item['entity_type'],
+                           bbox=bbox_geom,
+                           value=item.get('clean_value'),
+                           layer=item.get('layer'))
+
+        # 2. 분산 추출된 태그 통합 및 노드 추가
+        for tag in self.all_tags:
+            bbox_vals = tag['bbox']
+            bbox_geom = box(bbox_vals['min_x'], bbox_vals['min_y'], bbox_vals['max_x'], bbox_vals['max_y'])
+            self.G.add_node(tag['entity_id'],
+                           type='TEXT',
+                           bbox=bbox_geom,
+                           value=tag.get('clean_value') or tag.get('tagName'))
+
+        # 3. 태그-설비 논리적 연결 (Association)
+        tags = [n for n, d in self.G.nodes(data=True) if d['type'] == 'TEXT']
+        equipments = [n for n, d in self.G.nodes(data=True) if d['type'] not in ['TEXT', 'LINE', 'LWPOLYLINE']]
+
+        for tag in tags:
+            best_match = self._find_nearest_equipment(tag, equipments)
+            if best_match:
+                self.G.add_edge(tag, best_match, relation='associated_with')
+
+        # 4. 배관 기반 물리적 연결 (Pipe) [개선: Proximity 기반]
+        lines = [n for n, d in self.G.nodes(data=True) if d['type'] in ['LINE', 'LWPOLYLINE']]
+        for line_id in lines:
+            # 저장된 merged_data에서 coordinates 찾기
+            original_item = next((item for item in self.merged_data if item['entity_id'] == line_id), None)
+            if not original_item:
+                original_item = next((item for item in self.data if item['entity_id'] == line_id), None)
+            
+            if not original_item or not original_item.get('coordinates'):
+                continue
+                
+            coords = original_item['coordinates']
+            line_geom = LineString(coords)
+            
+            connected_nodes = []
+            for eq_id in equipments:
+                eq_bbox = self.G.nodes[eq_id]['bbox']
+                # End-point뿐만 아니라 Line 전체와 BBox 간의 최단 거리 측정
+                if line_geom.distance(eq_bbox) < self.config['dist_threshold']:
+                    connected_nodes.append(eq_id)
+            
+            # 중복 제거
+            connected_nodes = list(set(connected_nodes))
+            
+            if len(connected_nodes) >= 2:
+                # 방향성 추론 (단순화: 첫 번째 -> 두 번째)
+                self.G.add_edge(connected_nodes[0], connected_nodes[1], relation='pipe')
+            elif len(connected_nodes) == 1:
+                # 단일 연결 노드 처리 (나중에 분석용)
+                pass
+
+    def _find_nearest_equipment(self, tag_id, equipment_ids):
+        tag_bbox = self.G.nodes[tag_id]['bbox']
+        min_dist = float('inf')
+        nearest = None
+        for eq_id in equipment_ids:
+            eq_bbox = self.G.nodes[eq_id]['bbox']
+            dist = tag_bbox.distance(eq_bbox)
+            if dist < min_dist:
+                min_dist = dist
+                nearest = eq_id
+        return nearest if min_dist < self.config['tag_threshold'] else None
+
+    def validate_topology(self):
+        """위상 무결성 검증"""
+        isolated = list(nx.isolates(self.G))
+        return {
+            "isolated_nodes": isolated, 
+            "node_count": self.G.number_of_nodes(), 
+            "edge_count": self.G.number_of_edges()
+        }
+
+    def _merge_nodes(self) -> List[Dict[str, Any]]:
+        """기하학적으로 거의 동일한 노드들을 병합하여 그래프 단순화"""
+        if not self.data:
+            return []
+            
+        merge_threshold = self.config.get('merge_threshold', 2.0)
+        merged = []
+        visited = set()
+
+        for i in range(len(self.data)):
+            if i in visited:
+                continue
+            
+            current = self.data[i]
+            current_bbox = box(*(current['bbox']['min_x'], current['bbox']['min_y'], current['bbox']['max_x'], current['bbox']['max_y']))
+            
+            # 동일 타입이면서 BBox 거리가 매우 가까운 노드들 탐색
+            cluster = [current]
+            visited.add(i)
+            
+            for j in range(i + 1, len(self.data)):
+                if j in visited:
+                    continue
+                
+                target = self.data[j]
+                if target['entity_type'] != current['entity_type']:
+                    continue
+                    
+                target_bbox = box(*(target['bbox']['min_x'], target['bbox']['min_y'], target['bbox']['max_x'], target['bbox']['max_y']))
+                if current_bbox.distance(target_bbox) < merge_threshold:
+                    cluster.append(target)
+                    visited.add(j)
+            
+            # 클러스터 대표값 설정 (첫 번째 노드 기준, BBox는 합집합으로 확장)
+            if len(cluster) > 1:
+                # BBox 합집합 계산
+                min_x = min(c['bbox']['min_x'] for c in cluster)
+                min_y = min(c['bbox']['min_y'] for c in cluster)
+                max_x = max(c['bbox']['max_x'] for c in cluster)
+                max_y = max(c['bbox']['max_y'] for c in cluster)
+                
+                representative = cluster[0].copy()
+                representative['bbox'] = {'min_x': min_x, 'min_y': min_y, 'max_x': max_x, 'max_y': max_y}
+                # 병합된 원본 ID 리스트 저장
+                representative['merged_ids'] = [c['entity_id'] for c in cluster]
+                merged.append(representative)
+            else:
+                merged.append(current)
+                
+        return merged
+
+    def save_graph(self, output_path: str):
+        """그래프 구조를 JSON 형태로 저장 (NetworkX의 node_link_data 활용) {
+            "nodes": [...],
+            "links": [...]
+        }"""
+        from networkx.readwrite import json_graph
+        data = json_graph.node_link_data(self.G)
+        
+        # shapely geometry 객체는 JSON 직렬화가 안 되므로 변환
+        for node in data['nodes']:
+            if 'bbox' in node:
+                bbox = node['bbox']
+                node['bbox'] = {
+                    'min_x': bbox.bounds[0],
+                    'min_y': bbox.bounds[1],
+                    'max_x': bbox.bounds[2],
+                    'max_y': bbox.bounds[3]
+                }
+        
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, ensure_ascii=False, indent=4)
+        return output_path
+
+def analyze_impact(graph, start_node):
+    """특정 설비 장애 시 하류(Downstream)에 영향을 받는 모든 노드 추출"""
+    if start_node not in graph:
+        return []
+    # BFS를 통해 도달 가능한 모든 노드 탐색
+    impacted_nodes = nx.descendants(graph, start_node)
+    return list(impacted_nodes)
--- a/Pipeline/shared_geo_data.json
+++ b/Pipeline/shared_geo_data.json
--- a/Pipeline/test_pipeline_phase2.py
+++ b/Pipeline/test_pipeline_phase2.py
@@ -0,0 +1,61 @@
+import json
+import sys
+import os
+
+# 경로 설정을 위해 현재 파일의 디렉토리를 sys.path에 추가
+current_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(current_dir)
+
+from pid_geometric_extractor import PidGeometricExtractor
+from pid_topology_builder import PidTopologyBuilder, analyze_impact
+
+def run_pipeline():
+    # 1. 경로 설정 (현재 디렉토리 기준 상대 경로)
+    input_dxf = os.path.join(current_dir, "No-10_Plant_PID.dxf")
+    geo_json_path = os.path.join(current_dir, "shared_geo_data.json")
+    graph_json_path = os.path.join(current_dir, "pid_graph_topology.json")
+
+    print("--- Phase 1: Geometric Extraction ---")
+    try:
+        extractor = PidGeometricExtractor(input_dxf)
+        extractor.extract_and_save(geo_json_path)
+        print(f"Geometric data saved to {geo_json_path}")
+    except Exception as e:
+        print(f"Phase 1 failed: {e}")
+        return
+
+    print("\n--- Phase 2: Topology Modeling ---")
+    try:
+        with open(geo_json_path, 'r', encoding='utf-8') as f:
+            geometric_data = json.load(f)
+        
+        # 테스트를 위해 all_extracted_tags는 빈 리스트로 전달
+        # config를 None으로 전달하여 topology_config.json 설정을 사용하도록 함
+        builder = PidTopologyBuilder(
+            geometric_data=geometric_data,
+            all_extracted_tags=[],
+            config=None
+        )
+        builder.build_graph()
+        
+        # 위상 검증
+        validation = builder.validate_topology()
+        print(f"Topology Validation: {validation}")
+        
+        # 그래프 저장
+        builder.save_graph(graph_json_path)
+        print(f"Graph topology saved to {graph_json_path}")
+        
+        # 영향도 분석 테스트 (노드가 존재하는 경우)
+        if validation['node_count'] > 0:
+            sample_node = list(builder.G.nodes())[0]
+            impacted = analyze_impact(builder.G, sample_node)
+            print(f"Impact analysis for node {sample_node}: {impacted}")
+
+    except Exception as e:
+        print(f"Phase 2 failed: {e}")
+        import traceback
+        traceback.print_exc()
+
+if __name__ == "__main__":
+    run_pipeline()
--- a/Pipeline/test_pipeline_phase3.py
+++ b/Pipeline/test_pipeline_phase3.py
@@ -0,0 +1,134 @@
+import json
+import sys
+import os
+import asyncio
+import networkx as nx
+
+# 경로 설정을 위해 현재 파일의 디렉토리를 sys.path에 추가
+current_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(current_dir)
+
+from pid_geometric_extractor import PidGeometricExtractor
+from pid_topology_builder import PidTopologyBuilder
+from pid_intelligent_mapper import IntelligentMapper, validate_mapping
+
+async def run_full_pipeline():
+    # 1. 경로 설정
+    input_dxf = os.path.join(current_dir, "No-10_Plant_PID.dxf")
+    geo_json_path = os.path.join(current_dir, "shared_geo_data.json")
+    graph_json_path = os.path.join(current_dir, "pid_graph_topology.json")
+    mapping_result_path = os.path.join(current_dir, "pid_final_mapping.json")
+
+    # --- Phase 1: Geometric Extraction ---
+    print("\n--- Phase 1: Geometric Extraction ---")
+    try:
+        extractor = PidGeometricExtractor(input_dxf)
+        extractor.extract_and_save(geo_json_path)
+        print(f"Geometric data saved to {geo_json_path}")
+    except Exception as e:
+        print(f"Phase 1 failed: {e}")
+        return
+
+    # --- Phase 2: Topology Modeling ---
+    print("\n--- Phase 2: Topology Modeling ---")
+    try:
+        with open(geo_json_path, 'r', encoding='utf-8') as f:
+            geometric_data = json.load(f)
+        
+        builder = PidTopologyBuilder(
+            geometric_data=geometric_data, 
+            all_extracted_tags=[], 
+            config={'dist_threshold': 50.0, 'tag_threshold': 100.0}
+        )
+        builder.build_graph()
+        builder.save_graph(graph_json_path)
+        print(f"Graph topology saved to {graph_json_path}")
+    except Exception as e:
+        print(f"Phase 2 failed: {e}")
+        return
+
+    # --- Phase 3: Intelligent Mapping ---
+    print("\n--- Phase 3: Intelligent Mapping ---")
+    try:
+        # 1. 그래프 로드
+        with open(graph_json_path, 'r', encoding='utf-8') as f:
+            graph_data = json.load(f)
+        
+        # NetworkX 그래프 복원 (node_link_data 형식 대응)
+        from networkx.readwrite import json_graph
+        G = json_graph.node_link_graph(graph_data)
+
+        # 2. 시스템 태그 리스트 (실제로는 API나 DB에서 가져와야 함)
+        # 테스트를 위한 가상 태그 리스트
+        system_tags = [
+            "PT-101.PV", "PT-102.PV", "FT-201.PV", "LT-301.PV", 
+            "P-101.STATUS", "P-101.SPEED", "V-101.OPEN", "V-101.CLOSE",
+            "T-101.TEMP", "TK-101.LEVEL"
+        ]
+
+        # 3. 매퍼 초기화 (API Key는 환경변수나 설정파일에서 가져오는 것을 권장)
+        api_key = os.getenv("OPENAI_API_KEY", "your-api-key-here")
+        mapper = IntelligentMapper(G, system_tags, api_key=api_key)
+
+        # 4. 노드 분류 및 매핑 실행
+        nodes = list(G.nodes())
+        transmitter_nodes = [n for n in nodes if "Transmitter" in G.nodes[n].get('type', '')]
+        valve_nodes = [n for n in nodes if "Valve" in G.nodes[n].get('type', '')]
+        equipment_nodes = [n for n in nodes if "Equipment" in G.nodes[n].get('type', '') or "Pump" in G.nodes[n].get('type', '')]
+
+        print(f"Mapping {len(transmitter_nodes)} transmitters, {len(valve_nodes)} valves, {len(equipment_nodes)} equipment...")
+
+        # 비동기 실행
+        results = await asyncio.gather(
+            mapper.extract_transmitters(transmitter_nodes),
+            mapper.extract_valves(valve_nodes),
+            mapper.extract_equipment(equipment_nodes)
+        )
+
+        # 결과 통합
+        final_mapping_raw = {}
+        for res in results:
+            final_mapping_raw.update(res)
+
+        # 5. 검증 및 최종 결과 정리
+        # 가상 메타데이터 (실제로는 시스템에서 조회)
+        mock_metadata = {
+            "PT-101.PV": {"unit": "bar", "description": "Pressure Transmitter 101"},
+            "P-101.STATUS": {"unit": "", "description": "Pump 101 Status"},
+        }
+
+        final_results = []
+        for node_id, mapping in final_mapping_raw.items():
+            symbol_type = G.nodes[node_id].get('type', 'Unknown')
+            tag = mapping.resolved_tag
+            meta = mock_metadata.get(tag, {"unit": "", "description": ""})
+            
+            is_valid, val_msg = validate_mapping(tag, symbol_type, meta)
+            
+            final_results.append({
+                "node_id": node_id,
+                "symbol_type": symbol_type,
+                "original_text": G.nodes[node_id].get('value', ''),
+                "resolved_tag": tag,
+                "confidence": mapping.confidence,
+                "reason": mapping.reason,
+                "validation": {
+                    "is_valid": is_valid,
+                    "message": val_msg
+                }
+            })
+
+        # 6. 결과 저장
+        with open(mapping_result_path, 'w', encoding='utf-8') as f:
+            json.dump(final_results, f, indent=4, ensure_ascii=False)
+        
+        print(f"Final mapping results saved to {mapping_result_path}")
+        print(f"Successfully mapped {len(final_results)} nodes.")
+
+    except Exception as e:
+        print(f"Phase 3 failed: {e}")
+        import traceback
+        traceback.print_exc()
+
+if __name__ == "__main__":
+    asyncio.run(run_full_pipeline())
--- a/Pipeline/topology_config.json
+++ b/Pipeline/topology_config.json
@@ -0,0 +1,5 @@
+{
+    "dist_threshold": 20.0,
+    "tag_threshold": 60.0,
+    "merge_threshold": 2.0
+}
--- a/Pipeline/도면처리-RAG
+++ b/Pipeline/도면처리-RAG
@@ -0,0 +1,718 @@
+
+오전 3:39 Claude 응답: 벡터 PDF면 일이 훨씬 수월합니다. OCR 안 거치고 텍스트를 정확하게 뽑아낼 수 있어서 정확도 90%+ 가능한 영역입니다.
+권장 파이프라인 (벡터 PDF 한정)
+1단계 - PyMuPDF로 텍스트 + 좌표 추출
+python
+import fitz  # PyMuPDF
+
+doc = fitz.open("pid.pdf")
+for page in doc:
+    blocks = page.get_text("dict")["blocks"]
+    for b in blocks:
+        for line in b.get("lines", []):
+            for span in line["spans"]:
+                # span["text"], span["bbox"], span["size"], span["font"]
+get_text("dict")가 핵심입니다. 각 텍스트 조각마다 bbox(x0,y0,x1,y1), 폰트, 크기, 회전각까지 다 나옵니다. P&ID에서 수직으로 회전된 라벨도 dir 필드로 잡힙니다.
+2단계 - 도면 영역 vs 타이틀블록 분리
+P&ID는 보통 우측 하단에 타이틀블록(도면번호, 리비전, 작성자)이 있고, 나머지가 도면 영역입니다. 페이지 크기 기준으로 y > 0.85 * page_height and x > 0.7 * page_width 같은 식으로 ROI를 나눠놓으면 나중에 LLM이 "도면번호를 장비태그로 오인"하는 사고를 막을 수 있습니다.
+3단계 - 태그 정규식 1차 필터
+ISA-5.1 + 보통 플랜트에서 쓰는 패턴들:
+python
+TAG_PATTERNS = [
+    r'^[A-Z]{1,4}-\d{3,5}[A-Z]?$',        # P-101A, FT-2034
+    r'^\d{2,3}-[A-Z]{1,4}-\d{3,5}[A-Z]?$', # 10-P-101A (유닛번호 포함)
+    r'^[A-Z]{2,4}\d{3,5}$',                # PT101 (하이픈 없는 변형)
+]
+회사마다 명명 규칙이 다르니 실제 도면 한 장 보고 보정해야 합니다.
+4단계 - 공간 클러스터링으로 "장비-태그-속성" 묶기
+장비 태그(P-101A)와 그 아래/옆에 적힌 설명(CENTRIFUGAL PUMP, 100 GPM @ 50m)은 보통 50~100픽셀 이내에 모여있습니다. scikit-learn의 DBSCAN으로 묶으면 깔끔합니다:
+python
+from sklearn.cluster import DBSCAN
+import numpy as np
+
+centers = np.array([[(b[0]+b[2])/2, (b[1]+b[3])/2] for b in bboxes])
+clusters = DBSCAN(eps=80, min_samples=1).fit(centers)
+eps는 도면 스케일 보고 조정하시면 됩니다.
+5단계 - Qwen3-Coder에 구조화 입력
+json
+{
+  "page": 3,
+  "drawing_no": "P-1234-001",
+  "clusters": [
+    {
+      "id": "c1",
+      "texts": ["P-101A", "CENTRIFUGAL PUMP", "100 GPM", "50m HEAD"],
+      "bbox": [1200, 800, 1350, 920]
+    },
+    {
+      "id": "c2", 
+      "texts": ["V-301", "STORAGE TANK", "5000L"],
+      "bbox": [400, 600, 580, 780]
+    }
+  ]
+}
+프롬프트에서 "각 cluster를 하나의 장비로 보고 tag, equipment_type, specs 필드로 정규화하라"고 시키면 환각 거의 없이 잘 뽑습니다.
+추가 팁
+라인/심볼은 별도 트랙으로: page.get_drawings()로 벡터 라인/원/사각형도 추출됩니다. 펌프 심볼(원형) 위치와 텍스트 위치를 매칭하면 "이 텍스트가 어떤 심볼에 붙어있는지"까지 알 수 있는데, 이건 phase 2로 미루시는 걸 권장합니다. 텍스트만으로도 충분히 가치 있는 결과가 나옵니다.
+좌표계 주의: PyMuPDF는 좌상단이 (0,0)이고 y가 아래로 증가합니다. CAD 출신이면 헷갈리실 수 있어서 미리 말씀드립니다.
+검증 데이터 만들기: 도면 5~10장을 수동으로 라벨링해놓고 추출 결과랑 비교하는 회귀 테스트를 만들어두시면, 나중에 다른 EPC사 도면 들어왔을 때 정규식이나 eps 값 튜닝 빠르게 됩니다.
+RAG 연결 시: 추출된 {tag, equipment_type, specs, drawing_no, page} 구조를 그대로 메타데이터로 쓰면 "10-P-101A 펌프 사양 알려줘" 같은 쿼리에 정확히 답할 수 있습니다. 청크는 텍스트 덩어리가 아니라 cluster 단위로 만드는 게 핵심입니다.
+C# 운영 환경 고려하면, 이 전처리 파이프라인은 Python FastAPI로 띄우고 .NET OPC UA 쪽에서 HTTP로 호출하는 구조를 추천드립니다. PyMuPDF는 AGPL이라 상용 배포 시 라이선스 확인하시거나 Artifex 상용 라이선스 구매 옵션도 있다는 점은 미리 체크해두시면 좋습니다.
+
+
+
+서버 잘 구성하셨네요. 그런데 결론부터 말씀드리면 여기에 그대로 PDF 전처리를 붙이는 건 비추천입니다. 이유와 권장 구조를 짚어드리겠습니다.
+현재 extract_pid_tags의 문제점
+extract_pid_tags(text, source_type)는 이미 텍스트가 추출된 상태를 가정합니다. 즉 누군가가 PDF에서 텍스트를 뽑아서 문자열로 넘겨야 하는데, 이게 가장 어려운 부분입니다. 게다가:
+
+좌표 정보가 사라짐: 문자열 한 덩어리로 받으니 "P-101A 옆에 CENTRIFUGAL PUMP가 있다"는 공간 관계가 날아갑니다. LLM은 그냥 텍스트 뭉치 안에서 패턴 매칭하는 수준이 됩니다.
+12,000자 제한: 큰 P&ID 한 장에서 텍스트만 뽑아도 이걸 넘기 쉽고, 여러 장이면 답 없습니다.
+환각 위험: 좌표/구조 없이 텍스트만 보면 모델이 태그를 잘못 묶거나 만들어냅니다.
+
+권장 구조: 별도 도구로 분리
+이 MCP 서버에 PDF 파싱 도구를 추가하시되, 책임을 나누는 게 좋습니다:
+parse_pid_pdf(pdf_path)              ← 새로 추가 (PyMuPDF로 좌표+텍스트 추출)
+    ↓ 구조화된 JSON (clusters)
+extract_pid_tags_structured(clusters) ← 기존 extract_pid_tags 개선
+    ↓ 태그 리스트
+match_pid_tags(pid_tags, experion_tags) ← 이건 그대로 좋음
+추가할 도구 (기존 server.py에 그대로 붙이시면 됩니다)
+python
+# ── P&ID PDF 파서 (벡터 PDF 전용) ─────────────────────────────────────────────
+
+@mcp.tool()
+def parse_pid_pdf(pdf_path: str, cluster_eps: float = 80.0) -> str:
+    """벡터 P&ID PDF에서 텍스트 + 좌표를 추출하고 공간 클러스터링합니다.
+    
+    CAD에서 플롯된 벡터 PDF 전용. 스캔본은 별도 OCR 필요.
+    
+    Args:
+        pdf_path:    PDF 파일 절대 경로
+        cluster_eps: DBSCAN 거리 임계값(픽셀). 도면 스케일에 따라 조정.
+    
+    Returns:
+        JSON: { success, pages: [{page, drawing_no, clusters: [{id, texts, bbox}]}] }
+    """
+    try:
+        import fitz
+        from sklearn.cluster import DBSCAN
+        import numpy as np
+        import re as _re
+        
+        doc = fitz.open(pdf_path)
+        pages_out = []
+        
+        for page_idx, page in enumerate(doc):
+            spans = []
+            for block in page.get_text("dict")["blocks"]:
+                for line in block.get("lines", []):
+                    for span in line.get("spans", []):
+                        txt = span["text"].strip()
+                        if txt:
+                            spans.append({
+                                "text": txt,
+                                "bbox": list(span["bbox"]),
+                                "size": span["size"],
+                                "dir":  list(line.get("dir", [1, 0])),  # 회전 감지
+                            })
+            
+            if not spans:
+                pages_out.append({"page": page_idx + 1, "drawing_no": None, "clusters": []})
+                continue
+            
+            # 타이틀블록(우측 하단) 분리
+            pw, ph = page.rect.width, page.rect.height
+            title_spans = [s for s in spans 
+                          if s["bbox"][0] > pw * 0.7 and s["bbox"][1] > ph * 0.85]
+            drawing_no = None
+            for s in title_spans:
+                # 도면번호 패턴 (예: P-1234-001, PID-001)
+                if _re.match(r'^[A-Z&]+-?\d+(-\d+)*$', s["text"]):
+                    drawing_no = s["text"]
+                    break
+            
+            drawing_spans = [s for s in spans if s not in title_spans]
+            if not drawing_spans:
+                pages_out.append({"page": page_idx + 1, "drawing_no": drawing_no, "clusters": []})
+                continue
+            
+            # 공간 클러스터링
+            centers = np.array([
+                [(s["bbox"][0] + s["bbox"][2]) / 2, (s["bbox"][1] + s["bbox"][3]) / 2]
+                for s in drawing_spans
+            ])
+            labels = DBSCAN(eps=cluster_eps, min_samples=1).fit_predict(centers)
+            
+            clusters = {}
+            for span, lbl in zip(drawing_spans, labels):
+                clusters.setdefault(lbl, []).append(span)
+            
+            cluster_list = []
+            for lbl, members in clusters.items():
+                xs = [m["bbox"][0] for m in members] + [m["bbox"][2] for m in members]
+                ys = [m["bbox"][1] for m in members] + [m["bbox"][3] for m in members]
+                cluster_list.append({
+                    "id": f"p{page_idx+1}c{lbl}",
+                    "texts": [m["text"] for m in members],
+                    "bbox": [min(xs), min(ys), max(xs), max(ys)],
+                })
+            
+            pages_out.append({
+                "page": page_idx + 1,
+                "drawing_no": drawing_no,
+                "clusters": cluster_list,
+            })
+        
+        doc.close()
+        return json.dumps({"success": True, "pages": pages_out}, ensure_ascii=False)
+    
+    except Exception as e:
+        return json.dumps({"success": False, "error": f"PDF 파싱 실패: {e}"}, ensure_ascii=False)
+그리고 기존 extract_pid_tags는 두 가지 입력을 다 받게 살짝 손보시면 됩니다. 클러스터 입력일 때는 LLM에 이렇게 넘기세요:
+python
+# extract_pid_tags 안에서 source_type == "clusters"일 때
+user_msg = (
+    "Each cluster represents one equipment/instrument with nearby texts grouped by spatial proximity.\n"
+    "Identify the tag (matching ISA-5.1 patterns) and the equipment description from each cluster.\n\n"
+    f"Clusters:\n{json.dumps(clusters, ensure_ascii=False, indent=2)}"
+)
+실제 워크플로우
+C#에서 이렇게 호출하시게 됩니다:
+1. parse_pid_pdf("C:/drawings/unit10.pdf")
+   → { pages: [{drawing_no: "P-1234-001", clusters: [...]}] }
+
+2. extract_pid_tags(clusters_json, "clusters")  
+   → { tags: [{tagNo: "P-101A", equipmentName: "CENTRIFUGAL PUMP", ...}] }
+
+3. match_pid_tags(pid_tags, experion_tags)  ← 이미 있음
+   → { mappings: [{pidTag: "P-101A", experionTag: "p-101a.pv", ...}] }
+
+4. (선택) 결과를 RAG에 인덱싱 — 메타데이터에 drawing_no, page, bbox 포함
+의존성 추가
+bash
+pip install pymupdf scikit-learn numpy
+PyMuPDF는 AGPL 라이선스라 상용 배포 시 주의하셔야 합니다. 사내 시스템이면 보통 문제없지만, 외부 고객사 납품이면 Artifex 상용 라이선스 검토하시거나 pdfplumber(MIT)로 대체하는 것도 방법입니다. pdfplumber도 chars 좌표 다 줍니다.
+짚어드릴 만한 디테일 두 가지
+1. _DB_SCHEMA에 P&ID 매핑 테이블도 추가하시면 좋을 듯: pid_tag_mapping 같은 테이블을 만들어서 (drawing_no, pid_tag, experion_tag, bbox, page) 저장하시면, 나중에 "이 펌프 트렌드 보여줘" 같은 자연어 쿼리가 P&ID → Experion 태그 → history_table 쿼리로 자연스럽게 이어집니다.
+2. RAG 컬렉션 추가 고려: 현재 experion-opc-docs처럼 pid-drawings 컬렉션을 하나 더 만들어서 클러스터 단위로 임베딩해두시면, "원심펌프 P-101A 사양" 같은 질문에 도면 위치까지 답해줄 수 있습니다.
+요약하면, 이 MCP 서버에 붙이는 건 정답이지만 extract_pid_tags에 raw 텍스트 던지지 마시고, parse_pid_pdf 도구를 추가해서 좌표 정보 살린 클러스터를 LLM에 넘기는 구조로 가시는 걸 강력히 권장드립니다.
+
+
+
+SYMBOL & LEGEND 시트가 보통은 별도로 있는데, 그걸 학습하게 하는 것은?
+
+오전 4:01
+Claude 응답: 좋은 지적입니다.
+좋은 지적입니다. 실무에서 P&ID 첫 1~2장에 항상 SYMBOL & LEGEND 시트가 있고, 회사/프로젝트마다 심볼 모양과 약어가 달라서 이걸 활용하는 건 정확도에 결정적입니다.
+
+다만 "학습"이라는 표현은 신중하게 봐야 합니다. 현재 구조에서 가능한/불가능한 것을 짚어보겠습니다.
+
+가능한 방식 3가지 (난이도 순)
+방식 1: Few-shot 컨텍스트 주입 (즉시 가능, 권장)
+LEGEND 시트에서 추출한 (약어 → 풀네임) 매핑을 LLM 프롬프트에 동적으로 끼워넣는 방식입니다. 진짜 학습이 아니라 in-context learning이지만, 효과는 충분합니다.
+
+LEGEND 시트 파싱
+  ↓
+{"FIC": "Flow Indicating Controller", 
+ "PIT": "Pressure Indicating Transmitter", ...}
+  ↓
+extract_pid_tags 호출 시 system 프롬프트에 주입
+  ↓
+LLM이 이 프로젝트 전용 약어로 정확히 해석
+장점: 파인튜닝 없이 즉시 효과. 프로젝트마다 LEGEND가 달라도 자동 대응. 단점: 프롬프트 길이 증가 (보통 20~50개 약어면 1~2KB라 무시 가능).
+
+방식 2: 심볼 형상 매칭 (phase 2 영역)
+LEGEND 시트의 심볼 도형(사각형+원, 단일 원, 마름모 등)을 page.get_drawings()로 추출 → 도면 페이지의 도형과 매칭. 이게 진짜 "DCS vs 현장계기" 정확 판별의 정공법입니다.
+
+장점: 텍스트 없이도 심볼 종류 판별 가능. 단점: 도형 비교 알고리즘 필요 (벡터 path 정규화, 회전/스케일 보정). 개발 공수 큼.
+
+방식 3: Vision 모델 파인튜닝 (지금은 비추)
+YOLO나 Pix2Struct를 LEGEND 시트로 파인튜닝. 실무에서는 잘 안 씁니다 — 회사마다 LEGEND가 다른데 매번 파인튜닝하면 운영 부담이 큽니다.
+
+권장: 방식 1을 phase 1.5로 추가
+phase 1(기본 파싱) 끝나고 phase 2(심볼 형상) 가기 전 단계로 넣으면 적절합니다. 작업지시서에 추가할 항목은:
+
+LEGEND 시트 자동 감지: 페이지 텍스트에 LEGEND, SYMBOL, ABBREVIATION 같은 키워드가 있고 표 형태 텍스트가 많은 페이지를 LEGEND로 식별.
+LEGEND 파서: 좌측 컬럼(약어) + 우측 컬럼(설명) 패턴으로 좌표 기반 매칭. 이건 P&ID 본문 클러스터링과 다른 알고리즘이 필요합니다 (좌→우 가로 페어링).
+MCP 도구 추가:
+extract_pid_legend(pdf_path) — LEGEND 페이지에서 약어 사전 추출
+extract_pid_tags에 legend: dict 옵션 파라미터 추가
+RAG 활용 옵션: 추출한 LEGEND를 Qdrant에 별도 컬렉션(pid-legend-{project_id})으로 저장하면, 같은 프로젝트의 다른 도면 처리 시 재사용 가능. 이게 가장 깔끔한 운영 방식입니다.
+실무 팁
+LEGEND 시트가 표(table) 형태가 아닌 자유 배치인 경우가 많습니다 (심볼 그림 + 옆에 설명). 이 경우 단순 좌→우 페어링이 안 되고, 다음 같은 휴리스틱이 필요합니다:
+
+심볼 도형의 bbox를 잡고
+그 도형 우측 또는 하단의 가장 가까운 텍스트 블록을 설명으로 페어링
+약어는 보통 도형 안에 있으므로 도형 내부 텍스트 우선
+작업지시서에 방식 1(LEGEND 텍스트 추출 + 프롬프트 주입)을 phase 1.5로 추가할까요? 아니면 phase 1에 통합해서 한 번에 작업시킬까요?
+
+추가로 한 가지 결정해주실 게 있습니다 — LEGEND를 도면 단위로 매번 파싱할지, 프로젝트 단위로 한 번 파싱 후 RAG에 캐시할지에 따라 구조가 달라집니다. 실무 운영 패턴(같은 프로젝트의 도면이 여러 장씩 들어오는지) 알려주시면 그에 맞게 설계해드리겠습니다.
+
+오전 4:06
+Claude 응답: PDF 컨텐츠 일부만 컨텍스트로 들어왔네요.
+PDF 컨텐츠 일부만 컨텍스트로 들어왔네요. 실제 파일을 직접 봐야 정확히 판단할 수 있으니 읽어보겠습니다.
+
+
+파일 조회함, 명령 실행함
+파일 조회함, 명령 실행함
+1페이지짜리 A4 PDF네요. 시각적으로 봐야 심볼 모양과 레이아웃을 파악할 수 있습니다.
+
+
+텍스트 레이아웃 확인
+텍스트 레이아웃 확인
+텍스트 추출이 거의 안 됩니다. 도면이 거의 다 벡터 그래픽으로 그려져 있어서 텍스트가 폰트 임베딩 없이 path로 변환된 것 같습니다. 직접 페이지를 시각화해서 봐야 합니다.
+
+
+명령 실행함, 파일 조회함
+명령 실행함, 파일 조회함
+좋은 자료네요. 90도 회전된 도면이긴 한데(공장 도면 흔한 형태) 내용은 다 보입니다. 이걸 보고 정확히 판단해보겠습니다.
+
+확인된 LEGEND 시트 구조
+이 도면은 **신원리파인드퓨얼(SHINWON REFINED FUEL)**의 표준 P&ID 심볼/레전드 시트로, 다음 섹션들로 구성되어 있습니다:
+
+텍스트 기반 (표/사전 형태) — 자동 추출 쉬움:
+
+INSTRUMENT IDENTIFICATION TABLE: First Letter / Succeeding Letters (A=Analysis, F=Flow Rate, ...)
+PIPE LINE NUMBER: 라인번호 명명규칙 (예: P-10107-500A-F1-H 100)
+INSTRUMENT NUMBER DESIGNATION
+FLUID NAME ABBREVIATION (P.CHE=Process Fluid, CWS=Cooling Water Supply, ...)
+PIPING CLASS & MATERIAL (S1=SPPS380, F2=STS304, ...)
+INSULATION CODE (H=Heat Conservation, P=Personnel Protection, ...)
+ABBREVIATIONS IDENTIFIED WITH VALVES (FC, FO, FS, ...)
+ABBREVIATIONS FOR DETECTING GAS (HC=Hydrocarbon, EA=Ethyl Acetate, ...)
+EQUIPMENT IDENTIFICATION (C=Column, K=Compressor, P=Pump, ...)
+심볼 그림 기반 (도형+라벨) — 형상 인식 필요:
+
+LINE AND VALVE SYMBOLS (밸브 종류별 도형)
+INSTRUMENT SYMBOLS (DCS=사각형+원, 현장계기=원, Interlock=마름모 등) ← 사용자가 처음 말씀하신 그것
+INSTRUMENT LINE SYMBOLS (전기/공압/소프트웨어 신호선)
+ROTATIONARY EQUIPMENT SYMBOLS (펌프 종류별)
+VALVE CONNECTION SYMBOLS
+작업지시서 어떻게 보강할지
+이 LEGEND 시트의 특성을 보고 명확해진 게 있습니다:
+
+텍스트 사전 부분은 phase 1.5에서 즉시 활용 가능 — 표 형태라 좌→우 페어링으로 추출 쉽습니다.
+심볼 형상 부분은 phase 2 본격 작업 — 다만 LEGEND 시트가 있으면 형상 인식이 훨씬 쉬워집니다. "이 회사의 DCS 심볼은 정확히 이렇게 생겼다"는 레퍼런스 도형을 얻을 수 있으니까요.
+이 PDF는 텍스트 추출이 거의 안 됩니다 (pdftotext 결과가 2줄). 폰트가 path로 변환된 케이스입니다. 즉 PyMuPDF의 get_text()도 실패할 가능성이 높습니다. 이건 작업지시서에 중요한 caveat으로 추가해야 합니다.
+확인해드릴 게 있는데, 본 작업 대상 P&ID 본문 도면도 이런 식으로 텍스트가 path로 변환되어 있나요? 아니면 LEGEND 시트만 그렇고 본문은 텍스트가 살아있나요?
+
+이게 중요한 이유: 만약 본문도 path 변환이면 PyMuPDF만으로는 안 되고 OCR이 필수가 됩니다. 그러면 phase 구성을 다시 짜야 합니다 (phase 1을 OCR 기반으로 다시 설계).
+
+본문 도면 한 장만 같은 방식으로 pdftotext 돌려보시고 텍스트가 나오는지 확인 부탁드려도 될까요? 또는 본문 도면 PDF 한 장 올려주시면 제가 직접 확인하겠습니다.
+
+입력 내용: 그럼 오토캐드 2023에서 어떤 형식으로 pdf출력하면 되는데?
+그럼 오토캐드 2023에서 어떤 형식으로 pdf출력하면 되는데?
+
+오전 4:08
+Claude 응답: 결론부터: AutoCAD 2023에서 DWG to PDF.
+결론부터: AutoCAD 2023에서 DWG to PDF.pc3로 플롯하시되, 몇 가지 옵션을 정확히 켜야 합니다.
+
+권장 설정
+플로터(Plotter): DWG To PDF.pc3 (AutoCAD 내장 드라이버)
+
+Properties → Custom Properties 들어가서:
+
+항목	설정값	이유
+Vector quality	1200 dpi 이상	벡터 라인 정밀도
+Raster image quality	300 dpi	래스터 이미지가 있을 경우
+Capture fonts used in the drawing	체크 ✅	가장 중요 — 폰트 임베딩
+Convert all text to geometry	체크 해제 ❌	절대 금지 — 텍스트가 path로 변환됨
+Include layer information	체크 (선택)	디버깅 시 유용
+Include hyperlinks	체크 해제	불필요
+핵심: "Convert all text to geometry"를 끄세요
+업로드해주신 LEGEND 시트가 텍스트 추출 안 됐던 이유가 바로 이 옵션이 켜져 있었기 때문입니다. 이게 켜지면:
+
+AutoCAD가 모든 텍스트를 벡터 path(선분/곡선)로 변환해서 PDF에 박아넣음
+PDF에는 글자가 "그림"으로 들어가서 pdftotext, PyMuPDF, 어떤 파서도 텍스트로 읽을 수 없음
+OCR을 쓰는 수밖에 없게 됨
+이 옵션은 저작권 보호용(받는 사람이 텍스트 못 빼가게)으로 켜는 건데, 사내 분석/RAG 용도면 반드시 끄셔야 합니다.
+
+폰트 관련 추가 팁
+AutoCAD가 SHX 폰트(romans.shx, txt.shx 같은 AutoCAD 전용 벡터 폰트)를 쓰면, "Capture fonts" 옵션이 있어도 PDF에 텍스트로 안 들어가고 path로 변환됩니다. 해결 방법:
+
+TrueType 폰트로 통일: 도면의 텍스트 스타일을 Arial, 맑은 고딕, Romans TTF 등 TTF로 변경. STYLE 명령에서 폰트 교체.
+부득이 SHX를 써야 한다면: 최소한 영문 태그(FIC, PT 등)는 TTF 폰트의 별도 텍스트 스타일로 작성. 한글 라벨은 SHX여도 영향 적음 (어차피 OCR 권장).
+검증 방법
+플롯한 PDF가 제대로 나왔는지 1초만에 확인:
+
+bash
+pdftotext -layout your_drawing.pdf - | head -30
+여기서 FIC, 10101, 장비명 같은 텍스트가 줄줄이 나오면 성공입니다. 빈 줄만 나오거나 깨진 글자만 나오면 위 옵션이 잘못 설정된 것이니 다시 플롯해야 합니다.
+
+또는 PDF를 Adobe Reader로 열어서 Ctrl+F로 "FIC" 검색해보세요. 검색되면 OK, 안 되면 path 변환됨.
+
+PUBLISH로 일괄 처리
+도면이 여러 장이면 단건 PLOT보다 PUBLISH 명령(Sheet Set 또는 Layout 일괄)으로 처리하는 게 효율적입니다. 이때도 위 PC3 설정이 동일하게 적용됩니다. PC3 한 번 잘 만들어두시면 재사용 가능합니다.
+
+실제 운영 팁
+플랜트 도면 받으시는 환경이면 EPC사/CAD 작업자에게 다음을 요청하시면 깔끔합니다:
+
+"PDF 플롯 시 'Convert all text to geometry' 옵션 해제하고, TrueType 폰트 기반으로 출력해주세요. 검증은 PDF 열어서 Ctrl+F로 태그명 검색되는지 확인 부탁드립니다."
+
+이 한 줄이 phase 1 전체 작업의 성공 여부를 결정합니다. 외주 업체에서 받는 도면이면 납품 검수 기준으로 명문화해두시면 좋습니다.
+
+확인해보시고 본문 도면 한 장 다시 플롯해서 텍스트 추출되는지 보시면, 그 결과에 따라 작업지시서를 phase 1(텍스트 기반) 그대로 갈지 OCR 추가할지 결정하시면 됩니다.
+
+Symbol-Definition.pdf
+오전 4:22
+Claude 응답: 텍스트 추출이 잘 되네요.
+텍스트 추출이 잘 되네요. 이제 PyMuPDF로 좌표까지 같이 뽑히는지 확인해보겠습니다. 추출되는 텍스트의 구조를 보면 이게 본문 RAG/태깅 자동화 가능 여부를 결정합니다.
+
+
+명령 2개 실행함, 파일 생성됨
+명령 2개 실행함, 파일 생성됨
+흥미롭네요. 44개 텍스트 span만 추출됐습니다. 시각적으로 본 LEGEND 시트에는 수백 개 텍스트가 있어야 하는데요. 이건 섹션 제목들만 텍스트로 살아있고, 표 내용물은 모두 path 변환된 상태입니다.
+
+확인해보겠습니다:
+
+
+파일 생성됨, 명령 실행함
+파일 생성됨, 명령 실행함
+예상대로입니다. 진단 결과를 명확히 정리해드리면:
+
+진단 결과
+텍스트로 살아있는 것: 섹션 헤더 + 타이틀블록만 (44개) — 큰 글씨(8~10pt)와 한글 폰트(Dotum, Gulim 계열)
+
+Path로 변환된 것: 표 내용물 전부 — 약어, 설명, 작은 영문 텍스트들. 3,666개의 짧은 vector path는 거의 다 글자 획(stroke)입니다. 4839개 drawing 중 75%가 글자 path라는 뜻입니다.
+
+원인
+이 PDF는 AutoCAD에서 SHX 폰트(romans.shx 등)로 작성된 작은 영문 텍스트가 path로 변환된 상태입니다. 한글 섹션 제목은 TTF 폰트(Dotum 등)라서 살아있고, 영문 본문은 SHX라서 path가 됐습니다.
+
+이건 사용자가 "Convert all text to geometry"를 켠 게 아니라, AutoCAD의 기본 동작입니다. SHX는 TrueType이 아니라 AutoCAD 전용 벡터 폰트라서 PDF 표준 폰트로 임베딩이 안 되거든요.
+
+해결책 — AutoCAD 작업자에게 요청할 것
+본문 도면 플롯할 때 다음을 해주셔야 합니다:
+
+옵션 A (권장): SHX → TTF 폰트 교체
+도면 안의 텍스트 스타일을 모두 TrueType으로 바꿉니다. AutoCAD 명령으로:
+
+STYLE 명령 → 각 텍스트 스타일 선택 → 폰트를 TTF로 교체
+권장 TTF 폰트:
+
+romans.ttf (있으면 가장 자연스러움 — SHX의 romans와 시각적으로 유사한 TTF가 일부 사이트에 존재)
+Arial (가장 호환성 좋음, 표준 영문 도면 텍스트로 무난)
+Tahoma 또는 Verdana (작은 글씨도 또렷)
+한글은 맑은 고딕 또는 Dotum 유지
+일괄 변경 스크립트(-STYLE + LISP)로 처리하거나, Express Tools의 TXT2MTXT 활용 가능합니다.
+
+옵션 B: AutoCAD 2016+ 의 "SHX 텍스트를 검색/선택 가능한 PDF로" 옵션
+AutoCAD 2016부터 "SHX 텍스트 인식(SHX text recognition)" 옵션이 PDF 출력에 추가됐습니다:
+
+PLOT 대화상자 → DWG To PDF.pc3 → Properties → Custom Properties
+→ "Capture fonts used in the drawing" 체크
+→ "Include layer information" 아래쪽에 있는 옵션들 확인
+AutoCAD 2023이면 더 발전된 옵션이 있을 겁니다:
+
+PDFSHX 시스템 변수를 1로 설정:
+
+명령창에서: PDFSHX
+값을 1로 입력
+이걸 1로 하면 SHX 텍스트를 PDF에 searchable text(검색 가능한 텍스트)로 같이 출력합니다. 본래 path는 그대로 두고 그 위에 투명한 텍스트 레이어를 얹는 방식이라 시각적으로 동일하면서 텍스트 추출이 가능해집니다.
+
+PDFSHX = 0  (기본값, 텍스트 추출 안 됨)
+PDFSHX = 1  (SHX를 검색 가능 텍스트로 포함) ← 이걸로 설정
+그리고 플롯하면 됩니다.
+
+검증 방법
+bash
+pdftotext your_drawing.pdf - | grep -E "FIC|PT|FT"
+태그명들이 줄줄이 나오면 성공입니다.
+
+작업지시서에 반영
+이 발견을 작업지시서에 추가하는 건 좋은데, **사실 이건 작업지시서에 들어갈 게 아니라 운영 사전조건(precondition)**입니다. 코딩 에이전트가 해결할 수 있는 게 아니라 도면 공급자가 해결해야 하니까요.
+
+작업지시서에는 "입력 PDF는 PDFSHX=1 또는 TTF 폰트로 플롯된 것"이라는 전제조건과 검증 절차를 추가하면 됩니다.
+
+추가할까요? 그리고 본문 도면 한 장 같은 방식으로 출력해보시고 결과 공유해주시면, 실제 본문에서 태그 추출이 잘 되는지 끝까지 확인해드릴 수 있습니다.
+
+입력 내용: 현실적으로 불가능함 OCR을 해야함
+현실적으로 불가능함 OCR을 해야함
+
+오전 4:25
+Claude 응답: 동의합니다.
+동의합니다. 현장에서 받는 도면을 매번 재플롯 요청하는 건 비현실적이고, EPC사/벤더 측 SHX 사용 관행이 워낙 굳어져 있어서 OCR이 정공법입니다.
+
+작업지시서를 OCR 기반으로 다시 설계하겠습니다. 큰 변화점:
+
+구조 변경:
+
+텍스트 추출: PyMuPDF → PaddleOCR (회전 텍스트 강함, GPU 가속, 한국어 지원)
+입력: PDF → 고DPI 래스터 이미지 (300 DPI 권장)
+좌표: PDF 좌표 → 픽셀 좌표 (DBSCAN은 그대로)
+DGX Spark 환경 활용: PaddleOCR을 vLLM과 같은 GPU에 올려도 부담 적습니다. 모델 크기 작음.
+
+질문 세 가지만 확인하고 작업지시서 작성 들어가겠습니다:
+
+세 가지만 알려주시면 그 답변에 맞춰 OCR 기반 작업지시서 다시 작성하겠습니다. 옵션 보고 선택만 해주세요.
+
+입력 내용: 질문: OCR 엔진 어떤 걸로 갈까요?
+질문: OCR 엔진 어떤 걸로 갈까요?
+답변: PaddleOCR (한/영, GPU, 회전 강함, 가장 권장)
+
+질문: 처리 단위는?
+답변: 둘 다 지원
+
+질문: 한글 라벨도 추출해야 하나요? (예: 장비 한글 설명)
+답변: 한글 라벨도 함께 (혼합)
+
+오전 4:26
+Claude 응답: 답변 반영해서 OCR 기반으로 작업지시서를 다시 작성하겠습니다.
+답변 반영해서 OCR 기반으로 작업지시서를 다시 작성하겠습니다.
+
+
+2개의 파일 생성됨, 명령 실행함, 파일 읽음
+2개의 파일 생성됨, 명령 실행함, 파일 읽음
+작업지시서 v2(OCR 기반)로 다시 만들었습니다. v1 대비 주요 변화:
+
+구조 변경
+
+텍스트 추출: PyMuPDF 텍스트 → PDF 래스터화 + PaddleOCR
+좌표계: PDF 포인트 → 픽셀 (DPI 비례 cluster_eps 자동 보정)
+cluster_eps 기본값: 40 → 50 (300 DPI 픽셀 기준)
+한글 처리 추가
+
+has_korean 플래그를 cluster마다 표시
+출력 스키마에 equipmentNameKo 필드 추가
+프롬프트 예시에 '저장탱크 A' 같은 한글 케이스 포함
+다중 페이지 지원
+
+page_range="1-3", "1,3,5", "2-" 등 유연한 범위 지정
+단일/다중 모두 같은 함수로 처리
+OCR 특화 보정
+
+신뢰도 임계값(min_confidence) 파라미터
+클러스터별 평균 OCR 신뢰도 노출 (ocr_confidence)
+LLM 프롬프트에 OCR 오인식 패턴 보정 규칙 추가 (O↔0, I↔1, S↔5 등)
+운영 고려
+
+GPU/CPU 자동 fallback (PADDLE_USE_GPU 환경변수)
+처리 시간 벤치마크 인수조건 포함
+vLLM과 GPU 동거 시 OOM 주의사항 명시
+향후 작업 우선순위 명확화
+
+심볼 형상 인식(DCS 판별)과 LEGEND 자동 파싱이 phase 2 최우선
+LEGEND 사전을 Qdrant에 캐시해서 프로젝트 단위 재사용
+테스트 코드의 EXPECTED_TAGS는 실제 보유하신 샘플 도면에 있는 태그 3~5개로 채워넣으시면 바로 사용 가능합니다.
+
+
+Pid pdf parser 작업지시서
+문서 · MD 
+입력 내용: OCR 로 하면 DWG 나 DXF 도 바로 읽을수있나?
+OCR 로 하면  DWG 나 DXF 도 바로 읽을수있나?
+
+오전 4:39
+Claude 응답: 좋은 질문입니다.
+좋은 질문입니다. OCR로는 DWG/DXF를 직접 읽을 수 없습니다. 이유와 대안을 짚어드리겠습니다.
+
+왜 OCR이 안 되나
+OCR은 이미지에서 텍스트를 인식하는 기술입니다. DWG/DXF는 이미지가 아니라:
+
+DWG: AutoCAD 바이너리 포맷. 텍스트, 좌표, 레이어 등이 이미 구조화된 데이터로 저장됨
+DXF: AutoCAD 텍스트 교환 포맷. ASCII 또는 바이너리. 마찬가지로 구조화된 데이터
+즉 OCR을 쓸 이유가 없습니다 — 텍스트가 이미 데이터로 들어있으니 그냥 읽으면 됩니다. SHX 폰트 path 변환 문제도 없습니다 (그건 PDF 출력 단계의 문제).
+
+포맷별 권장 처리 방법
+포맷	처리 방법	라이브러리	텍스트 추출
+DXF	직접 파싱	ezdxf (Python, MIT)	✅ 즉시 가능
+DWG	DXF로 변환 후 파싱	ODA File Converter, LibreDWG	✅ 변환 후 가능
+PDF (벡터)	PyMuPDF	pymupdf	✅ 가능 (단, SHX 제외)
+PDF (SHX path)	OCR	PaddleOCR	✅ OCR 필요
+PDF (스캔)	OCR	PaddleOCR	✅ OCR 필요
+DXF 처리는 PDF보다 훨씬 쉽습니다
+ezdxf로 DXF에서 텍스트와 좌표를 뽑는 건 몇 줄이면 됩니다:
+
+python
+import ezdxf
+
+doc = ezdxf.readfile("drawing.dxf")
+msp = doc.modelspace()
+
+# TEXT, MTEXT, ATTRIB 모두 잡기
+for entity in msp.query("TEXT MTEXT ATTRIB"):
+    if entity.dxftype() == "TEXT":
+        text = entity.dxf.text
+        x, y = entity.dxf.insert.x, entity.dxf.insert.y
+    elif entity.dxftype() == "MTEXT":
+        text = entity.text  # 이미 평문화됨
+        x, y = entity.dxf.insert.x, entity.dxf.insert.y
+    elif entity.dxftype() == "ATTRIB":
+        text = entity.dxf.text
+        x, y = entity.dxf.insert.x, entity.dxf.insert.y
+    layer = entity.dxf.layer  # ← PDF에는 없는 추가 정보
+    print(f"{text:<20} @ ({x:.1f}, {y:.1f}) [layer: {layer}]")
+장점:
+
+100% 정확 (OCR 오인식 없음)
+GPU 불필요, 매우 빠름
+레이어 정보까지 추출 가능 — 이게 큰 장점입니다. 보통 P&ID에서 INST_TAG, EQUIP_NAME, PIPE_LINE 같은 레이어로 구분되어 있어서, 태그/장비명/라인번호를 레이어 기준으로 분류할 수 있습니다.
+블록(BLOCK) 정보도 추출 가능 — ISA balloon은 보통 블록 + ATTRIB로 만들어져 있어서, 블록 이름으로 "이건 DCS 심볼", "이건 현장계기"를 직접 판별할 수 있습니다. phase 2의 형상 인식이 phase 1에서 해결됩니다.
+즉, DXF가 있으면 OCR 파이프라인 자체가 불필요
+기존 작업지시서의 extract_pid_tags가 이미 source_type="dxf"를 처리하지만, raw text 한 덩어리로 받게 되어 있어서 좌표/레이어/블록 정보가 살지 않습니다. 이걸 활용 못 하면 DXF의 이점을 다 버리는 셈입니다.
+
+권장 구조 업데이트
+작업지시서에 DXF 전용 도구를 별도로 추가하는 걸 강력히 권장드립니다:
+
+parse_pid_dxf(dxf_path)        ← 신규, ezdxf 기반
+parse_pid_dwg(dwg_path)        ← 신규, ODA Converter로 DXF 변환 후 위 함수 호출
+parse_pid_pdf(pdf_path)        ← 이미 만든 OCR 기반
+    ↓ 모두 같은 cluster JSON 형식으로 출력
+extract_pid_tags(clusters_json, "clusters")  ← 통합 처리
+세 함수가 같은 출력 스키마를 따르게 하면 LLM/하위 로직은 입력 포맷을 신경 쓸 필요가 없습니다.
+
+추가로 DXF 파서는 다음 정보를 cluster에 더 담을 수 있습니다:
+
+json
+{
+  "id": "p1c0",
+  "texts": ["FIC", "10101"],
+  "bbox": [...],
+  "layer": "INST_TAG",          ← DXF만 가능
+  "block_name": "DCS_BALLOON",  ← DXF만 가능 (DCS/현장계기 즉시 판별)
+  "source": "dxf"
+}
+우선순위 제안
+원본 도면을 EPC사에서 DWG/DXF로도 받을 수 있는지 먼저 확인해보시는 게 좋습니다. 가능하다면:
+
+DXF/DWG 우선 처리 — 정확도 100%, 빠름, 레이어/블록까지 활용
+PDF는 fallback — DWG/DXF가 없는 경우만 OCR 파이프라인 사용
+실무적으로 EPC사가 PDF만 주는 경우가 많지만(IP 보호 등), 사내 자체 프로젝트나 협력사 관계가 좋으면 DWG도 받을 수 있습니다. DWG가 있으면 작업 난이도가 한 단계 내려갑니다.
+
+작업지시서에 DXF/DWG 처리 도구를 추가해드릴까요? phase 1.5로 넣으면 적절할 것 같습니다.
+
+
+p-9100.dxf
+dxf
+
+오전 4:42
+Claude 응답: DXF 파일을 직접 분석해서 어떤 정보가 들어있는지 확인해보겠습니다.
+DXF 파일을 직접 분석해서 어떤 정보가 들어있는지 확인해보겠습니다.
+
+
+명령 3개 실행함, 파일 생성됨
+명령 3개 실행함, 파일 생성됨
+훌륭한 데이터입니다. TEXT 3,562개, MTEXT 363개나 있고, 레이어도 잘 나눠져 있습니다(PROCESS LINE, VALVE NO, LINENO, 1-SYMBOL 등). 텍스트 샘플을 보고 태그 분포를 파악해보겠습니다.
+
+
+파일 생성됨, 명령 실행함
+파일 생성됨, 명령 실행함
+완벽한 데이터입니다. 이게 OCR 대비 얼마나 좋은지 명확히 보입니다:
+
+핵심 발견:
+
+장비 태그 282개 명확히 추출 (P-10101, E-10103, T-10101, C-10111 등) — layer=1에 정리되어 있음
+ISA balloon function code 후보 536개 (PG, TG, LT, PT, TE, FICQ, FC, FCV 등) — 빈도까지 정확
+루프번호 후보 666개 — 대부분 layer=INSTRUMENT에 있음
+한글 라벨 59개 (기존 설비, PGMEA REFINE 공정 등) — 깨짐 없이 100% 추출
+레이어가 의미 있게 나뉨: INSTRUMENT, LINENO, VALVE NO, 1-SYMBOL 등
+이제 INSERT(블록 참조)가 ISA balloon인지 확인해봅시다 — 이게 phase 2의 형상 인식을 phase 1에서 해결할 수 있는 결정적 단서입니다.
+
+
+파일 생성됨, 명령 실행함
+파일 생성됨, 명령 실행함
+아주 명확해졌습니다.
+
+핵심 발견:
+
+INSERT(블록)는 ISA balloon이 아닙니다 — 이름이 A$C... 형태로 익명화되어 있고 ATTRIB도 없음. 이 도면은 블록 기반이 아니라 CIRCLE + TEXT 조합으로 그려져 있음
+layer=INSTRUMENT에 CIRCLE 215개 — 이게 바로 ISA balloon입니다!
+반지름 0.4(266개), 0.3(235개)이 압도적 — 이게 표준 ISA balloon 크기. 0.7은 더 큰 심볼
+Numeric texts 666개 중 대부분이 layer=INSTRUMENT — 루프번호가 정확히 같은 레이어에 있음
+즉 DXF에서는 phase 2 작업까지 같이 끝낼 수 있습니다. CIRCLE 좌표 + 반지름으로 balloon 위치를 찾고, 그 안에 들어있는 TEXT를 매칭하면 ISA balloon이 정확히 식별됩니다. 사각형 검출까지 추가하면 DCS vs 현장계기 구분도 가능합니다.
+
+마지막으로 통합 검증해보겠습니다:
+
+
+파일 생성됨, 명령 실행함
+파일 생성됨, 명령 실행함
+완벽합니다. 215개 balloon 중 197개(91.6%)가 LLM 호출 없이 정확한 태그로 추출됐습니다.
+
+LLM이 필요했던 이유(두 줄 정규화)가 DXF에서는 단순 좌표 매칭으로 해결됩니다. OCR 파이프라인의 모든 어려움이 사라졌습니다:
+
+항목	OCR 파이프라인	DXF 파이프라인
+텍스트 정확도	~95% (오인식 보정 필요)	100%
+처리 시간 (1장)	GPU 5~15초, CPU 30~90초	<1초
+ISA balloon 식별	LLM 두 줄 합치기 필요	CIRCLE 좌표로 직접
+DCS vs 현장계기	phase 2 (불가능)	사각형 검출 가능
+LLM 호출	필수 (정규화)	선택사항 (보정용)
+한글 라벨	OCR 의존	MTEXT 직독
+GPU 필요	예	아니오
+레이어/카테고리	없음	자동 분류
+이제 결정할 게 있습니다 — 작업지시서에 DXF 처리 도구를 어떻게 추가할지:
+
+실수로 빈 질문이 갔네요. 다시:
+
+제안 드리는 구조:
+
+작업지시서를 phase 1을 DXF 우선 / PDF는 fallback으로 재구성하는 게 합리적입니다:
+
+[phase 1 - DXF 우선] ← 추천
+  parse_pid_dxf(dxf_path)     ← 신규, ezdxf 기반 (이번 검증으로 91.6% 검증됨)
+  parse_pid_dwg(dwg_path)     ← 신규, ODA Converter로 DXF 변환 후 위 함수 호출
+  parse_pid_pdf(pdf_path)     ← OCR 기반 (이미 만든 것, fallback용)
+  → 모두 같은 cluster JSON 출력
+  
+extract_pid_tags(clusters_json, "clusters")  ← DXF는 LLM 거의 안 거쳐도 되고,
+                                                PDF만 LLM 정규화 필요
+DXF 파서가 OCR 대비 추가로 제공할 수 있는 것:
+
+layer: 텍스트가 속한 레이어 (INSTRUMENT, LINENO, VALVE NO 등)
+balloon_type: 사각형+원(DCS) / 단일 원(FIELD) — 형상 검출로 직접 판별
+is_pre_normalized: DXF에서 좌표 매칭으로 이미 FIC-10101 형태로 합친 태그
+confidence: 좌표 매칭 신뢰도 (LLM 추론 없이도 0.95+)
+어떻게 진행할까요?
+
+작업지시서를 DXF + PDF 통합본으로 재작성 (가장 추천 — 실무에서 가장 쓸만한 구조)
+DXF 처리 도구만 별도 작업지시서로 분리
+현재 OCR 파이프라인 그대로 두고 DXF는 phase 1.5로 추가만
+어느 쪽으로 갈지 선택해주시면 그에 맞춰 작업지시서를 업데이트하겠습니다.
+
+입력 내용: DWG도 로컬LLM이 처리가능하면 그것도 넣어서 작업지시서를 만들어줘
+DWG도 로컬LLM이 처리가능하면 그것도 넣어서 작업지시서를 만들어줘
+
+오전 4:45
+Claude 응답: DWG 처리 가능 여부부터 정확히 짚고 가겠습니다.
+DWG 처리 가능 여부부터 정확히 짚고 가겠습니다.
+
+DWG 처리 옵션 검토
+DWG는 AutoCAD 바이너리 포맷이라 로컬LLM이 직접 읽을 수 없습니다. 반드시 DXF로 변환 후 처리해야 합니다. 변환 옵션:
+
+도구	라이선스	정확도	운영 적합성
+ODA File Converter	무료 (등록 필요)	높음	권장 — Linux/Windows 지원, CLI
+LibreDWG (dwg2dxf)	GPL-3	보통	오픈소스만 써야 할 때
+Teigha (ODA SDK)	상용 라이선스	매우 높음	비싸지만 프로덕션급
+AutoCAD/ODA Viewer	GUI	높음	자동화 어려움
+ODA File Converter가 현실적 선택입니다. Open Design Alliance가 무료 배포하고, CLI로 일괄 변환 가능합니다.
+
+DGX Spark(Linux)에서:
+
+bash
+ODAFileConverter <input_dir> <output_dir> ACAD2018 DXF 0 1 "*.DWG"
+LLM 활용 위치: DWG/DXF 처리에서 LLM은 선택적 보정/검증용입니다 — 좌표 매칭이 91.6% 자동으로 되니까요. 다만 다음에는 LLM이 가치 있습니다:
+
+매칭 안 된 18개(8.4%) 문제 cluster 정규화
+한글 장비 설명을 영문 매핑
+LEGEND 시트 약어 자동 학습 (phase 2)
+이제 통합 작업지시서를 작성하겠습니다.
+
+
+명령 실행함, 파일 생성됨, 파일 읽음
+명령 실행함, 파일 생성됨, 파일 읽음
+작업지시서 v3로 통합 작성 완료했습니다.
+
+v3의 주요 특징
+DWG에 대한 솔직한 답변: 로컬LLM이 DWG를 직접 처리할 수는 없습니다. 바이너리 포맷이라 무조건 DXF 변환이 선행되어야 합니다. 그래서 parse_pid_dwg는 ODA File Converter로 DXF 변환 → parse_pid_dxf 위임하는 구조로 만들었습니다. 사용자 입장에서는 .dwg 파일 그대로 던지면 되고 내부에서 알아서 처리됩니다.
+
+4개 도구 구조:
+
+parse_pid_dxf — 메인 (실제 검증한 91.6% 매칭률)
+parse_pid_dwg — DWG → DXF 변환 후 위 함수 호출
+parse_pid_pdf — OCR fallback (PDF만 있을 때)
+parse_pid_drawing — 확장자로 자동 라우팅하는 디스패처
+핵심 가치 — pre_normalized_tag: DXF에서 좌표 매칭으로 미리 정규화된 태그를 cluster에 담아둡니다. extract_pid_tags가 이를 보면 LLM 호출 없이 _extract_from_dxf_clusters 함수로 즉시 결과 반환. DXF는 LLM 비용 없음 = vLLM 부하 없이 도면 한 장이 1초 내 처리.
+
+실제 검증 데이터를 작업지시서에 명시:
+
+사용자 제공 p-9100.dxf로 검증한 91.6% 매칭률, 215개 balloon 등을 인수조건과 마지막 섹션에 박아뒀습니다. 코딩 에이전트가 "이 정도 성능이 나와야 OK다"는 객관적 기준을 가지고 작업하게 됩니다.
+ODA File Converter 등록 안내: 상용 SaaS 배포 시 라이선스 제약 있으니 사전 검토 필요사항 명시.
+
+이제 코딩 에이전트에 던지시면 됩니다. instrument_layer 기본값은 "INSTRUMENT"로 잡아뒀는데, 다른 EPC사 도면은 INST나 I-1 같은 다른 이름을 쓸 수 있으니 첫 도면 들어올 때 한 번 확인하시면 됩니다.
+
+
+Pid parser 작업지시서 v3
+문서 · MD 
+입력 내용: p-9000.
+
+
--- a/futurePlan/P&ID
+++ b/futurePlan/P&ID
@@ -0,0 +1,3 @@
+1. 리모트 (웹브라우저 실행한 PC)에서 파일을 선택하면, 서버로 전달되지 않는다 ---> 추출시작시 에러남
+2. 파일선택 버튼을 누르면 리모트 PC의 파일을 읽는다. 원격 서버의 파일은 읽히지 않는다. 
+3. 그러면 어쩌란 말인가 ????
--- a/futurePlan/P&ID_AX_Plan2.md
+++ b/futurePlan/P&ID_AX_Plan2.md
--- a/futurePlan/Symbol-Definition.pdf
+++ b/futurePlan/Symbol-Definition.pdf
--- a/futurePlan/extract_pdf.cs
+++ b/futurePlan/extract_pdf.cs
@@ -0,0 +1,57 @@
+
+using System;
+using System.Text;
+using UglyToad.PdfPig;
+using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
+
+/// <summary>
+/// Console tool that dumps the metadata and the per-page text of a PDF into a
+/// Markdown report using PdfPig.
+/// </summary>
+class Program
+{
+    // Defaults reproduce the original hard-coded behaviour when no arguments are given.
+    private const string DefaultPdfPath = "/home/windpacer/projects/ExperionCrawler/futurePlan/plant-9100.pdf";
+    private const string DefaultMarkdownPath = "/home/windpacer/projects/ExperionCrawler/futurePlan/plant-9100-extracted.md";
+
+    /// <summary>
+    /// Reads the PDF, builds the Markdown report in memory and writes it to disk.
+    /// </summary>
+    /// <param name="args">
+    /// Optional overrides: <c>args[0]</c> is the input PDF path and <c>args[1]</c> is the
+    /// output Markdown path. Missing arguments fall back to the built-in defaults.
+    /// </param>
+    static void Main(string[] args)
+    {
+        string pdfPath = args.Length > 0 ? args[0] : DefaultPdfPath;
+        string markdownPath = args.Length > 1 ? args[1] : DefaultMarkdownPath;
+
+        Console.WriteLine($"PdfPig 버전: {typeof(PdfDocument).Assembly.GetName().Version}");
+        Console.WriteLine($"PDF 파일: {pdfPath}");
+        Console.WriteLine();
+
+        // Fail with a readable message and a non-zero exit code instead of an
+        // unhandled exception when the input file is missing.
+        if (!System.IO.File.Exists(pdfPath))
+        {
+            Console.Error.WriteLine($"PDF 파일을 찾을 수 없습니다: {pdfPath}");
+            Environment.ExitCode = 1;
+            return;
+        }
+
+        using var document = PdfDocument.Open(pdfPath);
+        var sb = new StringBuilder();
+
+        // The title follows the actual input file rather than a fixed name.
+        sb.AppendLine($"# {System.IO.Path.GetFileName(pdfPath)} 추출 결과");
+        sb.AppendLine();
+        sb.AppendLine("## PDF 정보");
+        sb.AppendLine();
+        sb.AppendLine($"- **버전**: {document.Version}");
+        sb.AppendLine($"- **페이지 수**: {document.NumberOfPages}");
+        sb.AppendLine($"- **제목**: {document.Information.Title ?? "(없음)"}");
+        sb.AppendLine($"- **작성자**: {document.Information.Author ?? "(없음)"}");
+        sb.AppendLine($"- **생성 프로그램**: {document.Information.Producer ?? "(없음)"}");
+        sb.AppendLine($"- **생성일**: {document.Information.CreationDate ?? "(없음)"}");
+        sb.AppendLine();
+
+        foreach (var page in document.GetPages())
+        {
+            sb.AppendLine($"## 페이지 {page.Number}");
+            sb.AppendLine();
+            sb.AppendLine($"- **크기**: {page.Width} x {page.Height}");
+            sb.AppendLine();
+
+            // Page text is emitted verbatim inside a fenced block.
+            string text = page.Text;
+            sb.AppendLine("### 추출 텍스트");
+            sb.AppendLine();
+            sb.AppendLine("```");
+            sb.AppendLine(text);
+            sb.AppendLine("```");
+            sb.AppendLine();
+
+            Console.WriteLine($"페이지 {page.Number} 추출 완료 ({text.Length}자)");
+        }
+
+        System.IO.File.WriteAllText(markdownPath, sb.ToString());
+        Console.WriteLine();
+        Console.WriteLine($"전체 추출 완료. 결과 저장: {markdownPath}");
+    }
+}
--- a/futurePlan/extract_pdf.csproj
+++ b/futurePlan/extract_pdf.csproj
@@ -0,0 +1,11 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+  <ItemGroup>
+    <PackageReference Include="PdfPig" Version="0.1.9" />
+  </ItemGroup>
+</Project>
--- a/futurePlan/extract_pdf.py
+++ b/futurePlan/extract_pdf.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+"""
+PdfPig를 사용하여 plant-9100.pdf에서 모든 문자열을 추출하여 마크다운 파일로 저장
+"""
+import subprocess
+import sys
+
+# C# source template that is written verbatim to futurePlan/extract_pdf.cs below.
+# This is a regular (non-raw) Python string, so each \" escape in it is emitted
+# as a plain double quote in the generated C# file.
+csharp_code = '''
+using System;
+using System.Text;
+using UglyToad.PdfPig;
+using UglyToad.PdfPig.DocumentLayoutAnalysis.TextExtractor;
+
+class Program
+{
+    static void Main(string[] args)
+    {
+        string pdfPath = "/home/windpacer/projects/ExperionCrawler/futurePlan/plant-9100.pdf";
+        string markdownPath = "/home/windpacer/projects/ExperionCrawler/futurePlan/plant-9100-extracted.md";
+        
+        Console.WriteLine($"PdfPig 버전: {typeof(PdfDocument).Assembly.GetName().Version}");
+        Console.WriteLine($"PDF 파일: {pdfPath}");
+        Console.WriteLine();
+        
+        using (var document = PdfDocument.Open(pdfPath))
+        {
+            var sb = new StringBuilder();
+            
+            sb.AppendLine("# plant-9100.pdf 추출 결과");
+            sb.AppendLine();
+            sb.AppendLine("## PDF 정보");
+            sb.AppendLine();
+            sb.AppendLine($"- **버전**: {document.Version}");
+            sb.AppendLine($"- **페이지 수**: {document.NumberOfPages}");
+            sb.AppendLine($"- **제목**: {document.Information.Title ?? \"(없음)\"}");
+            sb.AppendLine($"- **작성자**: {document.Information.Author ?? \"(없음)\"}");
+            sb.AppendLine($"- **생성 프로그램**: {document.Information.Producer ?? \"(없음)\"}");
+            sb.AppendLine($"- **생성일**: {document.Information.CreationDate ?? \"(없음)\"}");
+            sb.AppendLine();
+            
+            foreach (var page in document.GetPages())
+            {
+                sb.AppendLine($"## 페이지 {page.Number}");
+                sb.AppendLine();
+                sb.AppendLine($"- **크기**: {page.Width} x {page.Height}");
+                sb.AppendLine();
+                
+                string text = page.Text;
+                sb.AppendLine("### 추출 텍스트");
+                sb.AppendLine();
+                sb.AppendLine("```");
+                sb.AppendLine(text);
+                sb.AppendLine("```");
+                sb.AppendLine();
+                
+                Console.WriteLine($"페이지 {page.Number} 추출 완료 ({text.Length}자)");
+            }
+            
+            System.IO.File.WriteAllText(markdownPath, sb.ToString());
+            Console.WriteLine();
+            Console.WriteLine($"전체 추출 완료. 결과 저장: {markdownPath}");
+        }
+    }
+}
+'''
+
+# C# 파일 저장
+with open('futurePlan/extract_pdf.cs', 'w') as f:
+    f.write(csharp_code)
+print("C# 코드 작성 완료: futurePlan/extract_pdf.cs")
+
+# 프로젝트 파일 생성
+project_code = '''<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net8.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+  <ItemGroup>
+    <PackageReference Include="PdfPig" Version="0.1.9" />
+  </ItemGroup>
+</Project>
+'''
+
+with open('futurePlan/extract_pdf.csproj', 'w') as f:
+    f.write(project_code)
+print("프로젝트 파일 작성 완료: futurePlan/extract_pdf.csproj")
+
+# 빌드 및 실행
+print()
+print("빌드 및 실행 중...")
+result = subprocess.run(
+    ['dotnet', 'run', '--project', 'futurePlan/extract_pdf.csproj', '--configuration', 'Release'],
+    cwd='/home/windpacer/projects/ExperionCrawler',
+    capture_output=True,
+    text=True
+)
+print(result.stdout)
+if result.stderr:
+    print("STDERR:", result.stderr)
--- a/futurePlan/new_parser_coding_plan.md
+++ b/futurePlan/new_parser_coding_plan.md
--- a/futurePlan/p&id_ax-coding_plan.md
+++ b/futurePlan/p&id_ax-coding_plan.md
--- a/futurePlan/p&id_ax_UI_API_plan.md
+++ b/futurePlan/p&id_ax_UI_API_plan.md
--- a/futurePlan/p&id_ax_coding_plan2.md
+++ b/futurePlan/p&id_ax_coding_plan2.md
--- a/futurePlan/p&id_ax_coding_plan3.md
+++ b/futurePlan/p&id_ax_coding_plan3.md
--- a/futurePlan/p&id_ax_coding_plan4.md
+++ b/futurePlan/p&id_ax_coding_plan4.md
@@ -0,0 +1,795 @@
+# P&ID AX 코딩 플랜 v4 (MCP 기반 로컬 LLM 구현)
+
+> **수정일**: 2026-04-30 (v4 완료 — 백엔드 완료, MCP 툴 추가)
+> **수정일**: 2026-04-30 (v4.1 — 프론트엔드 완료)
+> **기준**: `p&id_ax_coding_plan3.md` 반영
+> **목적**: Anthropic Cloud Vision 제거 → MCP 경유 로컬 LLM(vLLM Qwen3-Coder-Next-FP8)으로 DXF/PDF 텍스트 추출 구현
+
+**백엔드 완료 요약**:
+- 단계 1-8 완료 (P&ID 도메인, 서비스, 컨트롤러, DB, Program.cs 등록)
+- MCP 툴 추가: `extract_pid_tags`, `match_pid_tags` (mcp-server/server.py)
+- 빌드 검증: `dotnet build` 성공 (0 에러)
+- API 엔드포인트: `/api/pid/*` (13개)
+
+**프론트엔드 완료 요약 (v4.1)**:
+- 단계 9-12 완료 (index.html, app.js, style.css, appsettings.json)
+- P&ID 추출 탭 추가 (11번째 탭)
+- 추출 결과 테이블 + 페이지네이션
+- CSV/Excel 내보내기 기능
+- 통계 정보 표시
+
+---
+
+## 📋 개요
+
+DXF/PDF 형식의 P&ID 도면에서 장비 및 계기 정보를 AI로 자동 추출하여 ExperionCrawler 데이터베이스와 연동하는 기능입니다.
+
+**주요 변경사항**:
+- ❌ Anthropic Cloud Vision 제거
+- ✅ MCP 경유 로컬 LLM(vLLM Qwen3-Coder-Next-FP8) 사용
+- ✅ netDxf (DXF 파싱) + PdfPig (PDF 텍스트 추출) 사용
+
+---
+
+## 🎯 목표
+
+1. P&ID 도면에서 장비 정보를 추출
+2. 추출된 정보를 PostgreSQL에 저장
+3. 기존 Experion 데이터와 연동
+4. 웹에서 시각화 및 관리
+
+---
+
+## ✅ 완료된 단계 (v4) — 백엔드 완료 (2026-04-30)
+
+| 단계 | 내용 | 상태 |
+|------|------|------|
+| 1 | P&ID 도메인 엔티티 생성 (`PidEquipment`, `PidAuditLog`) | ✅ 완료 |
+| 2 | DTOs 생성 (`PidEquipmentDto`, `PidExtractionResult`, `TagMappingDtos`) | ✅ 완료 |
+| 3 | Interface 정의 (`IPidExtractorService`, `ITagMappingService`) | ✅ 완료 |
+| 4 | TagMappingService 생성 | ✅ 완료 |
+| 5 | PidExtractorService 생성 (Anthropic → MCP 로컬 LLM) | ✅ 완료 |
+| 6 | Database Migration (`DbSet`, FK 설정) | ✅ 완료 |
+| 7 | PidController 추가 (`ExperionPidController` - 13개 엔드포인트) | ✅ 완료 |
+| 8 | Program.cs 등록 (`AddScoped<IPidExtractorService>`, `AddScoped<ITagMappingService>`) | ✅ 완료 |
+
+---
+
+## ✅ 완료된 단계 (v4) — MCP 툴 추가 (2026-04-30)
+
+| 단계 | 내용 | 상태 |
+|------|------|------|
+| 9 | Python MCP 툴 `extract_pid_tags` 구현 | ✅ 완료 |
+| 10 | Python MCP 툴 `match_pid_tags` 구현 | ✅ 완료 |
+
+---
+
+## ✅ 완료된 단계 (v4) — 상세
+
+### 단계 1: P&ID 도메인 엔티티 생성 ✅
+
+**파일**: [`src/Core/Domain/Entities/PidEquipment.cs`](../src/Core/Domain/Entities/PidEquipment.cs)  
+**파일**: [`src/Core/Domain/Entities/PidAuditLog.cs`](../src/Core/Domain/Entities/PidAuditLog.cs)
+
+```csharp
+// PidEquipment.cs
+[Table("pid_equipment")]
+public class PidEquipment
+{
+    public long Id { get; set; }
+    
+    [Required]
+    [MaxLength(50)]
+    public string TagNo { get; set; } = string.Empty;
+    
+    [MaxLength(200)]
+    public string? EquipmentName { get; set; }
+    
+    [MaxLength(10)]
+    public string? InstrumentType { get; set; }
+    
+    [MaxLength(100)]
+    public string? LineNumber { get; set; }
+    
+    [MaxLength(50)]
+    public string? PidDrawingNo { get; set; }
+    
+    public double Confidence { get; set; }
+    
+    public bool IsActive { get; set; } = true;
+    
+    public DateTime ExtractedAt { get; set; } = DateTime.UtcNow;
+    
+    public DateTime? UpdatedAt { get; set; }
+    
+    public int? ExperionTagId { get; set; }
+    public RealtimePoint? ExperionTag { get; set; }
+}
+
+// PidAuditLog.cs
+[Table("pid_audit_log")]
+public class PidAuditLog
+{
+    public long Id { get; set; }
+    [MaxLength(50)]
+    public string Source { get; set; } = string.Empty;
+    [MaxLength(50)]
+    public string Action { get; set; } = string.Empty;
+    [MaxLength(50)]
+    public string TargetTagNo { get; set; } = string.Empty;
+    public string? OldValue { get; set; }
+    public string? NewValue { get; set; }
+    public DateTime LoggedAt { get; set; } = DateTime.UtcNow;
+}
+```
+
+> **현황**: 이미 코드베이스에 완료됨. `UpdatedAt`이 `DateTime?` (nullable)으로 정의되어 있음.
+
+---
+
+### 단계 2: DTOs 생성 ✅
+
+**파일**: [`src/Core/Application/DTOs/PidEquipmentDto.cs`](../src/Core/Application/DTOs/PidEquipmentDto.cs)  
+**파일**: [`src/Core/Application/DTOs/PidExtractionResult.cs`](../src/Core/Application/DTOs/PidExtractionResult.cs)  
+**파일**: [`src/Core/Application/DTOs/TagMappingDtos.cs`](../src/Core/Application/DTOs/TagMappingDtos.cs)
+
+```csharp
+// PidEquipmentDto.cs
+public record PidEquipmentDto(
+    long Id,
+    string TagNo,
+    string? EquipmentName,
+    string? InstrumentType,
+    string? LineNumber,
+    string? PidDrawingNo,
+    double Confidence,
+    bool IsActive,
+    DateTime ExtractedAt,
+    DateTime? UpdatedAt,
+    int? ExperionTagId,
+    string? ExperionTagName);
+
+// PidExtractionResult.cs
+public record PidExtractionResult(
+    int TotalCount,
+    int ConfidenceItems,
+    int LowConfidenceItems);
+
+// TagMappingDtos.cs
+public record TagMappingResult
+{
+    public long PidEquipmentId { get; set; }
+    public string TagNo { get; set; } = string.Empty;
+    public string? EquipmentName { get; set; }
+    public string? InstrumentType { get; set; }
+    public string? LineNumber { get; set; }
+    public string? PidDrawingNo { get; set; }
+    public double Confidence { get; set; }
+    public bool IsActive { get; set; }
+    public int? ExperionTagId { get; set; }
+    public string? ExperionTagName { get; set; }
+    public string? ExperionNodeId { get; set; }
+}
+
+public record CreateMappingRequest(long PidEquipmentId, int ExperionTagId);
+public record UpdateMappingRequest(int? ExperionTagId, bool? IsActive);
+```
+
+> **현황**: 이미 코드베이스에 완료됨. `ExtractedItem`/`MappingItem`은 `PidExtractorService.cs` 내부에 `public class`로 정의됨.
+
+---
+
+### 단계 3: Interface 정의 ✅
+
+**파일**: [`src/Core/Application/Interfaces/IExperionServices.cs`](../src/Core/Application/Interfaces/IExperionServices.cs)
+
+```csharp
+// P&ID Extractor
+public interface IPidExtractorService
+{
+    Task<PidExtractionResult> ExtractFromFileAsync(string filePath, bool useImageMode = false);
+    Task<PidExtractionResult> ExtractFromStreamAsync(Stream stream, string fileName, bool useImageMode = false);
+    
+    Task<(int Total, IEnumerable<PidEquipment> Items)> GetEquipmentAsync(string? tagNo, int page, int pageSize);
+    Task<PidEquipment?> GetByIdAsync(long id);
+    
+    Task UpdateConfidenceAsync(long id, double confidence);
+    Task ActivateAsync(long id);
+    Task DeactivateAsync(long id);
+    
+    Task<int> GetTotalCountAsync();
+    Task<int> GetConfidenceItemsCountAsync();
+    Task<int> GetLowConfidenceItemsCountAsync();
+    Task<IDictionary<string, int>> GetConfidenceDistributionAsync();
+    Task<int> GetDrawingCountAsync();
+    
+    Task<string> ExportToCsvAsync(IEnumerable<PidEquipment> items);
+    Task<byte[]> ExportToExcelAsync(IEnumerable<PidEquipment> items);
+}
+
+// P&ID Tag Mapping
+public interface ITagMappingService
+{
+    Task<(int Total, IEnumerable<TagMappingResult> Items)> GetMappingsAsync(int page, int pageSize);
+    Task<TagMappingResult?> GetMappingByIdAsync(long id);
+    Task<TagMappingResult> CreateMappingAsync(CreateMappingRequest request);
+    Task UpdateMappingAsync(long id, UpdateMappingRequest request);
+    Task ClearMappingAsync(long id);
+    
+    Task<int> GetUnmappedCountAsync();
+    Task<int> GetMappedCountAsync();
+    Task<IEnumerable<string>> GetAvailableTagsAsync();
+}
+```
+
+> **현황**: 이미 코드베이스에 완료됨. `IExperionServices.cs`에 `IPidExtractorService`, `ITagMappingService` 모두 정의됨.
+
+---
+
+### 단계 4: TagMappingService 생성 ✅
+
+**파일**: [`src/Core/Application/Services/TagMappingService.cs`](../src/Core/Application/Services/TagMappingService.cs)
+
+```csharp
+public class TagMappingService : ITagMappingService
+{
+    private readonly ExperionDbContext _dbContext;
+    
+    public TagMappingService(ExperionDbContext dbContext)
+    {
+        _dbContext = dbContext;
+    }
+    
+    public async Task<(int Total, IEnumerable<TagMappingResult> Items)> GetMappingsAsync(int page, int pageSize)
+    {
+        var query = from pe in _dbContext.PidEquipment
+                    join rt in _dbContext.RealtimePoints
+                        on pe.ExperionTagId equals rt.Id into joined
+                    from rt in joined.DefaultIfEmpty()
+                    select new TagMappingResult { ... };
+        
+        var total = await query.CountAsync();
+        var items = await query
+            .OrderByDescending(e => e.Confidence)
+            .Skip((page - 1) * pageSize)
+            .Take(pageSize)
+            .ToListAsync();
+        
+        return (total, items);
+    }
+    
+    public async Task<TagMappingResult?> GetMappingByIdAsync(long id) { ... }
+    public async Task<TagMappingResult> CreateMappingAsync(CreateMappingRequest request) { ... }
+    public async Task UpdateMappingAsync(long id, UpdateMappingRequest request) { ... }
+    public async Task ClearMappingAsync(long id) { ... }
+    
+    public async Task<int> GetUnmappedCountAsync()
+        => await _dbContext.PidEquipment.CountAsync(e => e.ExperionTagId == null);
+    
+    public async Task<int> GetMappedCountAsync()
+        => await _dbContext.PidEquipment.CountAsync(e => e.ExperionTagId != null);
+    
+    public async Task<IEnumerable<string>> GetAvailableTagsAsync()
+    {
+        var mappedTagIds = await _dbContext.PidEquipment
+            .Where(e => e.ExperionTagId != null)
+            .Select(e => e.ExperionTagId)
+            .ToListAsync();
+        
+        return await _dbContext.RealtimePoints
+            .Where(t => !mappedTagIds.Contains(t.Id))
+            .Select(t => t.TagName)
+            .OrderBy(t => t)
+            .ToListAsync();
+    }
+}
+```
+
+> **현황**: 이미 코드베이스에 완료됨. `GetAvailableTagsAsync()`가 이미 매핑된 태그를 제외한 목록만 반환하는 올바른 구현이 있음.
+
+---
+
+### 단계 5: PidExtractorService 생성 ✅
+
+**파일**: [`src/Core/Application/Services/PidExtractorService.cs`](../src/Core/Application/Services/PidExtractorService.cs)
+
+```csharp
+public class PidExtractorService : IPidExtractorService
+{
+    private readonly McpClient _mcp;
+    private readonly ExperionDbContext _dbContext;
+    private readonly ILogger<PidExtractorService> _logger;
+
+    public PidExtractorService(McpClient mcp, ExperionDbContext dbContext, ILogger<PidExtractorService> logger)
+    {
+        _mcp = mcp;
+        _dbContext = dbContext;
+        _logger = logger;
+    }
+
+    public async Task<PidExtractionResult> ExtractFromFileAsync(string filePath, bool useImageMode = false)
+    {
+        await using var stream = File.OpenRead(filePath);
+        return await ExtractFromStreamAsync(stream, Path.GetFileName(filePath), useImageMode);
+    }
+
+    public async Task<PidExtractionResult> ExtractFromStreamAsync(Stream stream, string fileName, bool useImageMode = false)
+    {
+        var ext = Path.GetExtension(fileName).ToLowerInvariant();
+
+        string text = ext switch
+        {
+            ".dxf" => ExtractDxfText(stream),
+            ".pdf" => ExtractPdfText(stream),
+            _      => throw new NotSupportedException($"지원 형식: .dxf .pdf (스캔본 이미지는 Vision 모드 필요)")
+        };
+
+        if (string.IsNullOrWhiteSpace(text))
+            return new PidExtractionResult(0, 0, 0);
+
+        // MCP → vLLM 태그 추출
+        var sourceType = ext.TrimStart('.');
+        var json = await _mcp.ExtractPidTagsAsync(text, sourceType);
+        var extractedItems = ParseJson(json);
+
+        if (extractedItems.Count == 0)
+        {
+            _logger.LogWarning("P&ID 추출 결과 0건 — 파일: {FileName}", fileName);
+            return new PidExtractionResult(0, 0, 0);
+        }
+
+        // MCP → vLLM 태그 매핑 제안
+        var pidTagNos = extractedItems.Select(i => i.TagNo).Distinct().ToList();
+        var experionTagNames = await _dbContext.RealtimePoints.Select(r => r.TagName).ToListAsync();
+        var mappingJson = await _mcp.MatchPidTagsAsync(pidTagNos, experionTagNames);
+        var mappings = ParseMappingJson(mappingJson);
+
+        // DB 저장
+        var dbItems = new List<PidEquipment>();
+        foreach (var item in extractedItems)
+        {
+            mappings.TryGetValue(item.TagNo, out var matched);
+            var experionTag = matched != null
+                ? await _dbContext.RealtimePoints.FirstOrDefaultAsync(r => r.TagName == matched)
+                : await FindFallbackTagAsync(item.TagNo);
+
+            dbItems.Add(new PidEquipment
+            {
+                TagNo = item.TagNo,
+                EquipmentName = item.EquipmentName,
+                InstrumentType = item.InstrumentType,
+                LineNumber = item.LineNumber,
+                PidDrawingNo = item.PidDrawingNo,
+                Confidence = item.Confidence,
+                ExperionTagId = experionTag?.Id,
+                ExtractedAt = DateTime.UtcNow,
+                UpdatedAt = DateTime.UtcNow
+            });
+        }
+
+        await _dbContext.PidEquipment.AddRangeAsync(dbItems);
+        await _dbContext.SaveChangesAsync();
+
+        _logger.LogInformation("P&ID 추출 완료: {Total}건 저장 (파일: {FileName})", dbItems.Count, fileName);
+
+        return new PidExtractionResult(
+            TotalCount: dbItems.Count,
+            ConfidenceItems: dbItems.Count(i => i.Confidence >= 0.7),
+            LowConfidenceItems: dbItems.Count(i => i.Confidence < 0.5));
+    }
+
+    private string ExtractDxfText(Stream stream) { ... }
+    private string ExtractPdfText(Stream stream) { ... }
+    private List<ExtractedItem> ParseJson(string json) { ... }
+    private Dictionary<string, string> ParseMappingJson(string json) { ... }
+    private async Task<RealtimePoint?> FindFallbackTagAsync(string tagNo) { ... }
+
+    // CRUD / 통계 / 내보내기 메서드들...
+}
+```
+
+**내부 파싱용 모델**:
+```csharp
+public class ExtractedItem
+{
+    public string TagNo { get; set; } = "";
+    public string? EquipmentName { get; set; }
+    public string? InstrumentType { get; set; }
+    public string? LineNumber { get; set; }
+    public string? PidDrawingNo { get; set; }
+    public double Confidence { get; set; } = 0.5;
+}
+
+public class MappingItem
+{
+    public string PidTag { get; set; } = "";
+    public string? ExperionTag { get; set; }
+    public double Confidence { get; set; }
+}
+```
+
+> **현황**: `PidExtractorService.cs`가 MCP 기반으로 완료됨. `netDxf`, `PdfPig`, `EPPlus` 패키지 추가 완료.
+
+---
+
+## 📦 추가된 패키지
+
+| 패키지 | 버전 | 용도 |
+|--------|------|------|
+| netDxf | 2022.11.2 | DXF 파일 파싱 |
+| PdfPig | 0.1.9 | PDF 텍스트 추출 |
+| EPPlus | 7.4.2 | Excel 내보내기 |
+
+---
+
+## ✅ 완료된 단계 (v4) - 추가
+
+### 단계 6: Database Migration ✅
+
+**파일**: [`src/Infrastructure/Database/ExperionDbContext.cs`](../src/Infrastructure/Database/ExperionDbContext.cs)
+
+```csharp
+public class ExperionDbContext : DbContext
+{
+    public DbSet<PidEquipment> PidEquipment => Set<PidEquipment>();
+    public DbSet<PidAuditLog> PidAuditLog => Set<PidAuditLog>();
+    
+    protected override void OnModelCreating(ModelBuilder modelBuilder)
+    {
+        // PidEquipment → RealtimePoint FK
+        modelBuilder.Entity<PidEquipment>(entity =>
+        {
+            entity.HasOne(e => e.ExperionTag)
+                  .WithMany()
+                  .HasForeignKey(e => e.ExperionTagId)
+                  .OnDelete(DeleteBehavior.SetNull);
+        });
+    }
+}
+```
+
+> **현황**: 이미 코드베이스에 완료됨. `DbSet`과 FK 설정 모두 정상 구현됨.
+
+---
+
+### 단계 7: PidController 추가 ✅
+
+**파일**: [`src/Web/Controllers/ExperionControllers.cs`](../src/Web/Controllers/ExperionControllers.cs) (810-985행)
+
+```csharp
+[ApiController]
+[Route("api/pid")]
+public class ExperionPidController : ControllerBase
+{
+    private readonly IPidExtractorService _extractor;
+    private readonly ITagMappingService _mapping;
+
+    public ExperionPidController(IPidExtractorService extractor, ITagMappingService mapping)
+    {
+        _extractor = extractor;
+        _mapping = mapping;
+    }
+
+    [HttpPost("extract")]
+    [RequestSizeLimit(100 * 1024 * 1024)]
+    public async Task<IActionResult> Extract(IFormFile file) { ... }
+
+    [HttpGet("equipment")]
+    public async Task<IActionResult> GetEquipment(...) { ... }
+
+    [HttpGet("statistics")]
+    public async Task<IActionResult> GetStatistics() { ... }
+
+    [HttpPut("{id:long}/confidence")]
+    public async Task<IActionResult> UpdateConfidence(long id, [FromBody] double confidence) { ... }
+
+    [HttpPost("{id:long}/activate")]
+    public async Task<IActionResult> Activate(long id) { ... }
+
+    [HttpPost("{id:long}/deactivate")]
+    public async Task<IActionResult> Deactivate(long id) { ... }
+
+    [HttpGet("mappings")]
+    public async Task<IActionResult> GetMappings(...) { ... }
+
+    [HttpPost("mappings")]
+    public async Task<IActionResult> CreateMapping([FromBody] CreateMappingRequest req) { ... }
+
+    [HttpPut("mappings/{id:long}")]
+    public async Task<IActionResult> UpdateMapping(long id, [FromBody] UpdateMappingRequest req) { ... }
+
+    [HttpDelete("mappings/{id:long}")]
+    public async Task<IActionResult> ClearMapping(long id) { ... }
+
+    [HttpGet("mappings/available-tags")]
+    public async Task<IActionResult> GetAvailableTags() { ... }
+
+    [HttpGet("export/csv")]
+    public async Task<IActionResult> ExportCsv([FromQuery] string? tagNo) { ... }
+
+    [HttpGet("export/excel")]
+    public async Task<IActionResult> ExportExcel([FromQuery] string? tagNo) { ... }
+}
+```
+
+> **현황**: `ExperionControllers.cs`에 `ExperionPidController`가 완료됨.
+> **API 엔드포인트**: `/api/pid/*` (13개 엔드포인트)
+
+---
+
+### 단계 8: Program.cs 등록 ✅
+
+**파일**: [`src/Web/Program.cs`](../src/Web/Program.cs)
+
+```csharp
+// Line 85-86
+builder.Services.AddScoped<IPidExtractorService, PidExtractorService>();
+builder.Services.AddScoped<ITagMappingService, TagMappingService>();
+```
+
+> **현황**: 이미 코드베이스에 완료됨. `AddScoped` 등록 정상 구현됨.
+
+---
+
+### 단계 9~14: Frontend 및 기타 (2026-04-30)
+
+> **상태**: 단계 8까지 백엔드 완료. MCP 툴(`extract_pid_tags`, `match_pid_tags`) 구현 완료.
+> 단계 9-12 프론트엔드 구현 완료. 다음 단계는 통합 테스트입니다.
+
+| 단계 | 내용 | 상태 |
+|------|------|------|
+| 9 | `index.html`에 P&ID 탭 + pane 추가 | ✅ 완료 (2026-04-30) |
+| 10 | `app.js`에 P&ID 함수 추가 | ✅ 완료 (2026-04-30) |
+| 11 | `style.css`에 P&ID 스타일 추가 | ✅ 완료 (2026-04-30) |
+| 12 | `appsettings.json` Kestrel 설정 추가 | ✅ 완료 (2026-04-30) |
+| 13 | 실제 DXF/PDF 파일 업로드 테스트 | 대기 |
+| 14 | 수동 매핑 UI 동작 확인 | 대기 |
+
+---
+
+### 단계 9: index.html에 P&ID 탭 + pane 추가 ✅
+
+**파일**: [`src/Web/wwwroot/index.html`](../src/Web/wwwroot/index.html)
+
+**변경 내용**:
+1. 탭 목록에 P&ID 탭 추가 (11번째 탭)
+2. `pane-pid` 섹션 추가 (fastRecord 다음)
+
+**구조**:
+```html
+<li class="nav-item" data-tab="pid">
+  <span class="ni">11</span>
+  <span class="nl">P&ID 추출</span>
+</li>
+
+<section class="pane" id="pane-pid">
+  <!-- 파일 업로드 카드 -->
+  <div class="card">
+    <div class="card-cap">P&ID 파일 업로드</div>
+    <input type="file" id="pid-file-input" accept=".dxf,.pdf"/>
+    <button onclick="pidExtract()">🚀 추출 시작</button>
+  </div>
+  
+  <!-- 추출 결과 테이블 -->
+  <div class="card">
+    <table class="table" id="pid-table">
+      <thead>...</thead>
+      <tbody id="pid-table-body"></tbody>
+    </table>
+  </div>
+  
+  <!-- 통계 카드 -->
+  <div class="card">
+    <div class="stat-box">
+      <div class="stat-label">총 추출 건수</div>
+      <div class="stat-value" id="pid-stat-total">0</div>
+    </div>
+  </div>
+</section>
+```
+
+> **현황**: index.html에 P&ID 탭 및 pane 완료. fastRecord 다음에 추가.
+
+---
+
+### 단계 10: app.js에 P&ID 함수 추가 ✅
+
+**파일**: [`src/Web/wwwroot/js/app.js`](../src/Web/wwwroot/js/app.js)
+
+**추가된 함수**:
+- `pidExtract()` — 파일 업로드 및 추출 시작
+- `pidLoadTable(page)` — 추출 결과 테이블 로드
+- `pidRenderPagination(total, currentPage)` — 페이지네이션 렌더링
+- `pidUpdateStats()` — 통계 정보 업데이트
+- `pidClearLog()` — 로그 지우기
+- `pidOpenMapping(id)` — 매핑 모달 열기
+
+**API 연동**:
+- `/api/pid/extract` — POST (파일 업로드)
+- `/api/pid/equipment` — GET (목록 조회)
+- `/api/pid/export/csv` — GET (CSV 내보내기)
+- `/api/pid/export/excel` — GET (Excel 내보내기)
+
+> **현황**: app.js에 P&ID 함수 완료. 기존 패턴(인증서 관리, Text-to-SQL) 따름.
+
+---
+
+### 단계 11: style.css에 P&ID 스타일 추가 ✅
+
+**파일**: [`src/Web/wwwroot/css/style.css`](../src/Web/wwwroot/css/style.css)
+
+**추가된 스타일**:
+- `#pane-pid .btn-sm` — 버튼 스타일 (btn-a, btn-b)
+- `#pane-pid .badge` — 배지 스타일 (ok, warn, err)
+- `#pane-pid .stat-box` — 통계 박스 스타일
+- `#pane-pid .pagination` — 페이지네이션 스타일
+- `#pane-pid .logbox` — 로그 박스 스타일
+
+> **현황**: style.css에 P&ID 스타일 완료. 다크 테마 색상 사용.
+
+---
+
+### 단계 12: appsettings.json Kestrel 설정 추가 ✅
+
+**파일**: [`src/Web/appsettings.json`](../src/Web/appsettings.json)
+
+**추가된 설정**:
+```json
+{
+  "Kestrel": {
+    "Endpoints": {
+      "Http": {
+        "Url": "http://0.0.0.0:5000"
+      }
+    },
+    "Limits": {
+      "MaxRequestBodySize": 104857600
+    }
+  }
+}
+```
+
+**설명**:
+- `MaxRequestBodySize`: 100MB (DXF/PDF 파일 업로드용)
+- `Url`: 모든 네트워크 인터페이스에서 수신
+
+> **현황**: appsettings.json에 Kestrel 설정 완료.
+
+---
+
+## 📋 구현 순서 및 체크리스트
+
+### Phase 1 — 백엔드 (완료)
+
+- [x] **단계 1**: `ExperionCrawler.csproj`에 `netDxf`, `PdfPig`, `EPPlus` 패키지 추가 후 `dotnet build` 확인
+- [x] **단계 2**: `McpClient.cs`에 `ExtractPidTagsAsync`, `MatchPidTagsAsync` 메서드 추가
+- [x] **단계 3**: Python MCP 서버에 `extract_pid_tags`, `match_pid_tags` 툴 추가 및 테스트
+- [x] **단계 4**: `PidExtractorService.cs` 전체 교체
+- [x] **단계 5**: `ExperionControllers.cs`에 `ExperionPidController` 추가
+- [x] **단계 6**: `dotnet build` 에러 0건 확인 (2026-04-30 확인)
+- [ ] **단계 7**: Swagger(`/swagger`)에서 `/api/pid/*` 엔드포인트 노출 확인 (대기)
+
+### Phase 2 — 프론트엔드 (완료)
+
+- [x] **단계 9**: `index.html`에 P&ID 탭 + pane 추가 (2026-04-30)
+- [x] **단계 10**: `app.js`에 P&ID 함수 추가 (2026-04-30)
+- [x] **단계 11**: `style.css`에 P&ID 스타일 추가 (2026-04-30)
+- [x] **단계 12**: `appsettings.json` Kestrel 설정 추가 (2026-04-30)
+
+### Phase 3 — 통합 테스트 (대기)
+
+- [ ] 실제 DXF 파일 업로드 → 태그 추출 확인
+- [ ] 실제 PDF(텍스트) 파일 업로드 → 태그 추출 확인
+- [ ] 추출 결과 → Experion 태그 자동 매핑 제안 확인
+- [ ] 수동 매핑 UI 동작 확인
+- [ ] CSV/Excel 내보내기 확인
+- [ ] MCP 서버 다운 시 에러 처리 확인
+
+---
+
+## ⚠️ 주요 주의사항
+
+| 항목 | 내용 |
+|------|------|
+| MCP 서버 의존성 | `localhost:5001` 응답 없으면 추출 실패 — UI에서 `ping` 사전 확인 권장 |
+| netDxf 임시 파일 | DXF 파싱 시 `/tmp`에 임시 파일 생성/삭제 — 권한 및 디스크 여유 확인 |
+| PDF 텍스트 추출 실패 | 스캔본 PDF는 `ExtractPdfText()`가 빈 문자열 반환 → Vision 미구현 안내 메시지 |
+| 프롬프트 튜닝 | 도면 특성(한국어/영어, 태그 표기 방식)에 따라 MCP 서버 프롬프트 조정 필요 |
+| 대용량 DXF | 1만 개 이상 엔티티 시 LLM 컨텍스트 초과 가능 → `text[:12000]` 슬라이싱으로 제한 중 |
+| 태그 매핑 확신도 | `confidence < 0.7` 자동 매핑은 저장하지 않음 — 수동 매핑 유도 |
+
+---
+
+## 📝 코딩 가이드
+
+### 1. 기존 패턴 엄수
+
+```
+새 기능 추가 시 반드시 기존 코드 패턴을 따를 것
+
+✅ 엔티티:       [Table("테이블명")], [Column("컬럼명")] 어트리뷰트 필수
+✅ DbContext:    ExperionDbContext 단일 컨텍스트 — 별도 DbContext 생성 금지
+✅ 서비스 등록:  Program.cs에 AddScoped<Interface, Implementation>() 형태
+✅ 컨트롤러:     ExperionControllers.cs 단일 파일에 추가 (별도 파일 생성 금지)
+✅ 탭 진입:      API 자동 호출 금지 — 버튼 클릭으로만 동작
+✅ DOM 렌더링:   innerHTML += 루프 금지 — rows 배열 .join('') 후 한번에 설정
+✅ 로깅:         Console.WriteLine 금지 — ILogger<T> 사용
+```
+
+### 2. MCP 서버 연동 패턴
+
+```
+C# 서비스에서 MCP 호출 시 McpClient를 직접 주입.
+IMcpService를 통하지 않아도 됨 (McpClient는 Singleton 등록).
+```
+
+```csharp
+// 올바른 패턴
+public MyService(McpClient mcp, ExperionDbContext db, ILogger<MyService> logger)
+
+// 지양 — 래핑 레이어가 꼭 필요한 경우에만 사용
+public MyService(IMcpService mcp)
+```
+
+### 3. Python MCP 툴 작성 규칙
+
+```python
+# 응답은 반드시 순수 JSON 문자열 반환
+# 코드펜스(```json) 제거 후 반환
+# LLM 응답에서 JSON 배열 추출: re.search(r'\[.*\]', raw, re.DOTALL)
+# temperature=0.1 고정 (결정론적 출력)
+# 텍스트 슬라이싱: text[:12000] (컨텍스트 초과 방지)
+```
+
+### 4. 프론트엔드 규칙
+
+```
+✅ 함수 기반 작성 — class 사용 금지
+✅ 기존 헬퍼 함수 재사용: esc(), log(), setGlobal(), api()
+✅ 페이지네이션: pidRenderPagination() 패턴 — 전체 페이지 버튼 생성 금지 (±3 범위)
+✅ 다크 테마 색상 사용: #1e1e1e, #2d2d2d, #ccc, .ok/.err/.warn CSS 클래스
+✅ Bootstrap 클래스 사용 금지
+✅ fetch 에러 처리: if (!res.ok) throw new Error(...)
+```
+
+### 5. 빌드 검증 체크포인트
+
+```bash
+# 각 단계 완료 후 실행
+dotnet build src/Web/ExperionCrawler.csproj
+
+# 목표: 경고 N건, 에러 0건
+# netDxf/PdfPig/EPPlus 추가 후 새 경고 없어야 정상
+```
+
+---
+
+## 🧐 감독자 진단 — 단계 8까지 완료 여부 (2026-04-30 기준)
+
+| 항목 | 확인 내용 | 상태 |
+|------|-----------|------|
+| **엔티티** | `PidEquipment.cs`, `PidAuditLog.cs` 존재 | ✅ |
+| **DbSet** | `ExperionDbContext.PidEquipment`, `PidAuditLog` 등록 | ✅ |
+| **FK 설정** | `ExperionTagId` → `RealtimePoint.Id` FK 설정 | ✅ |
+| **DTOs** | `PidEquipmentDto`, `PidExtractionResult`, `TagMappingDtos` 존재 | ✅ |
+| **인터페이스** | `IPidExtractorService`, `ITagMappingService` 정의 | ✅ |
+| **서비스** | `TagMappingService`, `PidExtractorService` 구현 | ✅ |
+| **컨트롤러** | `ExperionPidController` (13개 엔드포인트) | ✅ |
+| **Program.cs** | `AddScoped<IPidExtractorService>`, `AddScoped<ITagMappingService>` | ✅ |
+| **MCP 툴** | `extract_pid_tags`, `match_pid_tags` 구현 | ✅ |
+| **빌드** | `dotnet build` 에러 0건 | ✅ |
+
+> **진단 결과**: 단계 1-8 완료. MCP 툴 추가 완료. 프론트엔드 구현(단계 9-12)은 v4.1에서 완료되었으며, 남은 작업은 통합 테스트(단계 13-14)입니다.
+
+### 6. MCP 툴 독립 테스트
+
+```bash
+# Python MCP 서버 직접 테스트 (C# 없이)
+curl -X POST http://localhost:5001/mcp \
+  -H "Content-Type: application/json" \
+  -H "mcp-protocol-version: 2025-03-26" \
+  -d '{"jsonrpc":"2.0","id":"1","method":"tools/call",
+       "params":{"name":"extract_pid_tags",
+                 "arguments":{"text":"FT-101 Flow Transmitter\nPT-201 Pressure","source_type":"dxf"}}}'
+# 기대 응답: [{"tagNo":"FT-101",...},{"tagNo":"PT-201",...}]
+```
--- a/futurePlan/p-9100-모형-page1.png
+++ b/futurePlan/p-9100-모형-page1.png
--- a/futurePlan/p-9100-모형.pdf
+++ b/futurePlan/p-9100-모형.pdf
--- a/futurePlan/pid_parser_coding_plan.md
+++ b/futurePlan/pid_parser_coding_plan.md
--- a/futurePlan/plant-9100-extracted.md
+++ b/futurePlan/plant-9100-extracted.md
@@ -0,0 +1,21 @@
+# plant-9100.pdf 추출 결과
+
+## PDF 정보
+
+- **버전**: 1.6
+- **페이지 수**: 1
+- **제목**: (없음)
+- **작성자**: (없음)
+- **생성 프로그램**: ezPDF Builder Supreme
+- **생성일**: D:20260430083400+09'00'
+
+## 페이지 1
+
+- **크기**: 595.2 x 841.92
+
+### 추출 텍스트
+
+```
+E-1011710th PLANT 증설공사주식회사 한울T-10101P-10114P-10101F-10102A/BP-10116E-10115BC-10111E-10117E-10119T-10100T-3210VP-10117SP-10601D-10113E-10112P-10118DP-1010112024.04.04K.S.YJ.O.YH.J.IAS BUILT23---4---5---6---TTE-10115A2025.06.17---REVISIONE-10103OGDEN PUMPTOGDEN PUMP2025.06.20REVISIONREVISION2025.06.232025.07.02REVISION2025.07.07REVISION
+```
+
--- a/futurePlan/plant-9100-extracted.txt
+++ b/futurePlan/plant-9100-extracted.txt
@@ -0,0 +1,70 @@
+--- Page 1 ---
+
+주식회사
+한울
+10th
+PLANT
+증설공사
+1
+2024.04.04
+AS
+BUILT
+K.S.Y
+J.O.Y
+H.J.I
+2
+2025.06.17
+REVISION
+-
+-
+-
+E-10115A
+E-10119
+3
+2025.06.20
+REVISION
+-
+-
+-
+P-10101
+4
+2025.06.23
+REVISION
+-
+-
+-
+P-10116
+5
+2025.07.02
+REVISION
+-
+-
+-
+6
+2025.07.07
+REVISION
+-
+-
+-
+F-10102A/B
+T
+E-10103
+OGDEN
+PUMP
+T
+P-10118
+E-10115B
+T-10101
+T-10100
+DP-10101
+E-10117
+P-10114
+OGDEN
+PUMP
+T
+E-10117
+T-3210
+SP-10601
+VP-10117
+C-10111
+D-10113 E-10112
--- a/futurePlan/plant-9100-page1.png
+++ b/futurePlan/plant-9100-page1.png
--- a/futurePlan/plant-9100-tag.md
+++ b/futurePlan/plant-9100-tag.md
--- a/futurePlan/plant-9100.pdf
+++ b/futurePlan/plant-9100.pdf
--- a/futurePlan/plant-9200.pdf
+++ b/futurePlan/plant-9200.pdf
--- a/log-handling-plan.md
+++ b/log-handling-plan.md
@@ -0,0 +1,394 @@
+# 로그 처리 계획 (Log Handling Plan)
+
+## 문제 상황
+
+터미널에 너무 많은 로그가 출력되어 정신없음
+
+### 발생 로그 예시
+
+```
+info: Microsoft.EntityFrameworkCore.Database.Command[20101]
+      Executed DbCommand (2ms) [Parameters=[@__u_Timestamp_2='?' (DbType = DateTime), @__u_Value_1='?', @__u_NodeId_0='?'], CommandType='Text', CommandTimeout='30']
+      UPDATE realtime_table AS r
+      SET timestamp = @__u_Timestamp_2,
+          livevalue = @__u_Value_1
+      WHERE r.node_id = @__u_NodeId_0
+info: ExperionCrawler.Infrastructure.OpcUa.ExperionRealtimeService[0]
+      Update realtime info.
+```
+
+### 원인 분석
+
+1. **EF Core DB Command 로그** (`Microsoft.EntityFrameworkCore.Database.Command`)
+   - `appsettings.json`에서 `Microsoft.EntityFrameworkCore.Database.Command`가 `"Information"`으로 설정
+   - 매 500ms마다 `UPDATE realtime_table` 쿼리 실행 시 로그 출력
+
+2. **ExperionRealtimeService 로그**
+   - `FlushPendingAsync()`에서 `_logger.LogDebug()`로 배치 업데이트 로그 남김
+   - `ExperionCrawler.Infrastructure.OpcUa.ExperionRealtimeService` 카테고리가 `"Information"`으로 설정
+
+---
+
+## 해결 방안
+
+### 제안 1: appsettings.json 수정 (가장 간단)
+
+```json
+{
+  "Logging": {
+    "LogLevel": {
+      "Default": "Information",
+      "Microsoft.AspNetCore": "Warning",
+      "Microsoft.EntityFrameworkCore": "Information",
+      "Microsoft.EntityFrameworkCore.Database.Command": "Warning",
+      "ExperionCrawler.Infrastructure.OpcUa.ExperionRealtimeService": "Warning"
+    }
+  }
+}
+```
+
+**효과:**
+- `UPDATE realtime_table` 쿼리 로그 숨김
+- `[Realtime] 배치 업데이트: X/Y건` 로그 숨김
+- 경고/오류만 표시
+
+---
+
+### 제안 2: 개발/배포 환경 분리 (추천)
+
+**appsettings.json** (배포용 - 로그 적게)
+```json
+{
+  "Logging": {
+    "LogLevel": {
+      "Default": "Information",
+      "Microsoft.AspNetCore": "Warning",
+      "Microsoft.EntityFrameworkCore": "Warning",
+      "Microsoft.EntityFrameworkCore.Database.Command": "Warning",
+      "ExperionCrawler": "Information"
+    }
+  }
+}
+```
+
+**appsettings.Development.json** (개발용 - 로그 많게)
+```json
+{
+  "Logging": {
+    "LogLevel": {
+      "Default": "Debug",
+      "Microsoft.EntityFrameworkCore": "Debug",
+      "Microsoft.EntityFrameworkCore.Database.Command": "Information",
+      "ExperionCrawler": "Debug"
+    }
+  }
+}
+```
+
+**사용 방법:**
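+- 개발 모드: `ASPNETCORE_ENVIRONMENT=Development dotnet run` (기본 launchSettings.json 프로필과 동일) → `appsettings.Development.json` 적용, 로그 자세히 보임
+- 배포 모드: `ASPNETCORE_ENVIRONMENT=Production dotnet run --no-launch-profile` → `appsettings.json`만 적용, 로그 적게 보임 (빌드 구성 `--configuration Release`는 설정 파일 선택에 영향을 주지 않음)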
+
+---
+
+### 제안 3: 코드에서 로그 레벨 조정
+
+**Program.cs**에서 로그 필터 추가:
+```csharp
+builder.Logging.AddFilter("Microsoft.EntityFrameworkCore.Database.Command", LogLevel.Warning);
+builder.Logging.AddFilter("ExperionCrawler", LogLevel.Information);
+```
+
+**ExperionRealtimeService.cs**의 `FlushPendingAsync()`에서:
+- `_logger.LogDebug()` → `_logger.LogTrace()` (가장 자세한 로그는 기본적으로 안 보임)
+
+---
+
+### 제안 4: 주석 처리 + 나중에 복구용
+
+**appsettings.json**에 주석으로 메모:
+```json
+{
+  "Logging": {
+    "LogLevel": {
+      // 개발 시 주석 해제: 로그 자세히 보기
+      // "Microsoft.EntityFrameworkCore.Database.Command": "Information",
+      // "ExperionCrawler": "Debug",
+      
+      // 배포 시: 로그 적게 (기본)
+      "Microsoft.EntityFrameworkCore.Database.Command": "Warning",
+      "ExperionCrawler": "Information"
+    }
+  }
+}
+```
+
+---
+
+## 검수 체크리스트
+
+- [ ] 로그 레벨 조정 후 터미널이 깔끔한지 확인
+- [ ] 필요한 로그가 사라지지 않았는지 확인
+- [ ] 개발 모드에서 로그를 다시 볼 수 있는지 확인
+
+---
+
+## 참고: 로그 레벨 순서
+
+```
+Trace    (가장 자세함 - 기본적으로 안 보임)
+Debug    (개발 시 유용 - 기본적으로 안 보임)
+Information (일반 로그 - 기본 표시)
+Warning  (경고 - 기본 표시)
+Error    (오류 - 기본 표시)
+Critical (치명적 오류 - 기본 표시)
+```
+
+---
+
+## 제안 5: UI에서 동적 로그 레벨 설정 (추천 + 개발자 친화)
+
+### 개요
+웹 UI에서 실시간으로 로그 레벨을 조정할 수 있도록 API를 추가하여, 개발 중에 코드 수정 없이 로그를 켜고 끌 수 있게 함.
+
+### 아키텍처
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                        Web UI (app.js)                      │
+│  ┌──────────────────────────────────────────────────────┐   │
+│  │  로그 레벨 설정 UI (Toggle Switch)                   │   │
+│  │  - EF Core Query Log (ON/OFF)                        │   │
+│  │  - Realtime Service Log (ON/OFF)                     │   │
+│  │  - Debug Mode (ON/OFF)                               │   │
+│  └──────────────────────────────────────────────────────┘   │
+└─────────────────────────────────────────────────────────────┘
+                            │
+                            ▼
+┌─────────────────────────────────────────────────────────────┐
+│              LogSettingsController (API)                    │
+│  - GET  /api/logsettings   → 현재 설정 조회                 │
+│  - POST /api/logsettings   → 설정 업데이트                  │
+└─────────────────────────────────────────────────────────────┘
+                            │
+                            ▼
+┌─────────────────────────────────────────────────────────────┐
+│              LogSettingsService (비즈니스 로직)             │
+│  - 로그 레벨 설정 저장 (appsettings.json)                   │
+│  - 로거 리로드 (새로운 설정 적용)                           │
+└─────────────────────────────────────────────────────────────┘
+```
+
+### 구현 계획
+
+#### 1. DTO 정의 (`src/Core/Application/DTOs/LogSettingsDtos.cs`)
+
+```csharp
+namespace ExperionCrawler.Core.Application.DTOs;
+
+/// <summary>
+/// Request body for updating the log toggles. Every property is nullable;
+/// presumably a null value means "leave this toggle unchanged" — confirm
+/// against the service implementation.
+/// </summary>
+public class LogSettingsRequest
+{
+    /// <summary>Requested state of the EF Core query log toggle.</summary>
+    public bool? EnableEfCoreQueryLog { get; set; }
+    /// <summary>Requested state of the realtime service log toggle.</summary>
+    public bool? EnableRealtimeServiceLog { get; set; }
+    /// <summary>Requested state of the debug mode toggle.</summary>
+    public bool? EnableDebugMode { get; set; }
+}
+
+/// <summary>Current state of the three log toggles.</summary>
+public class LogSettingsResponse
+{
+    /// <summary>State of the EF Core query log toggle.</summary>
+    public bool EfCoreQueryLog { get; set; }
+    /// <summary>State of the realtime service log toggle.</summary>
+    public bool RealtimeServiceLog { get; set; }
+    /// <summary>State of the debug mode toggle.</summary>
+    public bool DebugMode { get; set; }
+}
+```
+
+#### 2. 서비스 인터페이스 (`src/Core/Application/Interfaces/ILogSettingsService.cs`)
+
+```csharp
+namespace ExperionCrawler.Core.Application.Interfaces;
+
+/// <summary>Reads and updates the log toggle settings.</summary>
+public interface ILogSettingsService
+{
+    /// <summary>Returns the current state of the log toggles.</summary>
+    Task<LogSettingsResponse> GetSettingsAsync();
+    /// <summary>Applies the toggles carried by <paramref name="request"/>.</summary>
+    Task UpdateSettingsAsync(LogSettingsRequest request);
+}
+```
+
+#### 3. 서비스 구현 (`src/Core/Application/Services/LogSettingsService.cs`)
+
+```csharp
+namespace ExperionCrawler.Core.Application.Services;
+
+/// <summary>
+/// Reports the log toggles derived from the <c>Logging:LogLevel</c>
+/// configuration section and accepts update requests.
+/// </summary>
+public class LogSettingsService : ILogSettingsService
+{
+    private readonly IConfiguration _config;
+    private readonly ILogger<LogSettingsService> _logger;
+
+    public LogSettingsService(IConfiguration config, ILogger<LogSettingsService> logger)
+    {
+        _config = config;
+        _logger = logger;
+    }
+
+    /// <summary>
+    /// Builds the toggle state from the configured minimum levels. A toggle is
+    /// on when its category is configured at the required level or a more
+    /// verbose one.
+    /// </summary>
+    public Task<LogSettingsResponse> GetSettingsAsync()
+    {
+        var loggingSection = _config.GetSection("Logging");
+
+        var response = new LogSettingsResponse
+        {
+            EfCoreQueryLog = IsLogLevelEnabled(loggingSection, "Microsoft.EntityFrameworkCore.Database.Command", LogLevel.Information),
+            RealtimeServiceLog = IsLogLevelEnabled(loggingSection, "ExperionCrawler", LogLevel.Information),
+            DebugMode = IsLogLevelEnabled(loggingSection, "Default", LogLevel.Debug)
+        };
+
+        // Nothing is awaited here, so return a completed task instead of
+        // declaring the method async (avoids compiler warning CS1998).
+        return Task.FromResult(response);
+    }
+
+    /// <summary>
+    /// Reads appsettings.json and logs the requested toggles. Applying the
+    /// toggles is not implemented yet.
+    /// </summary>
+    public async Task UpdateSettingsAsync(LogSettingsRequest request)
+    {
+        // Read appsettings.json
+        var configPath = "appsettings.json";
+        var json = await File.ReadAllTextAsync(configPath);
+
+        // appsettings.json may contain comments and trailing commas, which the
+        // default JsonDocument options reject. JsonDocument rents pooled
+        // buffers, so it must be disposed.
+        var parseOptions = new JsonDocumentOptions
+        {
+            CommentHandling = JsonCommentHandling.Skip,
+            AllowTrailingCommas = true
+        };
+        using var doc = JsonDocument.Parse(json, parseOptions);
+
+        // TODO: apply the requested levels (the plan is to go through
+        // appsettings.Development.json).
+
+        // Log the individual values: the request type has no ToString()
+        // override, so logging the object prints only its type name.
+        _logger.LogInformation(
+            "[LogSettings] 설정 업데이트: EfCoreQueryLog={EfCoreQueryLog}, RealtimeServiceLog={RealtimeServiceLog}, DebugMode={DebugMode}",
+            request.EnableEfCoreQueryLog, request.EnableRealtimeServiceLog, request.EnableDebugMode);
+    }
+
+    /// <summary>
+    /// Returns true when <paramref name="category"/> is configured at
+    /// <paramref name="level"/> or a more verbose level. A missing or
+    /// unparsable value counts as disabled.
+    /// </summary>
+    private static bool IsLogLevelEnabled(IConfigurationSection section, string category, LogLevel level)
+    {
+        var value = section.GetSection("LogLevel")[category];
+
+        // LogLevel is ordered Trace < Debug < Information < ... < None, so
+        // "configured <= required" means messages at the required level pass.
+        return Enum.TryParse<LogLevel>(value, ignoreCase: true, out var configured)
+            && configured <= level;
+    }
+}
+```
+
+#### 4. 컨트롤러 (`src/Web/Controllers/LogSettingsController.cs`)
+
+```csharp
+namespace ExperionCrawler.Web.Controllers;
+
+/// <summary>HTTP endpoints for reading and updating the log toggles.</summary>
+[ApiController]
+[Route("api/[controller]")]
+public class LogSettingsController : ControllerBase
+{
+    private readonly ILogSettingsService _service;
+
+    public LogSettingsController(ILogSettingsService service) => _service = service;
+
+    /// <summary>Returns the current toggle state.</summary>
+    [HttpGet]
+    public async Task<ActionResult<LogSettingsResponse>> GetSettings()
+    {
+        var current = await _service.GetSettingsAsync();
+
+        // Shape the payload explicitly with camelCase keys.
+        var payload = new
+        {
+            efCoreQueryLog = current.EfCoreQueryLog,
+            realtimeServiceLog = current.RealtimeServiceLog,
+            debugMode = current.DebugMode
+        };
+        return Ok(payload);
+    }
+
+    /// <summary>Forwards the requested toggles to the service.</summary>
+    [HttpPost]
+    public async Task<ActionResult> UpdateSettings([FromBody] LogSettingsRequest request)
+    {
+        await _service.UpdateSettingsAsync(request);
+        return Ok();
+    }
+}
+```
+
+#### 5. UI 변경 (`src/Web/wwwroot/js/app.js`)
+
+```javascript
+// 로그 설정 UI 추가
+/**
+ * Builds the floating log-settings panel, loads the current values and wires
+ * each checkbox to a POST of its toggle.
+ */
+function createLogSettingsUI() {
+    const container = document.createElement('div');
+    container.style.cssText = `
+        position: fixed;
+        top: 10px;
+        right: 10px;
+        background: rgba(0,0,0,0.8);
+        padding: 15px;
+        border-radius: 8px;
+        z-index: 1000;
+        color: white;
+        font-family: monospace;
+    `;
+
+    container.innerHTML = `
+        <h3 style="margin: 0 0 10px 0; font-size: 14px;">Log Settings</h3>
+        <div style="display: flex; flex-direction: column; gap: 5px;">
+            <label style="display: flex; align-items: center; gap: 5px; font-size: 12px;">
+                <input type="checkbox" id="log-efcore"> EF Core Query
+            </label>
+            <label style="display: flex; align-items: center; gap: 5px; font-size: 12px;">
+                <input type="checkbox" id="log-realtime"> Realtime Service
+            </label>
+            <label style="display: flex; align-items: center; gap: 5px; font-size: 12px;">
+                <input type="checkbox" id="log-debug"> Debug Mode
+            </label>
+        </div>
+    `;
+
+    document.body.appendChild(container);
+
+    // Load the initial state.
+    loadLogSettings();
+
+    // Checkbox events. The keys must match the LogSettingsRequest properties
+    // (EnableEfCoreQueryLog, ...) in camelCase, otherwise model binding leaves
+    // every field null and the toggle has no effect.
+    document.getElementById('log-efcore').addEventListener('change', (e) => updateLogSetting('enableEfCoreQueryLog', e.target.checked));
+    document.getElementById('log-realtime').addEventListener('change', (e) => updateLogSetting('enableRealtimeServiceLog', e.target.checked));
+    document.getElementById('log-debug').addEventListener('change', (e) => updateLogSetting('enableDebugMode', e.target.checked));
+}
+
+/**
+ * Fetches the current log toggles and reflects them in the three checkboxes.
+ * Failures are reported on the console; the checkboxes are left untouched.
+ */
+async function loadLogSettings() {
+    try {
+        const res = await fetch('/api/logsettings');
+        // fetch() resolves on HTTP errors too; without this check an error
+        // page would be parsed as settings.
+        if (!res.ok) throw new Error(`HTTP ${res.status}`);
+        const data = await res.json();
+        document.getElementById('log-efcore').checked = data.efCoreQueryLog;
+        document.getElementById('log-realtime').checked = data.realtimeServiceLog;
+        document.getElementById('log-debug').checked = data.debugMode;
+    } catch (e) {
+        console.error('Failed to load log settings:', e);
+    }
+}
+
+/**
+ * Posts a single toggle to the log settings endpoint.
+ * @param {string} key   Request property name to set.
+ * @param {boolean} value New toggle state.
+ */
+async function updateLogSetting(key, value) {
+    try {
+        const res = await fetch('/api/logsettings', {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ [key]: value })
+        });
+        // fetch() only rejects on network failure; surface HTTP errors too.
+        if (!res.ok) throw new Error(`HTTP ${res.status}`);
+    } catch (e) {
+        console.error('Failed to update log setting:', e);
+    }
+}
+```
+
+#### 6. Program.cs 등록
+
+```csharp
+// LogSettingsService 등록
+builder.Services.AddScoped<ILogSettingsService, LogSettingsService>();
+```
+
+### 사용 방법
+
+1. 웹 UI에 로그 설정 패널이 오른쪽 상단에 표시됨
+2. 체크박스를 켜고 끄면 실시간으로 로그 레벨이 변경됨
+3. 설정은 세션 간 유지되지 않음 (개발 편의용)
+
+### 장점
+- 코드 수정 없이 로그 테스트 가능
+- 실시간으로 로그 켜고 끌 수 있음
+- UI에서 직관적으로 조작 가능
+
+### 단점
+- 설정이 메모리에만 저장됨 (재시작 시 초기화)
+- 영구 설정을 원하면 appsettings.json 수정 필요
+
+---
--- a/mcp-server/pycache/server.cpython-312.pyc
+++ b/mcp-server/pycache/server.cpython-312.pyc
--- a/mcp-server/pipeline/pycache/analyzer.cpython-312.pyc
+++ b/mcp-server/pipeline/pycache/analyzer.cpython-312.pyc
--- a/mcp-server/pipeline/pycache/extractor.cpython-312.pyc
+++ b/mcp-server/pipeline/pycache/extractor.cpython-312.pyc
--- a/mcp-server/pipeline/pycache/mapper.cpython-312.pyc
+++ b/mcp-server/pipeline/pycache/mapper.cpython-312.pyc
--- a/mcp-server/pipeline/pycache/topology.cpython-312.pyc
+++ b/mcp-server/pipeline/pycache/topology.cpython-312.pyc
--- a/mcp-server/pipeline/analyzer.py
+++ b/mcp-server/pipeline/analyzer.py
@@ -0,0 +1,78 @@
+import networkx as nx
+from typing import Dict, List, Optional
+import json
+import os
+
+class PidAnalysisEngine:
+    """Failure-impact analysis over a saved P&ID topology graph.
+
+    The topology file is read as node-link JSON (a ``nodes`` list plus an edge
+    list) into a directed graph; the mapping file is loaded verbatim into
+    ``tag_mapping``.
+    """
+
+    def __init__(self, topology_file: str, mapping_file: str):
+        """Store both file paths and load their contents immediately."""
+        self.topology_file = topology_file
+        self.mapping_file = mapping_file
+        self.graph = nx.DiGraph()
+        self.tag_mapping = {}
+        self.load_data()
+
+    def load_data(self):
+        """Load the graph and the tag mapping; files that do not exist are skipped.
+
+        Loading is best-effort: any error is printed and whatever was loaded
+        before the error is kept.
+        """
+        try:
+            if os.path.exists(self.topology_file):
+                with open(self.topology_file, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+                for node in data.get('nodes', []):
+                    self.graph.add_node(node['id'], **node)
+                # node_link_data() stores the edge list under 'links' in older
+                # NetworkX releases and under 'edges' in newer ones.
+                for edge in data.get('links') or data.get('edges', []):
+                    self.graph.add_edge(edge['source'], edge['target'], **edge)
+
+            if os.path.exists(self.mapping_file):
+                with open(self.mapping_file, 'r', encoding='utf-8') as f:
+                    self.tag_mapping = json.load(f)
+        except Exception as e:
+            print(f"Error loading analysis data: {e}")
+
+    def _flow_graph(self, start_node: str) -> nx.DiGraph:
+        """Return the graph of passable edges, always containing ``start_node``.
+
+        An edge is passable when its ``flow_direction`` is 'forward' and its
+        ``valve_status`` is 'open'; a missing attribute counts as passable.
+        """
+        passable = nx.DiGraph()
+        # Add the start node explicitly so a node with no passable edge is
+        # still a valid BFS source instead of raising NodeNotFound.
+        passable.add_node(start_node)
+        passable.add_edges_from(
+            (u, v) for u, v, d in self.graph.edges(data=True)
+            if d.get('flow_direction', 'forward') == 'forward'
+            and d.get('valve_status', 'open') == 'open'
+        )
+        return passable
+
+    def get_propagation_path_with_flow(self, start_node: str):
+        """Return ``{node: hop_count}`` for every node reachable over passable edges.
+
+        The start node itself is included with distance 0. An empty dict is
+        returned when ``start_node`` is not in the topology.
+        """
+        if start_node not in self.graph:
+            return {}
+        return dict(nx.single_source_shortest_path_length(self._flow_graph(start_node), start_node))
+
+    def analyze_impact(self, node_id: str):
+        """Return the impact analysis for a failure of ``node_id``.
+
+        The result holds ``impactedNodes`` (node -> hop count, start node
+        included) and ``paths`` (one shortest path per impacted node). Both come
+        from the same search over passable edges, so a path never crosses a
+        closed valve or a reverse-flow edge.
+        """
+        if node_id not in self.graph:
+            return {"success": False, "error": f"Node {node_id} not found in topology"}
+
+        routes = nx.single_source_shortest_path(self._flow_graph(node_id), node_id)
+
+        return {
+            "success": True,
+            "startNode": node_id,
+            "impactedNodes": {target: len(route) - 1 for target, route in routes.items()},
+            "paths": [route for target, route in routes.items() if target != node_id]
+        }
--- a/mcp-server/pipeline/extractor.py
+++ b/mcp-server/pipeline/extractor.py
@@ -0,0 +1,173 @@
+import ezdxf
+import re
+import json
+from typing import List, Optional, Tuple, Union
+from pydantic import BaseModel, Field
+from shapely.geometry import box, Point
+
+# --- Data Models ---
+
+class BoundingBox(BaseModel):
+    """Axis-aligned rectangle given by its min/max corners, plus its center point."""
+    min_x: float
+    min_y: float
+    max_x: float
+    max_y: float
+    # (x, y) of the rectangle's midpoint
+    center: Tuple[float, float]
+
+class GeometricEntity(BaseModel):
+    """One extracted drawing entity: identity, bounding box, optional text and coordinates."""
+    entity_id: str
+    entity_type: str  # TEXT, MTEXT, LINE, LWPOLYLINE, CIRCLE, ARC
+    layer: str
+    bbox: BoundingBox
+    # Text content as stored in the drawing; None for entities without text
+    raw_value: Optional[str] = None
+    # Text content after formatting codes were stripped; None for entities without text
+    clean_value: Optional[str] = None
+    # 2D points describing the entity (vertices, end points or center)
+    coordinates: List[Union[Tuple[float, float], List[float]]] = Field(default_factory=list)
+    # Free-form extra attributes
+    properties: dict = Field(default_factory=dict)
+
+# --- Extractor Implementation ---
+
+class PidGeometricExtractor:
+    """Extracts text and geometry entities from a DXF modelspace and saves them as JSON."""
+
+    def __init__(self, file_path: str):
+        """Open the DXF file and keep its modelspace.
+
+        Raises:
+            IOError: if the file cannot be read or parsed.
+        """
+        try:
+            self.doc = ezdxf.readfile(file_path)
+            self.msp = self.doc.modelspace()
+        except Exception as e:
+            # Chain the cause so the underlying error stays in the traceback.
+            raise IOError(f"Failed to load DXF file: {e}") from e
+
+    def clean_text(self, text: str) -> str:
+        """Strip MTEXT inline formatting and DXF control codes from ``text``.
+
+        Returns the text with codes replaced by spaces, whitespace collapsed
+        and both ends trimmed; empty or None input yields "".
+        """
+        if not text:
+            return ""
+
+        # 1. Inline codes that carry an argument terminated by ';'
+        #    (alignment, colour, font, height, oblique, tracking, width,
+        #    paragraph properties). The argument is removed together with the
+        #    code, so e.g. "\H2.5x;" leaves no residue.
+        text = re.sub(r'\\[ACcFfHQTWp][^;\\{}]*;', ' ', text)
+
+        # 2. Stacked text "\S<content>;": drop the code, keep the content.
+        text = re.sub(r'\\S([^;]*);', r' \1 ', text)
+
+        # 3. Codes without an argument: paragraph/column breaks and the
+        #    underline/overline/strike-through toggles. Digits after "\P" are
+        #    text, not an argument: "FT\P101" must become "FT 101".
+        text = re.sub(r'\\[PXNLlOoKk~]', ' ', text)
+
+        # 4. Grouping braces { }
+        text = re.sub(r'[\{\}]', ' ', text)
+
+        # 5. DXF %% control codes (underline, overline, ...), in either case
+        text = re.sub(r'%%[UOSR]', ' ', text, flags=re.IGNORECASE)
+
+        # 6. Collapse repeated whitespace
+        text = re.sub(r'\s+', ' ', text).strip()
+
+        return text
+
+    def get_bbox(self, entity) -> Optional[BoundingBox]:
+        """Compute a bounding box for the supported entity types.
+
+        Returns None for unsupported types, for an LWPOLYLINE without points,
+        and when the computation fails (the error is printed).
+        """
+        try:
+            kind = entity.dxftype()
+
+            if kind == 'TEXT':
+                p = entity.dxf.insert
+                h = entity.dxf.height
+                # Rough width estimate: character count * height * 0.6
+                width = len(entity.dxf.text) * h * 0.6
+                return self._create_bbox(p.x, p.y, p.x + width, p.y + h)
+
+            elif kind == 'MTEXT':
+                p = entity.dxf.insert
+                # Both attributes are optional; dxf.get() returns the fallback
+                # when they are unset instead of raising.
+                h = entity.dxf.get('char_height', 2.5)
+                declared_width = entity.dxf.get('width', 0)
+                w = declared_width if declared_width > 0 else len(entity.text) * h * 0.6
+                return self._create_bbox(p.x, p.y, p.x + w, p.y + h)
+
+            elif kind == 'LINE':
+                start = entity.dxf.start
+                end = entity.dxf.end
+                return self._create_bbox(
+                    min(start.x, end.x), min(start.y, end.y),
+                    max(start.x, end.x), max(start.y, end.y)
+                )
+
+            elif kind == 'LWPOLYLINE':
+                points = entity.get_points()
+                if not points:
+                    return None
+                xs = [p[0] for p in points]
+                ys = [p[1] for p in points]
+                return self._create_bbox(min(xs), min(ys), max(xs), max(ys))
+
+            elif kind in ('CIRCLE', 'ARC'):
+                center = entity.dxf.center
+                radius = entity.dxf.radius
+                # An ARC gets the box of its full circle.
+                return self._create_bbox(
+                    center.x - radius, center.y - radius,
+                    center.x + radius, center.y + radius
+                )
+
+        except Exception as e:
+            print(f"Error calculating bbox for {entity.dxftype()} ({entity.dxf.handle}): {e}")
+        return None
+
+    def _create_bbox(self, min_x, min_y, max_x, max_y) -> BoundingBox:
+        """Build a BoundingBox from its corners and derive the center point."""
+        return BoundingBox(
+            min_x=min_x,
+            min_y=min_y,
+            max_x=max_x,
+            max_y=max_y,
+            center=((min_x + max_x) / 2, (min_y + max_y) / 2)
+        )
+
+    def extract_and_save(self, output_path: str):
+        """Extract every entity that has a bounding box and write the list as JSON.
+
+        Returns ``output_path``.
+        """
+        results = []
+        for entity in self.msp:
+            bbox_obj = self.get_bbox(entity)
+            if not bbox_obj:
+                continue
+
+            kind = entity.dxftype()
+
+            raw_text = ""
+            if kind == 'TEXT':
+                raw_text = entity.dxf.text
+            elif kind == 'MTEXT':
+                raw_text = entity.text
+
+            # Keep only the 2D position of each point.
+            coords = []
+            if hasattr(entity, 'get_points'):
+                # LWPOLYLINE vertices carry extra per-vertex values after x and
+                # y; only the first two items are the position.
+                coords = [(p[0], p[1]) for p in entity.get_points()]
+            elif kind == 'LINE':
+                coords = [(entity.dxf.start.x, entity.dxf.start.y), (entity.dxf.end.x, entity.dxf.end.y)]
+            elif kind in ('CIRCLE', 'ARC'):
+                coords = [(entity.dxf.center.x, entity.dxf.center.y)]
+
+            entity_data = GeometricEntity(
+                entity_id=entity.dxf.handle,
+                entity_type=kind,
+                layer=entity.dxf.layer,
+                bbox=bbox_obj,
+                raw_value=raw_text if raw_text else None,
+                clean_value=self.clean_text(raw_text) if raw_text else None,
+                coordinates=coords,
+                properties={
+                    "color": entity.dxf.color,
+                    "lineweight": entity.dxf.lineweight if hasattr(entity.dxf, 'lineweight') else None,
+                }
+            )
+            results.append(entity_data.model_dump())
+
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(results, f, ensure_ascii=False, indent=4)
+
+        return output_path
+
+# --- Proximity Utilities ---
+
+def is_near(bbox_a: BoundingBox, bbox_b: BoundingBox, threshold=5.0) -> bool:
+    """
+    Tell whether the shortest distance between two bounding boxes is within
+    ``threshold``. The distance is computed with shapely boxes.
+    """
+    rect_a, rect_b = (
+        box(b.min_x, b.min_y, b.max_x, b.max_y) for b in (bbox_a, bbox_b)
+    )
+    gap = rect_a.distance(rect_b)
+    return gap <= threshold
+
+def is_inside(point: Tuple[float, float], bbox: BoundingBox) -> bool:
+    """
+    Tell whether ``point`` lies inside the bounding box (edges included).
+    """
+    x_ok = bbox.min_x <= point[0] <= bbox.max_x
+    if not x_ok:
+        return x_ok
+    return bbox.min_y <= point[1] <= bbox.max_y
--- a/mcp-server/pipeline/mapper.py
+++ b/mcp-server/pipeline/mapper.py
@@ -0,0 +1,122 @@
+import networkx as nx
+import asyncio
+import json
+from typing import List, Optional, Dict, Any, Tuple
+from pydantic import BaseModel, Field
+from rapidfuzz import process, fuzz
+from openai import AsyncOpenAI
+
+# --- 응답 구조화를 위한 Pydantic 모델 ---
+class MappingResult(BaseModel):
+    """Structured outcome of mapping one drawing tag; all three fields are required."""
+    resolved_tag: str = Field(..., description="The final mapped system tag")
+    reason: str = Field(..., description="Reason for this mapping based on context")
+    # Validated to lie in the closed range [0.0, 1.0]
+    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score from 0 to 1")
+
+class IntelligentMapper:
+    """Maps P&ID drawing tags to system tags using fuzzy candidates plus an LLM."""
+
+    def __init__(self, graph: nx.Graph, system_tags: List[str], api_client: Optional[AsyncOpenAI] = None,
+                 model: str = "Qwen/Qwen3-Coder-Next-FP8"):
+        """
+        Args:
+            graph: topology graph whose nodes carry 'value' and 'type' attributes.
+            system_tags: full list of candidate system tags.
+            api_client: async OpenAI-compatible client; without it every
+                mapping resolves to UNKNOWN.
+            model: chat model name sent with every completion request.
+        """
+        self.graph = graph
+        self.system_tags = system_tags
+        self.client = api_client
+        self.model = model
+
+    def get_node_context(self, node_id: str) -> str:
+        """Describe the node's direct neighbours as a comma-separated text."""
+        if not self.graph.has_node(node_id):
+            return "Node not found in graph"
+
+        neighbors = list(self.graph.neighbors(node_id))
+        context = []
+        for n in neighbors:
+            attr = self.graph.nodes[n]
+            val = attr.get('value', n)
+            typ = attr.get('type', 'Unknown')
+            context.append(f"Connected to {val} (Type: {typ})")
+
+        return ", ".join(context) if context else "No connected neighbors"
+
+    async def _resolve_generic(self, node_id: str, category_prompt: str) -> MappingResult:
+        """Resolve one node to a system tag.
+
+        Returns an UNKNOWN result with confidence 0.0 when no client is
+        configured, when the node has no tag text, or when the request or the
+        response parsing fails (the error is printed).
+        """
+        if not self.client:
+            return MappingResult(resolved_tag="UNKNOWN", reason="API Client not provided", confidence=0.0)
+
+        node_data = self.graph.nodes.get(node_id, {})
+        tag_text = node_data.get('value', '')
+
+        # Nodes without text (missing node, or a 'value' of None/'') cannot be
+        # matched; skip the fuzzy search and the LLM round trip.
+        if not tag_text:
+            return MappingResult(resolved_tag="UNKNOWN", reason="Node has no tag text", confidence=0.0)
+
+        # First-pass candidates (RapidFuzz)
+        candidates = process.extract(tag_text, self.system_tags, scorer=fuzz.WRatio, limit=5)
+        context = self.get_node_context(node_id)
+
+        prompt = f"""
+        {category_prompt}
+        P&ID 도면의 태그 '{tag_text}'를 실제 시스템 태그와 매핑해야 합니다.
+        위상 맥락: {context}
+        후보 리스트: {candidates}
+        
+        반드시 다음 JSON 형식으로만 응답하세요:
+        {{
+            "resolved_tag": "태그명 또는 UNKNOWN",
+            "reason": "매핑 이유",
+            "confidence": 0.0~1.0
+        }}
+        """
+
+        try:
+            response = await self.client.chat.completions.create(
+                model=self.model,
+                messages=[{"role": "user", "content": prompt}],
+                response_format={ "type": "json_object" }
+            )
+            raw_content = response.choices[0].message.content
+            return MappingResult.model_validate_json(raw_content)
+        except Exception as e:
+            print(f"Error resolving node {node_id}: {e}")
+            return MappingResult(resolved_tag="UNKNOWN", reason=f"Error: {str(e)}", confidence=0.0)
+
+    async def _resolve_many(self, node_ids: List[str], category_prompt: str) -> Dict[str, MappingResult]:
+        """Resolve all nodes concurrently with one shared category prompt."""
+        tasks = [self._resolve_generic(nid, category_prompt) for nid in node_ids]
+        results = await asyncio.gather(*tasks)
+        return dict(zip(node_ids, results))
+
+    # --- Specialised workers: same logic, different expert persona ---
+    async def extract_transmitters(self, node_ids: List[str]) -> Dict[str, MappingResult]:
+        """Map transmitter nodes; returns ``{node_id: MappingResult}``."""
+        prompt = "당신은 계측기 전문 엔지니어입니다. 특히 Pressure/Flow/Level Transmitter 매핑에 특화되어 있습니다."
+        return await self._resolve_many(node_ids, prompt)
+
+    async def extract_valves(self, node_ids: List[str]) -> Dict[str, MappingResult]:
+        """Map valve nodes; returns ``{node_id: MappingResult}``."""
+        prompt = "당신은 밸브 및 액추에이터 전문 엔지니어입니다. 밸브의 개폐 상태 및 제어 태그 매핑에 특화되어 있습니다."
+        return await self._resolve_many(node_ids, prompt)
+
+    async def extract_equipment(self, node_ids: List[str]) -> Dict[str, MappingResult]:
+        """Map main equipment nodes; returns ``{node_id: MappingResult}``."""
+        prompt = "당신은 공정 설비 전문 엔지니어입니다. 펌프, 탱크, 열교환기 등의 메인 설비 태그 매핑에 특화되어 있습니다."
+        return await self._resolve_many(node_ids, prompt)
+
+def validate_mapping(resolved_tag: str, symbol_type: str, tag_metadata: Dict[str, Any]) -> Tuple[bool, str]:
+    """Check that the symbol type agrees with the mapped tag's metadata.
+
+    The unit is checked first (exact, case-sensitive match against the units
+    allowed for the symbol type); otherwise the description is searched for a
+    type keyword, case-insensitively.
+
+    Returns:
+        ``(is_valid, reason)``.
+    """
+    if resolved_tag == "UNKNOWN":
+        return False, "Tag not resolved"
+
+    unit_map = {
+        "Pressure Transmitter": ["bar", "psi", "kPa", "Pa"],
+        "Flow Meter": ["m3/h", "lpm", "kg/h"],
+        "Temperature Sensor": ["°C", "C", "K", "°F"]
+    }
+
+    # A key can be present with a None value; treat that like a missing key
+    # instead of failing on None.strip().
+    actual_unit = str(tag_metadata.get('unit') or '').strip()
+    allowed_units = unit_map.get(symbol_type, [])
+
+    if actual_unit and actual_unit in allowed_units:
+        return True, "Unit Match"
+
+    actual_desc = str(tag_metadata.get('description') or '').lower()
+    expected_keywords = {
+        "Pressure Transmitter": ["pressure", "press"],
+        "Flow Meter": ["flow", "flowrate"],
+        "Temperature Sensor": ["temp", "temperature"]
+    }
+
+    keywords = expected_keywords.get(symbol_type, [])
+    if any(kw in actual_desc for kw in keywords):
+        return True, "Description Match (Unit Missing)"
+
+    return False, "Mismatch: Symbol type and Tag metadata do not align"
--- a/mcp-server/pipeline/topology.py
+++ b/mcp-server/pipeline/topology.py
@@ -0,0 +1,123 @@
+import networkx as nx
+from shapely.geometry import box, Point, LineString
+import json
+from typing import List, Dict, Any, Optional, Tuple
+
+class PidTopologyBuilder:
+    """Builds a directed topology graph from extracted P&ID geometry.
+
+    Nodes are drawing entities. Text nodes are linked to their nearest
+    equipment ('associated_with'), and equipment found near the end points of a
+    line is linked by a 'pipe' edge.
+    """
+
+    # Entity types that are labels / piping runs; every other type is equipment.
+    _TEXT_TYPES = ('TEXT', 'MTEXT')
+    _PIPE_TYPES = ('LINE', 'LWPOLYLINE')
+    _DEFAULT_CONFIG = {'dist_threshold': 50.0, 'tag_threshold': 100.0}
+
+    def __init__(self, geometric_data: List[Dict[str, Any]], all_extracted_tags: Optional[List[Dict[str, Any]]] = None, config: Optional[Dict[str, float]] = None):
+        """
+        - geometric_data: extracted geometric entities (list of dicts)
+        - all_extracted_tags: consolidated tag list, added as extra text nodes
+        - config: thresholds, e.g. {'dist_threshold': 50.0, 'tag_threshold': 100.0};
+          keys that are not given fall back to these defaults
+        """
+        self.data = geometric_data
+        self.all_tags = all_extracted_tags if all_extracted_tags else []
+        # Merge over the defaults so a partial config cannot raise KeyError later.
+        self.config = {**self._DEFAULT_CONFIG, **(config or {})}
+        self.G = nx.DiGraph()
+
+    def build_graph(self):
+        """Populate ``self.G`` with nodes, tag associations and pipe edges."""
+        # 1. Every entity becomes a node.
+        for item in self.data:
+            bbox_vals = item['bbox']
+            bbox_geom = box(bbox_vals['min_x'], bbox_vals['min_y'], bbox_vals['max_x'], bbox_vals['max_y'])
+
+            self.G.add_node(item['entity_id'],
+                           type=item['entity_type'],
+                           bbox=bbox_geom,
+                           value=item.get('clean_value'),
+                           layer=item.get('layer'))
+
+        # 2. Separately extracted tags are added as text nodes.
+        for tag in self.all_tags:
+            bbox_vals = tag['bbox']
+            bbox_geom = box(bbox_vals['min_x'], bbox_vals['min_y'], bbox_vals['max_x'], bbox_vals['max_y'])
+            self.G.add_node(tag['entity_id'],
+                           type='TEXT',
+                           bbox=bbox_geom,
+                           value=tag.get('clean_value') or tag.get('tagName'))
+
+        # 3. Tag -> equipment association. MTEXT entities are labels as well,
+        #    so they count as tags and never as equipment.
+        tags = [n for n, d in self.G.nodes(data=True) if d['type'] in self._TEXT_TYPES]
+        equipments = [n for n, d in self.G.nodes(data=True)
+                      if d['type'] not in self._TEXT_TYPES + self._PIPE_TYPES]
+
+        for tag in tags:
+            best_match = self._find_nearest_equipment(tag, equipments)
+            if best_match:
+                self.G.add_edge(tag, best_match, relation='associated_with')
+
+        # 4. Pipe connections, based on the end points of each line.
+        # Index the source items once (first occurrence wins) instead of
+        # scanning the whole list for every line.
+        items_by_id = {}
+        for item in self.data:
+            items_by_id.setdefault(item['entity_id'], item)
+
+        lines = [n for n, d in self.G.nodes(data=True) if d['type'] in self._PIPE_TYPES]
+        for line_id in lines:
+            original_item = items_by_id.get(line_id)
+            if not original_item or not original_item.get('coordinates'):
+                continue
+
+            coords = original_item['coordinates']
+            if len(coords) < 2:
+                # A LineString needs at least two points.
+                continue
+            line_geom = LineString(coords)
+            endpoints = [line_geom.coords[0], line_geom.coords[-1]]
+
+            connected_nodes = []
+            for pt in endpoints:
+                p = Point(pt)
+                for eq_id in equipments:
+                    if self.G.nodes[eq_id]['bbox'].distance(p) < self.config['dist_threshold']:
+                        connected_nodes.append(eq_id)
+
+            # De-duplicate while keeping discovery order (start point first),
+            # so the edge direction below is deterministic.
+            connected_nodes = list(dict.fromkeys(connected_nodes))
+
+            if len(connected_nodes) >= 2:
+                # Simplified direction rule: first equipment found -> second one.
+                self.G.add_edge(connected_nodes[0], connected_nodes[1], relation='pipe')
+
+    def _find_nearest_equipment(self, tag_id, equipment_ids):
+        """Return the equipment closest to the tag, or None if none is within 'tag_threshold'."""
+        tag_bbox = self.G.nodes[tag_id]['bbox']
+        min_dist = float('inf')
+        nearest = None
+        for eq_id in equipment_ids:
+            eq_bbox = self.G.nodes[eq_id]['bbox']
+            dist = tag_bbox.distance(eq_bbox)
+            if dist < min_dist:
+                min_dist = dist
+                nearest = eq_id
+        return nearest if min_dist < self.config['tag_threshold'] else None
+
+    def validate_topology(self):
+        """Return isolated nodes plus node and edge counts."""
+        isolated = list(nx.isolates(self.G))
+        return {
+            "isolated_nodes": isolated,
+            "node_count": self.G.number_of_nodes(),
+            "edge_count": self.G.number_of_edges()
+        }
+
+    def save_graph(self, output_path: str):
+        """Write the graph as node-link JSON and return ``output_path``."""
+        from networkx.readwrite import json_graph
+        data = json_graph.node_link_data(self.G)
+
+        # shapely geometries are not JSON serialisable; store their bounds.
+        for node in data['nodes']:
+            if 'bbox' in node:
+                bbox = node['bbox']
+                node['bbox'] = {
+                    'min_x': bbox.bounds[0],
+                    'min_y': bbox.bounds[1],
+                    'max_x': bbox.bounds[2],
+                    'max_y': bbox.bounds[3]
+                }
+
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, ensure_ascii=False, indent=4)
+        return output_path
+
+def analyze_impact(graph, start_node):
+    """List every node downstream of ``start_node``; empty when the node is not in the graph."""
+    if start_node in graph:
+        # All nodes reachable from the start node
+        return list(nx.descendants(graph, start_node))
+    return []
--- a/mcp-server/pyproject.toml
+++ b/mcp-server/pyproject.toml
@@ -10,6 +10,18 @@ dependencies = [
    "openai>=1.0.0",
    "httpx>=0.27.0",
    "psycopg[binary]>=3.1.0",
+    # P&ID 파싱
+    "ezdxf>=1.3.0",
+    # ARM64 환경 지원: paddlepaddle 3.x는 ARM64 wheel 미지원
+    # 2.6.0~2.9.x는 소스 빌드 가능
+    "paddlepaddle>=2.6.0,<3.0.0",
+    # paddleocr 2.7.0+는 paddlepaddle 3.3.1을 요구하며 ARM64 wheel 미지원
+    # 2.6.0은 paddlepaddle 2.x를 지원하여 ARM64 설치 가능
+    "paddleocr>=2.6.0,<2.7.0",
+    "pymupdf>=1.24.0",
+    "scikit-learn>=1.3.0",
+    "numpy>=1.24.0",
+    "Pillow>=10.0.0",
 ]

 [project.scripts]
--- a/mcp-server/server.py
+++ b/mcp-server/server.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """
 ExperionCrawler Unified MCP Server
- RAG: Qdrant + Ollama nomic-embed-text + vLLM Qwen3-Coder-Next-FP8
+- RAG: Qdrant + Ollama nomic-embed-text + vLLM Qwen/Qwen3-Coder-Next-FP8
 - NL2SQL: 자연어 → LLM SQL 생성 → PostgreSQL 실행
 - 사용처:
    stdio 모드 (기본): Claude Code MCP / Roo Code MCP
@@ -41,6 +41,15 @@ mcp = FastMCP(
    stateless_http=True,
 )

+# Pipeline Imports
+from pipeline.extractor import PidGeometricExtractor
+from pipeline.topology import PidTopologyBuilder
+from pipeline.mapper import IntelligentMapper
+from pipeline.analyzer import PidAnalysisEngine
+import networkx as nx
+import os
+import asyncio
+
 # ── 임베딩 (Ollama) ───────────────────────────────────────────────────────────

 def _embed(text: str) -> list[float]:
@@ -53,13 +62,128 @@ def _embed(text: str) -> list[float]:
        resp.raise_for_status()
        return resp.json()["embedding"]

-# ── LLM (vLLM / Qwen3-Coder-Next-FP8) ───────────────────────────────────────
+# ── LLM (vLLM / Qwen/Qwen3-Coder-Next-FP8) ─────────────────────────────────────

@lru_cache(maxsize=1)
 def _llm():
    from openai import OpenAI
    return OpenAI(base_url=VLLM_BASE_URL, api_key="dummy")

+
+# ── PaddleOCR 싱글톤 (PDF fallback용) ──────────────────────────────────────────
+
+@lru_cache(maxsize=1)
+def _ocr():
+    """PaddleOCR 인스턴스 (한/영, GPU). 첫 호출 시 ~50MB 모델 다운로드."""
+    from paddleocr import PaddleOCR
+    import os
+
+    use_gpu = os.environ.get("PADDLE_USE_GPU", "true").lower() == "true"
+    try:
+        ocr = PaddleOCR(
+            use_angle_cls=True,
+            lang="korean",
+            use_gpu=use_gpu,
+            show_log=False,
+        )
+        return ocr
+    except Exception as e:
+        # GPU 실패 시 CPU 폴백
+        if use_gpu:
+            os.environ["PADDLE_USE_GPU"] = "false"
+            return _ocr()
+        raise e
+
+
+# ── DXF/PDF 텍스트 추출 헬퍼 ───────────────────────────────────────────────────
+
+def _extract_text_from_dxf(filepath: str) -> str:
+    """ezdxf로 DXF 파일에서 텍스트 추출 (MTEXT 포맷 코드 제거)."""
+    import ezdxf
+    from ezdxf.tools.text import plain_mtext
+    doc = ezdxf.readfile(filepath)
+    msp = doc.modelspace()
+    texts = []
+    for entity in msp:
+        if entity.dxftype() == "TEXT":
+            texts.append(entity.dxf.text)
+        elif entity.dxftype() == "MTEXT":
+            try:
+                plain = plain_mtext(entity.dxf.text)
+                if plain.strip():
+                    texts.append(plain)
+            except Exception:
+                pass
+    return "\n".join(texts)
+
+
+def _extract_text_from_pdf(filepath: str) -> str:
+    """PyMuPDF로 PDF 파일에서 텍스트 추출."""
+    import fitz  # pymupdf
+    doc = fitz.open(filepath)
+    texts = []
+    for page in doc:
+        texts.append(page.get_text())
+    return "\n".join(texts)
+
+
+def _extract_text_from_pdf_ocr(filepath: str) -> str:
+    """PaddleOCR로 PDF에서 이미지 추출 후 OCR (고정밀도)."""
+    import fitz  # pymupdf
+    from PIL import Image
+    import numpy as np
+
+    doc = fitz.open(filepath)
+    all_texts = []
+
+    for page_idx, page in enumerate(doc):
+        # 페이지를 이미지로 변환
+        mat = fitz.Matrix(300 / 72)  # 300 DPI
+        pix = page.get_pixmap(matrix=mat)
+        img_data = pix.tobytes("png")
+        img = Image.open(__import__("io").BytesIO(img_data))
+
+        # OCR 실행
+        result = _ocr().ocr(np.array(img), cls=True)
+        if result[0]:
+            for line in result[0]:
+                all_texts.append(line[1][0])
+
+    return "\n".join(all_texts)
+
+
+def _convert_dwg_to_dxf_dxflib(filepath: str) -> str:
+    """libreoffice로 DWG를 DXF로 변환."""
+    import subprocess
+    import os
+    
+    dxf_path = filepath.replace(".dwg", ".dxf")
+    
+    try:
+        # LibreOffice로 변환
+        result = subprocess.run(
+            [
+                "libreoffice",
+                "--headless",
+                "--convert-to", "dxf:AutoCAD DXF",
+                "--outdir", os.path.dirname(filepath) or ".",
+                filepath
+            ],
+            check=True,
+            timeout=120,
+            capture_output=True,
+            text=True
+        )
+        
+        if os.path.exists(dxf_path):
+            return dxf_path
+        else:
+            raise FileNotFoundError("DXF 변환 파일이 생성되지 않았습니다.")
+            
+    except subprocess.CalledProcessError as e:
+        raise Exception(f"LibreOffice 변환 실패: {e.stderr}")
+
+
 # ── Qdrant 검색 헬퍼 ──────────────────────────────────────────────────────────

 def _search(collection: str, query: str, top_k: int, threshold: float = 0.25) -> str:
@@ -187,7 +311,7 @@ def search_r530_docs(query: str, top_k: int = 5) -> str:

@mcp.tool()
 def ask_iiot_llm(question: str, context: str = "") -> str:
-    """Qwen3-Coder-Next에게 IIoT/OPC UA 질문 (컨텍스트 없이 LLM 직접 질문).
+    """Qwen/Qwen3-Coder-Next-FP8에게 IIoT/OPC UA 질문 (컨텍스트 없이 LLM 직접 질문).

    사용 시점: search_codebase 또는 search_r530_docs 결과를 context로 넘겨
    종합 분석·답변이 필요할 때. 또는 일반 IIoT/OPC UA 개념 질문.
@@ -216,7 +340,7 @@ def ask_iiot_llm(question: str, context: str = "") -> str:

@mcp.tool()
 def rag_query(question: str, search_code: bool = False, search_docs: bool = True) -> str:
-    """검색 → Qwen3-Coder-Next 답변 생성 (통합 RAG).
+    """검색 → Qwen/Qwen3-Coder-Next-FP8 답변 생성 (통합 RAG).

    기본값: Experion HS R530 공식 문서만 검색 (search_docs=True, search_code=False).
    ExperionCrawler 코드도 함께 보려면 search_code=True 추가.
@@ -442,6 +566,525 @@ def query_with_nl(question: str) -> str:
    return json.dumps(result, ensure_ascii=False, default=str)


+# ── P&ID 추출 도구 ──────────────────────────────────────────────────────────────
+
+@mcp.tool()
+def extract_pid_tags(text: str, source_type: str) -> str:
+    """P&ID 도면(DXF/PDF)에서 태그 정보를 추출합니다.
+
+    Args:
+        text:        DXF/PDF에서 추출한 텍스트
+        source_type: 'dxf' 또는 'pdf'
+
+    Returns:
+        JSON: { success, count, tags: [{tagNo, equipmentName, instrumentType, lineNumber, pidDrawingNo, confidence}] }
+    """
+    system = (
+        "You are a P&ID (Piping and Instrumentation Diagram) expert.\n"
+        "Extract all instrument and equipment tags from the provided text.\n"
+        "Return ONLY a valid JSON array. Each element must have exactly these fields:\n"
+        '{"tagNo":"FCV-101","equipmentName":null,"instrumentType":"FCV","lineNumber":null,"pidDrawingNo":null,"confidence":0.95}\n'
+        "Rules:\n"
+        "- tagNo: any token matching [LETTERS]-[DIGITS] or [LETTERS]-[DIGITS]-[SUFFIX]\n"
+        "  Examples: FCV-101, P-10101, T-10100, VG-6203-15A-F1A-n, BT-6200, DP-10101\n"
+        "- instrumentType: leading letters of tagNo (e.g. FCV, P, T, VG, BT, DP, PSV)\n"
+        "- equipmentName: descriptive name if present in text near the tag, else null\n"
+        "- lineNumber: null unless a line number is explicitly associated\n"
+        "- pidDrawingNo: null unless a drawing number is explicitly associated\n"
+        "- confidence: 0.95 for clear tags, lower for ambiguous ones\n"
+        "- Output ONLY the JSON array, no markdown, no explanation.\n"
+        "- If no tags found, return: []\n"
+    )
+    
+    import logging
+    import re
+    import json as json_module
+
+    try:
+        truncated_text = text[:100000] if len(text) > 100000 else text
+
+        resp = _llm().chat.completions.create(
+            model=VLLM_MODEL,
+            messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": f"Source: {source_type}\n\nText:\n{truncated_text}"},
+            ],
+            max_tokens=32768,
+            temperature=0.1,
+            extra_body={"chat_template_kwargs": {"enable_thinking": False}},
+        )
+
+        raw = (resp.choices[0].message.content or "").strip()
+        finish_reason = resp.choices[0].finish_reason
+
+        # 마크다운 코드 블록 제거
+        if raw.startswith("```"):
+            lines = raw.splitlines()
+            raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip()
+
+        # finish_reason=length 로 잘린 경우: 마지막 완전한 객체까지 살린 뒤 배열 닫기
+        if finish_reason == "length":
+            last_close = raw.rfind("}")
+            if last_close != -1:
+                raw = raw[:last_close + 1] + "]"
+
+        # 유효한 JSON 배열 추출 (가장 긴 균형 잡힌 [...] 선택)
+        def _extract_array(s: str) -> str:
+            depth = 0; start = -1; best = ""
+            for i, c in enumerate(s):
+                if c == '[':
+                    if depth == 0: start = i
+                    depth += 1
+                elif c == ']':
+                    depth -= 1
+                    if depth == 0 and start >= 0:
+                        cand = s[start:i+1]
+                        if len(cand) > len(best): best = cand
+            return best if best else "[]"
+
+        raw = _extract_array(raw)
+
+        # JSON 파싱 — 실패 시 개별 객체 추출로 폴백
+        try:
+            data = json_module.loads(raw)
+        except json_module.JSONDecodeError:
+            objects = re.findall(r'\{[^{}]*\}', raw, re.DOTALL)
+            data = []
+            for obj in objects:
+                try:
+                    data.append(json_module.loads(obj))
+                except json_module.JSONDecodeError:
+                    pass
+            if not data:
+                return json_module.dumps({"success": False, "count": 0, "tags": []}, ensure_ascii=False)
+
+        logging.info(f"[extract_pid_tags] source={source_type} count={len(data) if isinstance(data, list) else 0}")
+        
+        return json_module.dumps({
+            "success": True,
+            "count": len(data),
+            "tags": data
+        }, ensure_ascii=False, indent=2)
+        
+    except Exception as e:
+        logging.error(f"P&ID 태그 추출 실패: {e}")
+        logging.error(f"Raw response: {raw[:1000]}")
+        return json.dumps({"success": False, "error": f"P&ID 태그 추출 실패: {e}"}, ensure_ascii=False)
+
+
+@mcp.tool()
+def match_pid_tags(pid_tags: list[str], experion_tags: list[str]) -> str:
+    """P&ID 태그를 Experion 태그에 매핑합니다.
+
+    Args:
+        pid_tags:       P&ID에서 추출한 태그 목록 (예: ["FT-101", "PT-201"])
+        experion_tags:  Experion 시스템 태그 목록 (예: ["ficq-6113.pv", "pt-201.pv"])
+
+    Returns:
+        JSON: { success, count, mappings: [{pidTag, experionTag, confidence}] }
+    """
+    system = (
+        "You are a P&ID to Experion tag matching expert.\n"
+        "Match P&ID tags to Experion tags based on similarity.\n"
+        "Return ONLY a JSON array of objects with the following structure:\n"
+        '[{"pidTag":"FT-101","experionTag":"ft-101.pv","confidence":0.92},...]\n'
+        "IMPORTANT rules:\n"
+        "- pidTag: The original P&ID tag from input\n"
+        "- experionTag: The matched Experion tag (lowercase, with .pv/.sp/.mv suffix)\n"
+        "- confidence: 0.0 to 1.0 based on match quality\n"
+        "- If no good match found, set confidence < 0.5 and leave experionTag null\n"
+        "- Do NOT include any explanation, only the JSON array.\n"
+        "- If no matches found, return an empty array: []\n"
+        "- temperature=0.1 for deterministic output.\n"
+    )
+    
+    import re
+    import json as json_module
+
+    try:
+        pid_str = "\n".join(pid_tags)
+        experion_str = "\n".join(experion_tags)
+
+        resp = _llm().chat.completions.create(
+            model=VLLM_MODEL,
+            messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": f"P&ID Tags:\n{pid_str}\n\nExperion Tags:\n{experion_str}"},
+            ],
+            max_tokens=16384,
+            temperature=0.1,
+            extra_body={"chat_template_kwargs": {"enable_thinking": False}},
+        )
+
+        raw = (resp.choices[0].message.content or "").strip()
+        finish_reason = resp.choices[0].finish_reason
+
+        if raw.startswith("```"):
+            lines = raw.splitlines()
+            raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip()
+
+        if finish_reason == "length":
+            last_close = raw.rfind("}")
+            if last_close != -1:
+                raw = raw[:last_close + 1] + "]"
+
+        match = re.search(r'\[.*\]', raw, re.DOTALL)
+        raw = match.group(0) if match else "[]"
+
+        data = json_module.loads(raw)
+        return json_module.dumps({"success": True, "count": len(data), "mappings": data},
+                                  ensure_ascii=False, indent=2)
+
+    except Exception as e:
+        return json.dumps({"success": False, "error": f"P&ID 태그 매핑 실패: {e}"}, ensure_ascii=False)
+
+
+# ── P&ID 파싱 도구 (DXF/PDF/DWG) ───────────────────────────────────────────────
+
+
+@mcp.tool()
+def parse_pid_dxf(filepath: str) -> str:
+    """ezdxf 기반 DXF 파일 파싱. 텍스트 추출 후 LLM으로 태그 자동 추출.
+
+    Args:
+        filepath: DXF 파일 경로
+
+    Returns:
+        JSON: { success, text, count, tags: [{tagNo, equipmentName, ...}] }
+    """
+    try:
+        text = _extract_text_from_dxf(filepath)
+        if not text.strip():
+            return json.dumps({
+                "success": True,
+                "text": "",
+                "count": 0,
+                "tags": []
+            }, ensure_ascii=False, indent=2)
+        
+        # LLM으로 태그 추출
+        system = (
+            "You are a P&ID (Piping and Instrumentation Diagram) expert.\n"
+            "Extract instrument and equipment tags from the provided text.\n"
+            "Return ONLY a JSON array of objects with the following structure:\n"
+            '[{"tagNo":"FIT-10115","equipmentName":"Flow Transmitter","instrumentType":"FT" OR "FIT OR "TIA","lineNumber":"L-101","pidDrawingNo":"P&ID-001","confidence":0.95},...]\n'
+            "IMPORTANT rules:\n"
+            "- tagNo: Standard tag format with these patterns:\n"
+            "  * Instrument: [Function][Loop]-[Number] (e.g., FT-101, PT-201, LI-301, FICQ-6113)\n"
+            "  * Equipment: [Type]-[Number] (e.g., P-10101, T-10100, C-9111, E-10119)\n"
+            "  * Complex: [Type]-[Number]-[Size]-[Class]-[Material]-[Option] (e.g., VG-6203-15A-F1A-n, CD-10513-40A-S1A-H50)\n"
+            "  * Real examples from DXF: BT-6200, SARF-#6-PID-002, P-6101, DP-10101, CHS-6630-100A-F-C50\n"
+            "- instrumentType: First 2-4 letters of tagNo (FIT, PT, LI, FICQ, TCV, FCV, PCV, PG, TG, etc.)\n"
+            "- equipmentName: Descriptive name if available, otherwise null\n"
+            "- lineNumber: Line number if available, otherwise null\n"
+            "- pidDrawingNo: Drawing number if available, otherwise null\n"
+            "- confidence: 0.0 to 1.0 based on how clearly the tag was identified\n"
+            "- Do NOT include any explanation, only the JSON array.\n"
+            "- If no tags found, return an empty array: []\n"
+            "- temperature=0.1 for deterministic output.\n"
+        )
+        
+        truncated_text = text[:12000] if len(text) > 12000 else text
+        
+        resp = _llm().chat.completions.create(
+            model=VLLM_MODEL,
+            messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": f"Source: dxf\n\nText:\n{truncated_text}"},
+            ],
+            max_tokens=4096,
+            temperature=0.1,
+        )
+        
+        raw = (resp.choices[0].message.content or "").strip()
+        
+        # 마크다운 코드 블록 제거
+        if raw.startswith("```"):
+            lines = raw.splitlines()
+            raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip()
+        
+        # JSON 배열 추출
+        import re
+        match = re.search(r'\[.*\]', raw, re.DOTALL)
+        if match:
+            raw = match.group(0)
+        
+        # JSON 파싱 시도
+        try:
+            data = json.loads(raw)
+        except json.JSONDecodeError:
+            # JSON 배열 추출 시도 (더 엄격한 패턴)
+            match = re.search(r'\[\s*\{.*?\}\s*\]', raw, re.DOTALL)
+            if match:
+                raw_clean = match.group(0)
+                try:
+                    data = json.loads(raw_clean)
+                except json.JSONDecodeError:
+                    # 마지막으로, JSON 배열을 개별 객체로 분리하여 파싱 시도
+                    objects = re.findall(r'\{[^{}]*\}', raw, re.DOTALL)
+                    data = []
+                    for obj in objects:
+                        try:
+                            data.append(json.loads(obj))
+                        except json.JSONDecodeError:
+                            pass
+        
+        if not isinstance(data, list):
+            data = []
+        
+        return json.dumps({
+            "success": True,
+            "text": text[:10000],  # 제한
+            "count": len(text),
+            "tags": data
+        }, ensure_ascii=False, indent=2)
+    except Exception as e:
+        return json.dumps({"success": False, "error": f"DXF 파싱 실패: {e}"}, ensure_ascii=False)
+
+
+@mcp.tool()
+def parse_pid_pdf(filepath: str, use_ocr: bool = True) -> str:
+    """PyMuPDF 기반 PDF 파일 파싱. 텍스트 추출 후 LLM으로 태그 자동 추출.
+
+    Args:
+        filepath: PDF 파일 경로
+        use_ocr:  OCR 사용 여부 (기본 True, 고정밀도)
+
+    Returns:
+        JSON: { success, text, count, tags: [{tagNo, equipmentName, ...}] }
+    """
+    try:
+        if use_ocr:
+            text = _extract_text_from_pdf_ocr(filepath)
+        else:
+            text = _extract_text_from_pdf(filepath)
+        
+        if not text.strip():
+            return json.dumps({
+                "success": True,
+                "text": "",
+                "count": 0,
+                "tags": []
+            }, ensure_ascii=False, indent=2)
+        
+        # LLM으로 태그 추출
+        system = (
+            "You are a P&ID (Piping and Instrumentation Diagram) expert.\n"
+            "Extract instrument and equipment tags from the provided text.\n"
+            "Return ONLY a JSON array of objects with the following structure:\n"
+            '[{"tagNo":"FIT-10115","equipmentName":"Flow Transmitter","instrumentType":"FT" OR "FIT OR "TIA","lineNumber":"L-101","pidDrawingNo":"P&ID-001","confidence":0.95},...]\n'
+            "IMPORTANT rules:\n"
+            "- tagNo: Standard tag format with these patterns:\n"
+            "  * Instrument: [Function][Loop]-[Number] (e.g., FT-101, PT-201, LI-301, FICQ-6113)\n"
+            "  * Equipment: [Type]-[Number] (e.g., P-10101, T-10100, C-9111, E-10119)\n"
+            "  * Complex: [Type]-[Number]-[Size]-[Class]-[Material]-[Option] (e.g., VG-6203-15A-F1A-n, CD-10513-40A-S1A-H50)\n"
+            "  * Real examples from DXF: BT-6200, SARF-#6-PID-002, P-6101, DP-10101, CHS-6630-100A-F-C50\n"
+            "- instrumentType: First 2-4 letters of tagNo (FIT, PT, LI, FICQ, TCV, FCV, PCV, PG, TG, etc.)\n"
+            "- equipmentName: Descriptive name if available, otherwise null\n"
+            "- lineNumber: Line number if available, otherwise null\n"
+            "- pidDrawingNo: Drawing number if available, otherwise null\n"
+            "- confidence: 0.0 to 1.0 based on how clearly the tag was identified\n"
+            "- Do NOT include any explanation, only the JSON array.\n"
+            "- If no tags found, return an empty array: []\n"
+            "- temperature=0.1 for deterministic output.\n"
+        )
+        
+        truncated_text = text[:12000] if len(text) > 12000 else text
+        
+        resp = _llm().chat.completions.create(
+            model=VLLM_MODEL,
+            messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": f"Source: pdf\n\nText:\n{truncated_text}"},
+            ],
+            max_tokens=4096,
+            temperature=0.1,
+        )
+        
+        raw = (resp.choices[0].message.content or "").strip()
+        
+        # 마크다운 코드 블록 제거
+        if raw.startswith("```"):
+            lines = raw.splitlines()
+            raw = "\n".join(lines[1:-1] if lines and lines[-1].strip() == "```" else lines[1:]).strip()
+        
+        # JSON 배열 추출
+        import re
+        match = re.search(r'\[.*\]', raw, re.DOTALL)
+        if match:
+            raw = match.group(0)
+        
+        # JSON 파싱 시도
+        try:
+            data = json.loads(raw)
+        except json.JSONDecodeError:
+            # JSON 배열 추출 시도 (더 엄격한 패턴)
+            match = re.search(r'\[\s*\{.*?\}\s*\]', raw, re.DOTALL)
+            if match:
+                raw_clean = match.group(0)
+                try:
+                    data = json.loads(raw_clean)
+                except json.JSONDecodeError:
+                    # 마지막으로, JSON 배열을 개별 객체로 분리하여 파싱 시도
+                    objects = re.findall(r'\{[^{}]*\}', raw, re.DOTALL)
+                    data = []
+                    for obj in objects:
+                        try:
+                            data.append(json.loads(obj))
+                        except json.JSONDecodeError:
+                            pass
+        
+        if not isinstance(data, list):
+            data = []
+        
+        return json.dumps({
+            "success": True,
+            "text": text[:10000],
+            "count": len(text),
+            "tags": data
+        }, ensure_ascii=False, indent=2)
+    except Exception as e:
+        return json.dumps({"success": False, "error": f"PDF 파싱 실패: {e}"}, ensure_ascii=False)
+
+
+@mcp.tool()
+async def build_pid_graph_parallel(filepath: str) -> str:
+    """
+    분산 처리 기법을 적용한 P&ID 그래프 생성 툴.
+    전처리 -> 병렬 분산 추출 -> 위상 모델링 -> 저장 과정을 수행합니다.
+    """
+    try:
+        # 1. 전처리 (Phase 1: Geometric Extraction)
+        extractor = PidGeometricExtractor(filepath)
+        geo_data_path = f"mcp-server/storage/{os.path.basename(filepath)}_geo.json"
+        geo_data_list = extractor.extract_and_save(geo_data_path)
+        
+        # geo_data_list는 경로를 반환하므로 다시 로드
+        with open(geo_data_path, 'r', encoding='utf-8') as f:
+            geo_data = json.load(f)
+
+        # 2. 병렬 분산 추출 (Phase 3: Intelligent Mapping)
+        # 시스템 태그 목록 가져오기 (DB에서 조회하는 로직 필요, 여기서는 예시로 빈 리스트 또는 기본값)
+        # 실제로는 get_tag_metadata 등을 통해 전체 태그 리스트를 확보해야 함
+        system_tags = []
+        try:
+            conn = _get_db_connection()
+            with conn.cursor() as cur:
+                cur.execute("SELECT tagname FROM realtime_table")
+                system_tags = [r[0] for r in cur.fetchall()]
+        except Exception as e:
+            logging.warning(f"Failed to fetch system tags: {e}")
+
+        # 그래프 임시 생성 (Mapper가 위상 정보를 사용하므로 필요)
+        builder = PidTopologyBuilder(geo_data)
+        builder.build_graph()
+        
+        # Mapper 설정
+        from openai import AsyncOpenAI
+        api_client = AsyncOpenAI(base_url=VLLM_BASE_URL, api_key="dummy")
+        mapper = IntelligentMapper(builder.G, system_tags, api_client=api_client)
+        
+        # 분류별 노드 분리
+        nodes = list(builder.G.nodes())
+        transmitter_nodes = [n for n, d in builder.G.nodes(data=True) if d.get('value', '').upper() in ['FIT', 'FT', 'LT', 'PT', 'TE']] # 단순화된 필터
+        valve_nodes = [n for n, d in builder.G.nodes(data=True) if d.get('value', '').upper() in ['FCV', 'LCV', 'TCV', 'PCV', 'XV']]
+        equipment_nodes = [n for n, d in builder.G.nodes(data=True) if d.get('type') not in ['TEXT', 'LINE', 'LWPOLYLINE']]
+
+        # 병렬 호출 (vLLM Batching 유도)
+        tasks = [
+            mapper.extract_transmitters(transmitter_nodes),
+            mapper.extract_valves(valve_nodes),
+            mapper.extract_equipment(equipment_nodes)
+        ]
+        extracted_results = await asyncio.gather(*tasks)
+        
+        # 결과 통합
+        all_mapped_tags = []
+        for res_dict in extracted_results:
+            for node_id, mapping in res_dict.items():
+                if mapping.resolved_tag != "UNKNOWN":
+                    # TopologyBuilder가 기대하는 형식으로 변환
+                    node_data = builder.G.nodes[node_id]
+                    all_mapped_tags.append({
+                        "entity_id": node_id,
+                        "tagName": mapping.resolved_tag,
+                        "bbox": node_data['bbox'].bounds if hasattr(node_data['bbox'], 'bounds') else node_data['bbox'],
+                        "clean_value": mapping.resolved_tag
+                    })
+
+        # 3. 최종 위상 모델링 (Phase 2)
+        final_builder = PidTopologyBuilder(geo_data, all_extracted_tags=all_mapped_tags)
+        final_builder.build_graph()
+        
+        # 4. 저장
+        graph_id = os.path.basename(filepath).replace(".dxf", "_graph.json")
+        graph_path = f"mcp-server/storage/{graph_id}"
+        final_builder.save_graph(graph_path)
+        
+        return json.dumps({
+            "success": True,
+            "graph_id": graph_id,
+            "graph_path": graph_path,
+            "nodes": final_builder.G.number_of_nodes(),
+            "edges": final_builder.G.number_of_edges()
+        }, ensure_ascii=False)
+
+    except Exception as e:
+        logging.error(f"build_pid_graph_parallel failed: {e}")
+        return json.dumps({"success": False, "error": str(e)}, ensure_ascii=False)
+
+@mcp.tool()
+def analyze_pid_impact(graph_id: str, start_node_id: str) -> str:
+    """
+    구축된 그래프를 기반으로 특정 설비 장애 시 영향도 분석을 수행합니다.
+    """
+    try:
+        graph_path = f"mcp-server/storage/{graph_id}"
+        mapping_path = graph_path.replace("_graph.json", "_mapping.json") # 매핑 파일이 따로 저장된다고 가정
+        
+        analyzer = PidAnalysisEngine(graph_path, mapping_path)
+        result = analyzer.analyze_impact(start_node_id)
+        
+        return json.dumps(result, ensure_ascii=False, indent=2)
+    except Exception as e:
+        return json.dumps({"success": False, "error": f"Impact analysis failed: {e}"}, ensure_ascii=False)
+
+@mcp.tool()
+def parse_pid_drawing(filepath: str) -> str:
+    """확장자 자동 감지하여 P&ID 도면 파싱.
+
+    Args:
+        filepath: DXF/DWG/PDF 파일 경로
+
+    Returns:
+        JSON: { success, text, count, tags, format }
+    """
+    import os
+    ext = os.path.splitext(filepath)[1].lower()
+
+    if ext == ".dxf":
+        return parse_pid_dxf(filepath)
+    elif ext == ".dwg":
+        # DWG 파일은 사전에 DXF로 변환하여 업로드해야 합니다.
+        # Linux에서 DWG를 DXF로 변환하는 도구는 제한되어 있습니다.
+        return json.dumps({
+            "success": False,
+            "error": "DWG 파일은 현재 직접 파싱할 수 없습니다.\n" +
+                    "사전에 DXF로 변환하여 업로드해 주세요.\n" +
+                    "\n변환 방법:\n" +
+                    "1. Windows에서 AutoCAD 또는 ODA File Converter 사용\n" +
+                    "2. 온라인 DWG → DXF 변환기 사용\n" +
+                    "3. LibreOffice Draw (Windows/macOS 전용) 사용"
+        }, ensure_ascii=False)
+    elif ext == ".pdf":
+        return parse_pid_pdf(filepath)
+    else:
+        return json.dumps({
+            "success": False,
+            "error": f"Unsupported format: {ext}. Supported: .dxf, .dwg, .pdf"
+        }, ensure_ascii=False)
+
+
 # ── 엔트리포인트 ──────────────────────────────────────────────────────────────

 def main():
--- a/mcp-server/uv.lock
+++ b/mcp-server/uv.lock
--- a/pid_parser_coding_byGemma4.md
+++ b/pid_parser_coding_byGemma4.md
@@ -0,0 +1,91 @@
+# P&ID Parser Detailed Implementation Plan (by Gemma 4)
+
+## 1. 개요 (Overview)
+본 계획은 대용량 DXF 파일 처리 시 발생하는 LLM 프롬프트 복잡도 및 하드웨어 부하 문제를 해결하기 위해, 기존의 단일 프로세스 추출 방식을 **분산 처리 방식(Distributed Processing)**으로 전환하는 상세 설계도입니다.
+
+## 2. 데이터 인터페이스 설계 (Data Interface Specification)
+
+### 2.1 중간 데이터 포맷 (Intermediate JSON Schema)
+메인 프로그램이 `ezdxf`로 전처리하여 서브 프로그램에 전달할 데이터 구조입니다. 서브 프로그램은 이 JSON을 읽어 패턴 매칭을 수행합니다.
+
+```json
+{
+  "metadata": {
+    "filename": "string",
+    "timestamp": "ISO8601"
+  },
+  "entities": [
+    {
+      "type": "TEXT | MTEXT | LINE | CIRCLE | LWPOLYLINE",
+      "layer": "string",
+      "content": "string (텍스트 내용)",
+      "coordinates": { "x": 0.0, "y": 0.0, "z": 0.0 },
+      "attributes": { "color": "int", "lineweight": "float" }
+    }
+  ]
+}
+```
+
+## 3. 컴포넌트별 상세 설계 (Component Detailed Design)
+
+### 3.1 [Python] Main Processor (Orchestrator)
+**역할**: DXF 전처리, 서브 프로세스 생명주기 관리, 결과 통합 및 DB 저장 요청.
+
+- **Class: `DXFPreprocessor`**
+    - `load_and_parse(file_path)`: `ezdxf`를 사용하여 DXF를 로드하고 엔티티를 추출.
+    - `generate_intermediate_json(output_path)`: 추출된 엔티티를 위 JSON 스키마로 변환하여 저장.
+- **Class: `ExtractionOrchestrator`**
+    - `run_parallel_extractors(input_json_path)`: `subprocess.Popen`을 사용하여 5개의 서브 프로그램을 병렬로 실행.
+    - `monitor_processes(process_list)`: 모든 프로세스가 종료될 때까지 대기하며 에러 발생 시 로그 기록.
+    - `aggregate_results(result_files)`: 각 서브 프로그램이 생성한 JSON 파일들을 읽어 하나의 `MasterExtractionResult`로 병합.
+- **Class: `DatabaseIntegrator`**
+    - `send_to_backend(merged_data)`: 병합된 데이터를 .NET API(C#)로 전송.
+
+### 3.2 [Python] Specialized Extractors (Sub-programs)
+**역할**: 전달받은 JSON에서 특정 정규표현식(Regex) 패턴을 찾아 추출.
+
+- **Common Logic (Base Class: `BaseExtractor`)**
+    - `load_input_json()`: 전처리된 JSON 로드.
+    - `apply_regex_pattern(pattern)`: 엔티티의 `content` 필드에 대해 Regex 매칭 수행.
+    - `save_output_json()`: 추출된 결과를 `result_{type}.json`으로 저장.
+
+- **Specific Patterns (Regex Implementation)**
+    1.  **`dxf_extract_transmitter.py`**: `r"(FIT|FT|LT|PT|TE)\s?-\s?\d+"`
+    2.  **`dxf_extract_valve.py`**: `r"(FCV|LCV|TCV|PCV|XV)\s?-\s?\d+"`
+    3.  **`dxf_extract_gague.py`**: `r"(PG|TG|LG)\s?-\s?\d+"`
+    4.  **`dxf_extract_equipment.py`**: `r"(C|T|F|D|E|B|CT|CH|K)-?\d+"` (상세 규칙은 설계서 참조)
+    5.  **`dxf_extract_pump.py`**: `r"(P|VP)-\d+"`
+
+### 3.3 [C#] Backend Service (Data Persistence)
+**역할**: Python으로부터 전달받은 데이터를 검증하고 `ExperionDbContext`를 통해 DB에 영구 저장.
+
+- **Controller: `PidExtractionController`**
+    - `[HttpPost] PostExtractionResult(ExtractionDto dto)`: Python의 요청을 수신.
+- **Service: `PidProcessingService`**
+    - `ProcessAndSave(ExtractionDto dto)`: 데이터 유효성 검사 후 `PidEquipment` 엔티티로 변환.
+- **Repository: `PidRepository`**
+    - `SaveAsync(PidEquipment entity)`: SQL Server에 저장.
+
+## 4. 상세 구현 로드맵 (Implementation Roadmap)
+
+### Phase 1: 데이터 규격 및 환경 구축
+- [ ] `IntermediateDataFormat.json` 스키마 확정.
+- [ ] Python `ezdxf` 기반의 `DXFPreprocessor` 프로토타입 개발.
+
+### Phase 2: 서브 프로그램(Extractor) 개발
+- [ ] `BaseExtractor` 클래스 구현 (JSON 로드/저장/Regex 공통 로직).
+- [ ] 5종의 특화된 Regex 패턴 적용 및 개별 스크립트 완성.
+- [ ] **검증**: `test_dxf_extract_pid1.py`를 활용한 추출 정확도 테스트.
+
+### Phase 3: 메인 오케스트레이터 개발
+- [ ] `subprocess`를 이용한 병렬 실행 및 프로세스 모니터링 로직 구현.
+- [ ] 결과 파일 병합(Aggregation) 및 중복 제거 로직 구현.
+
+### Phase 4: 백엔드 연동 및 통합 테스트
+- [ ] C# API 엔드포인트 구현 및 Python `DatabaseIntegrator` 연동.
+- [ ] **E2E 테스트**: DXF 파일 투입 $\rightarrow$ 분산 추출 $\rightarrow$ DB 저장 전체 파이프라인 검증.
+
+## 5. 예외 처리 및 안정성 전략 (Error Handling)
+- **DXF 오류**: 파일 손상 시 `DXFPreprocessor`에서 에러 로그 남기고 프로세스 중단.
+- **서브 프로세스 실패**: 특정 서브 프로그램 실패 시, 해당 결과는 제외하되 다른 프로세스 결과는 유지하며 `Partial Success` 상태로 보고.
+- **DB 연결 오류**: 재시도(Retry) 로직 적용 및 실패 시 로컬 파일로 결과 백업.
--- a/src/Core/Application/DTOs/PidEquipmentDto.cs
+++ b/src/Core/Application/DTOs/PidEquipmentDto.cs
@@ -0,0 +1,15 @@
+namespace ExperionCrawler.Core.Application.DTOs;
+
+/// <summary>
+/// Immutable positional record carrying one P&amp;ID equipment/instrument entry,
+/// including its optional link to an Experion tag.
+/// </summary>
+/// <remarks>
+/// NOTE(review): member meanings are inferred from their names (tag number,
+/// drawing number, extraction confidence, ...); confirm value ranges and
+/// time zone conventions against the producing service.
+/// <c>ExperionTagId</c> and <c>ExperionTagName</c> are nullable, i.e. an entry
+/// may exist without a mapped Experion tag.
+/// </remarks>
+public record PidEquipmentDto(
+    long Id,
+    string TagNo,
+    string? EquipmentName,
+    string? InstrumentType,
+    string? LineNumber,
+    string? PidDrawingNo,
+    double Confidence,
+    bool IsActive,
+    DateTime ExtractedAt,
+    DateTime? UpdatedAt,
+    int? ExperionTagId,
+    string? ExperionTagName);
--- a/src/Core/Application/DTOs/PidExtractionResult.cs
+++ b/src/Core/Application/DTOs/PidExtractionResult.cs
@@ -0,0 +1,6 @@
+namespace ExperionCrawler.Core.Application.DTOs;
+
+/// <summary>Summary counts returned after one P&amp;ID extraction run.</summary>
+/// <param name="TotalCount">Number of items produced by the run.</param>
+/// <param name="ConfidenceItems">
+/// Number of items counted as confident (PidExtractorService counts items with Confidence &gt;= 0.7).
+/// </param>
+/// <param name="LowConfidenceItems">
+/// Number of items counted as low confidence (PidExtractorService counts items with Confidence &lt; 0.5).
+/// </param>
+public record PidExtractionResult(
+    int TotalCount,
+    int ConfidenceItems,
+    int LowConfidenceItems);
--- a/src/Core/Application/DTOs/PidGraphDtos.cs
+++ b/src/Core/Application/DTOs/PidGraphDtos.cs
@@ -0,0 +1,16 @@
+namespace ExperionCrawler.Core.Application.DTOs;
+
+using System.Collections.Generic;
+
+/// <summary>Result of an impact analysis that starts from a single graph node.</summary>
+/// <param name="StartNode">Identifier of the node the analysis started from.</param>
+/// <param name="ImpactedNodes">
+/// Impacted node identifiers mapped to an integer value.
+/// NOTE(review): the integer is presumably a hop count/distance from the start node — confirm against the producer.
+/// </param>
+/// <param name="Paths">Paths through the graph, each expressed as an ordered list of node identifiers.</param>
+public record ImpactResult(
+    string StartNode,
+    Dictionary<string, int> ImpactedNodes,
+    List<List<string>> Paths
+);
+
+/// <summary>Progress snapshot of an analysis task.</summary>
+/// <param name="TaskId">Identifier of the task being reported on.</param>
+/// <param name="Progress">
+/// Progress value. NOTE(review): scale (0-1 vs 0-100) is not defined here — confirm with the caller.
+/// </param>
+/// <param name="Status">Short status label.</param>
+/// <param name="Message">Human-readable detail message.</param>
+public record AnalysisStatus(
+    string TaskId,
+    double Progress,
+    string Status,
+    string Message
+);
--- a/src/Core/Application/DTOs/TagMappingDtos.cs
+++ b/src/Core/Application/DTOs/TagMappingDtos.cs
@@ -0,0 +1,20 @@
+namespace ExperionCrawler.Core.Application.DTOs;
+
+/// <summary>
+/// Flattened view of one P&amp;ID equipment row joined with its mapped Experion tag.
+/// The Experion* members are <c>null</c> when the equipment has no mapping.
+/// </summary>
+public record TagMappingResult
+{
+    // Identifier of the P&ID equipment row this mapping belongs to.
+    public long PidEquipmentId { get; set; }
+    public string TagNo { get; set; } = string.Empty;
+    public string? EquipmentName { get; set; }
+    public string? InstrumentType { get; set; }
+    public string? LineNumber { get; set; }
+    public string? PidDrawingNo { get; set; }
+    public double Confidence { get; set; }
+    public bool IsActive { get; set; }
+    // Mapped Experion tag: id, name and node id (all null when unmapped).
+    public int? ExperionTagId { get; set; }
+    public string? ExperionTagName { get; set; }
+    public string? ExperionNodeId { get; set; }
+}
+
+/// <summary>Request to link the given P&amp;ID equipment row to the given Experion tag.</summary>
+public record CreateMappingRequest(long PidEquipmentId, int ExperionTagId);
+
+/// <summary>
+/// Partial update of a mapping. A <c>null</c> member means "leave unchanged"
+/// (TagMappingService.UpdateMappingAsync only applies members that have a value).
+/// </summary>
+public record UpdateMappingRequest(int? ExperionTagId, bool? IsActive);
--- a/src/Core/Application/Interfaces/IExperionServices.cs
+++ b/src/Core/Application/Interfaces/IExperionServices.cs
@@ -256,3 +256,49 @@ public interface IExperionFastService
    Task<FastQueryResult>           GetRecordsAsync(int sessionId, DateTime? from, DateTime? to, string format = "long");
    Task                            ExportCsvAsync(int sessionId, Stream stream, DateTime? from = null, DateTime? to = null);
 }
+
+// ── P&ID Extractor ─────────────────────────────────────────────────────────────
+
+/// <summary>
+/// Extracts equipment/instrument tags from P&amp;ID drawings and exposes query, update,
+/// statistics and export operations over the stored results.
+/// </summary>
+public interface IPidExtractorService
+{
+    // Extraction
+    Task<PidExtractionResult> ExtractFromFileAsync(string filePath, bool useImageMode = false);
+    Task<PidExtractionResult> ExtractFromStreamAsync(Stream stream, string fileName, bool useImageMode = false);
+    
+    // Query (paginated); returns the total match count together with the requested page
+    Task<(int Total, IEnumerable<PidEquipment> Items)> GetEquipmentAsync(
+        string? tagNo, int page, int pageSize);
+    
+    Task<PidEquipment?> GetByIdAsync(long id);
+    
+    // Update
+    Task UpdateConfidenceAsync(long id, double confidence);
+    Task ActivateAsync(long id);
+    Task DeactivateAsync(long id);
+    
+    // Statistics
+    Task<int> GetTotalCountAsync();
+    Task<int> GetConfidenceItemsCountAsync();
+    Task<int> GetLowConfidenceItemsCountAsync();
+    Task<IDictionary<string, int>> GetConfidenceDistributionAsync();
+    Task<int> GetDrawingCountAsync();
+    
+    // Export
+    Task<string> ExportToCsvAsync(IEnumerable<PidEquipment> items);
+    Task<byte[]> ExportToExcelAsync(IEnumerable<PidEquipment> items);
+}
+
+// ── P&ID Tag Mapping ───────────────────────────────────────────────────────────
+
+/// <summary>
+/// Manages the link between P&amp;ID equipment rows and Experion tags:
+/// paginated listing, create/update/clear of a mapping, and mapping statistics.
+/// </summary>
+public interface ITagMappingService
+{
+    // Listing and CRUD; the long id arguments are P&ID equipment ids.
+    Task<(int Total, IEnumerable<TagMappingResult> Items)> GetMappingsAsync(int page, int pageSize);
+    Task<TagMappingResult?> GetMappingByIdAsync(long id);
+    Task<TagMappingResult> CreateMappingAsync(CreateMappingRequest request);
+    Task UpdateMappingAsync(long id, UpdateMappingRequest request);
+    Task ClearMappingAsync(long id);
+    
+    // Statistics and the names of tags that are not yet mapped to any equipment.
+    Task<int> GetUnmappedCountAsync();
+    Task<int> GetMappedCountAsync();
+    Task<IEnumerable<string>> GetAvailableTagsAsync();
+}
--- a/src/Core/Application/Services/PidExtractorService.cs
+++ b/src/Core/Application/Services/PidExtractorService.cs
@@ -0,0 +1,351 @@
+using System.Text;
+using System.Text.Json;
+using System.Text.RegularExpressions;
+using ExperionCrawler.Core.Application.DTOs;
+using ExperionCrawler.Core.Application.Interfaces;
+using ExperionCrawler.Core.Domain.Entities;
+using ExperionCrawler.Infrastructure.Database;
+using ExperionCrawler.Infrastructure.Mcp;
+using Microsoft.EntityFrameworkCore;
+using netDxf;
+using UglyToad.PdfPig;
+
+namespace ExperionCrawler.Core.Application.Services;
+
+public class PidExtractorService : IPidExtractorService
+{
+    private readonly McpClient _mcp;
+    private readonly ExperionDbContext _dbContext;
+    private readonly ILogger<PidExtractorService> _logger;
+
+    /// <summary>Stores the MCP client, EF Core context and logger supplied by dependency injection.</summary>
+    public PidExtractorService(McpClient mcp, ExperionDbContext dbContext, ILogger<PidExtractorService> logger)
+        => (_mcp, _dbContext, _logger) = (mcp, dbContext, logger);
+
+    /// <summary>
+    /// Opens the file at <paramref name="filePath"/> for reading and delegates to
+    /// <see cref="ExtractFromStreamAsync"/>, passing the bare file name so the parser can be
+    /// chosen from its extension. The file stream is disposed when extraction finishes.
+    /// </summary>
+    public async Task<PidExtractionResult> ExtractFromFileAsync(string filePath, bool useImageMode = false)
+    {
+        await using (var input = File.OpenRead(filePath))
+        {
+            var fileName = Path.GetFileName(filePath);
+            return await ExtractFromStreamAsync(input, fileName, useImageMode);
+        }
+    }
+
+    /// <summary>
+    /// Extracts P&amp;ID tags from a drawing stream and stores them as <c>PidEquipment</c> rows.
+    /// </summary>
+    /// <remarks>
+    /// Steps: (1) pull raw text out of the DXF/PDF, (2) ask the MCP server to extract tags,
+    /// (3) ask the MCP server to suggest Experion tag matches, (4) resolve each tag to a
+    /// RealtimePoint (suggested match first, prefix-based fallback otherwise) and save.
+    /// Items whose TagNo is blank are dropped, and each distinct TagNo is resolved against the
+    /// database only once.
+    /// </remarks>
+    /// <param name="stream">Drawing content.</param>
+    /// <param name="fileName">File name; only its extension (.dxf / .pdf) is used to pick the parser.</param>
+    /// <param name="useImageMode">Not used by this implementation.</param>
+    /// <returns>Counts of stored items; all zeros when no text or no tags were found.</returns>
+    /// <exception cref="NotSupportedException">The extension is neither .dxf nor .pdf.</exception>
+    public async Task<PidExtractionResult> ExtractFromStreamAsync(Stream stream, string fileName, bool useImageMode = false)
+    {
+        var ext = Path.GetExtension(fileName).ToLowerInvariant();
+
+        string text = ext switch
+        {
+            ".dxf" => ExtractDxfText(stream),
+            ".pdf" => ExtractPdfText(stream),
+            _      => throw new NotSupportedException("지원 형식: .dxf .pdf (스캔본 이미지는 Vision 모드 필요)")
+        };
+
+        if (string.IsNullOrWhiteSpace(text))
+            return new PidExtractionResult(0, 0, 0);
+
+        // MCP -> vLLM tag extraction
+        var sourceType = ext.TrimStart('.');
+        var json = await _mcp.ExtractPidTagsAsync(text, sourceType);
+
+        // A row without a tag number is useless downstream (TagNo is a required column),
+        // so discard entries the model returned without one.
+        var extractedItems = ParseJson(json)
+            .Where(i => !string.IsNullOrWhiteSpace(i?.TagNo))
+            .ToList();
+
+        if (extractedItems.Count == 0)
+        {
+            _logger.LogWarning("P&ID 추출 결과 0건 — 파일: {FileName}", fileName);
+            return new PidExtractionResult(0, 0, 0);
+        }
+
+        // MCP -> vLLM tag mapping suggestions
+        var pidTagNos = extractedItems.Select(i => i.TagNo).Distinct().ToList();
+        var experionTagNames = await _dbContext.RealtimePoints.Select(r => r.TagName).ToListAsync();
+        var mappingJson = await _mcp.MatchPidTagsAsync(pidTagNos, experionTagNames);
+        var mappings = ParseMappingJson(mappingJson);
+
+        // Resolve each distinct TagNo once; the same tag commonly appears several times on a drawing.
+        var resolvedTagIds = new Dictionary<string, int?>();
+
+        // Persist to the database
+        var dbItems = new List<PidEquipment>();
+        foreach (var item in extractedItems)
+        {
+            if (!resolvedTagIds.TryGetValue(item.TagNo, out var experionTagId))
+            {
+                mappings.TryGetValue(item.TagNo, out var matched);
+                var experionTag = matched != null
+                    ? await _dbContext.RealtimePoints.FirstOrDefaultAsync(r => r.TagName == matched)
+                    : await FindFallbackTagAsync(item.TagNo);
+
+                experionTagId = experionTag?.Id;
+                resolvedTagIds[item.TagNo] = experionTagId;
+            }
+
+            dbItems.Add(new PidEquipment
+            {
+                TagNo = item.TagNo,
+                EquipmentName = item.EquipmentName,
+                InstrumentType = item.InstrumentType,
+                LineNumber = item.LineNumber,
+                PidDrawingNo = item.PidDrawingNo,
+                Confidence = item.Confidence,
+                ExperionTagId = experionTagId,
+                ExtractedAt = DateTime.UtcNow,
+                UpdatedAt = DateTime.UtcNow
+            });
+        }
+
+        await _dbContext.PidEquipment.AddRangeAsync(dbItems);
+        await _dbContext.SaveChangesAsync();
+
+        _logger.LogInformation("P&ID 추출 완료: {Total}건 저장 (파일: {FileName})", dbItems.Count, fileName);
+
+        return new PidExtractionResult(
+            TotalCount: dbItems.Count,
+            ConfidenceItems: dbItems.Count(i => i.Confidence >= 0.7),
+            LowConfidenceItems: dbItems.Count(i => i.Confidence < 0.5));
+    }
+
+    /// <summary>
+    /// Reads a DXF drawing from <paramref name="stream"/> and returns the tag-like text lines
+    /// found in its TEXT entities, MTEXT entities and block attribute definitions.
+    /// </summary>
+    /// <remarks>
+    /// The stream is first copied to a temporary .dxf file, which is loaded from disk and
+    /// always deleted afterwards.
+    /// </remarks>
+    private string ExtractDxfText(Stream stream)
+    {
+        // Build the temp path ourselves: Path.GetTempFileName() creates a zero-byte file on disk,
+        // and appending ".dxf" to its name would leave that file behind on every call.
+        var tmp = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName() + ".dxf");
+        try
+        {
+            using (var fs = File.Create(tmp))
+                stream.CopyTo(fs);
+
+            var doc = DxfDocument.Load(tmp);
+            var sb = new StringBuilder();
+
+            foreach (var txt in doc.Entities.Texts)
+                sb.AppendLine(txt.Value);
+            foreach (var mtxt in doc.Entities.MTexts)
+                sb.AppendLine(mtxt.PlainText());
+            foreach (var blk in doc.Blocks)
+                foreach (var attr in blk.AttributeDefinitions.Values)
+                    sb.AppendLine(attr.Value);
+
+            var text = sb.ToString();
+
+            // Forward only the lines that look like P&ID tags to the MCP server
+            return FilterDxfText(text);
+        }
+        finally
+        {
+            if (File.Exists(tmp)) File.Delete(tmp);
+        }
+    }
+
+    /// <summary>
+    /// Keeps only the lines of DXF text that contain a P&amp;ID tag pattern.
+    /// Dropping unrelated text reduces load on the MCP server and avoids JSON parsing errors.
+    /// </summary>
+    /// <remarks>
+    /// Each line is trimmed before matching. Examples of what the pattern accepts:
+    /// single-letter equipment tags (P-10101, T-10100), multi-letter instrument tags
+    /// (FCV-101, FICQ-6113) and compound tags (VG-6203-15A-F1A-n, CD-10513-40A).
+    /// </remarks>
+    private string FilterDxfText(string text)
+    {
+        var tagLines = text
+            .Split('\n')
+            .Select(raw => raw.Trim())
+            .Where(candidate => Regex.IsMatch(candidate, @"[A-Z]{1,6}-\d{2,6}(-[A-Z0-9]+)*"));
+
+        return string.Join("\n", tagLines);
+    }
+
+    /// <summary>
+    /// Returns the text of every page of the PDF in <paramref name="stream"/>,
+    /// one page per line-terminated segment, in page order.
+    /// </summary>
+    private string ExtractPdfText(Stream stream)
+    {
+        using (var document = PdfDocument.Open(stream))
+        {
+            var pageTexts = document.GetPages().Select(p => p.Text + Environment.NewLine);
+            return string.Concat(pageTexts);
+        }
+    }
+
+    /// <summary>
+    /// Parses the MCP tag-extraction response into a list of items.
+    /// </summary>
+    /// <remarks>
+    /// Two shapes are accepted: a wrapper object <c>{"success": ..., "count": ..., "tags": [...]}</c>
+    /// or a bare JSON array. Anything else, including malformed JSON, logs a warning and yields
+    /// an empty list; this method never throws.
+    /// </remarks>
+    private List<ExtractedItem> ParseJson(string json)
+    {
+        try
+        {
+            using var doc = JsonDocument.Parse(json);
+            var root = doc.RootElement;
+
+            // The ValueKind must be checked before TryGetProperty: calling TryGetProperty on a
+            // non-object element throws InvalidOperationException, which previously made the
+            // bare-array shape fall into the catch block and return nothing.
+            JsonElement itemsElement;
+            if (root.ValueKind == JsonValueKind.Array)
+            {
+                itemsElement = root;
+            }
+            else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("tags", out var tagsElement))
+            {
+                itemsElement = tagsElement;
+            }
+            else
+            {
+                _logger.LogWarning("P&ID JSON 파싱 실패: 'tags' 필드 또는 배열 형식 없음");
+                return [];
+            }
+
+            return JsonSerializer.Deserialize<List<ExtractedItem>>(itemsElement.GetRawText(),
+                new JsonSerializerOptions { PropertyNameCaseInsensitive = true }) ?? [];
+        }
+        catch (Exception ex)
+        {
+            // Guard the preview: a null payload would otherwise throw again from inside this handler.
+            var preview = json is null ? "" : json[..Math.Min(200, json.Length)];
+            _logger.LogWarning("P&ID JSON 파싱 실패: {Msg} / raw: {Raw}", ex.Message, preview);
+            return [];
+        }
+    }
+
+    /// <summary>
+    /// Parses the MCP tag-matching response into a lookup of P&amp;ID tag to Experion tag name.
+    /// </summary>
+    /// <remarks>
+    /// Accepts either a bare array or a wrapper object
+    /// <c>{"success": ..., "count": ..., "mappings": [...]}</c>. Only suggestions with
+    /// Confidence &gt;= 0.7 and a non-empty Experion tag are kept. When the same P&amp;ID tag is
+    /// suggested more than once, the highest-confidence suggestion wins. On any parse failure
+    /// a warning is logged and an empty lookup is returned; this method never throws.
+    /// </remarks>
+    private Dictionary<string, string> ParseMappingJson(string json)
+    {
+        try
+        {
+            using var doc = JsonDocument.Parse(json);
+            var root = doc.RootElement;
+
+            JsonElement arrayEl = root.ValueKind == JsonValueKind.Array
+                ? root
+                : root.ValueKind == JsonValueKind.Object && root.TryGetProperty("mappings", out var m) ? m : default;
+
+            if (arrayEl.ValueKind != JsonValueKind.Array)
+                return [];
+
+            var list = JsonSerializer.Deserialize<List<MappingItem>>(arrayEl.GetRawText(),
+                new JsonSerializerOptions { PropertyNameCaseInsensitive = true }) ?? [];
+
+            // ToDictionary throws on a repeated key, which used to discard every mapping in the
+            // response. Insert best-first with TryAdd so duplicates are simply skipped.
+            var result = new Dictionary<string, string>();
+            var accepted = list
+                .Where(x => x is not null
+                            && x.Confidence >= 0.7
+                            && !string.IsNullOrEmpty(x.PidTag)
+                            && !string.IsNullOrEmpty(x.ExperionTag))
+                .OrderByDescending(x => x.Confidence);
+
+            foreach (var candidate in accepted)
+                result.TryAdd(candidate.PidTag, candidate.ExperionTag!);
+
+            return result;
+        }
+        catch (Exception ex)
+        {
+            // Still best-effort (callers fall back to prefix matching), but no longer silent.
+            _logger.LogWarning("P&ID 매핑 JSON 파싱 실패: {Msg}", ex.Message);
+            return [];
+        }
+    }
+
+    /// <summary>
+    /// Fallback lookup used when the MCP server suggested no match: takes the part of
+    /// <paramref name="tagNo"/> before the first '.', then returns the first realtime point whose
+    /// name equals that base tag or starts with the base tag followed by a dot.
+    /// </summary>
+    private async Task<RealtimePoint?> FindFallbackTagAsync(string tagNo)
+    {
+        var baseTag = tagNo.Split('.')[0];
+        var childPrefix = baseTag + ".";
+
+        return await _dbContext.RealtimePoints
+            .Where(t => t.TagName == baseTag || t.TagName.StartsWith(childPrefix))
+            .FirstOrDefaultAsync();
+    }
+
+    /// <summary>
+    /// Returns one page of equipment rows, newest extraction first, optionally filtered to rows
+    /// whose TagNo contains <paramref name="tagNo"/>.
+    /// </summary>
+    /// <param name="tagNo">Substring filter on TagNo; null or empty disables filtering.</param>
+    /// <param name="page">1-based page number; values below 1 are treated as 1.</param>
+    /// <param name="pageSize">Rows per page; values below 1 are treated as 1.</param>
+    /// <returns>The total number of matching rows and the rows of the requested page.</returns>
+    public async Task<(int Total, IEnumerable<PidEquipment> Items)> GetEquipmentAsync(
+        string? tagNo, int page, int pageSize)
+    {
+        // A page of 0 or less would produce a negative Skip, which the query cannot execute.
+        page = Math.Max(page, 1);
+        pageSize = Math.Max(pageSize, 1);
+
+        var q = _dbContext.PidEquipment.AsQueryable();
+        if (!string.IsNullOrEmpty(tagNo))
+            q = q.Where(e => e.TagNo.Contains(tagNo));
+        var total = await q.CountAsync();
+
+        // Rows from one extraction share almost the same ExtractedAt; the Id tiebreaker keeps
+        // the order deterministic so rows are neither repeated nor skipped across pages.
+        var items = await q.OrderByDescending(e => e.ExtractedAt)
+                           .ThenByDescending(e => e.Id)
+                           .Skip((page - 1) * pageSize).Take(pageSize).ToListAsync();
+        return (total, items);
+    }
+
+    /// <summary>
+    /// Loads a single equipment row, with its linked Experion tag included, or <c>null</c>
+    /// when no row has the given <paramref name="id"/>.
+    /// </summary>
+    public async Task<PidEquipment?> GetByIdAsync(long id)
+    {
+        var withTag = _dbContext.PidEquipment.Include(e => e.ExperionTag);
+        return await withTag.FirstOrDefaultAsync(e => e.Id == id);
+    }
+
+    /// <summary>
+    /// Overwrites the confidence of the given row and stamps UpdatedAt; does nothing when the
+    /// row does not exist.
+    /// </summary>
+    public async Task UpdateConfidenceAsync(long id, double confidence)
+    {
+        var equipment = await _dbContext.PidEquipment.FindAsync(id);
+        if (equipment is not null)
+        {
+            equipment.Confidence = confidence;
+            equipment.UpdatedAt = DateTime.UtcNow;
+            await _dbContext.SaveChangesAsync();
+        }
+    }
+
+    /// <summary>
+    /// Marks the given row as active and stamps UpdatedAt; does nothing when the row does not exist.
+    /// </summary>
+    public async Task ActivateAsync(long id)
+    {
+        var equipment = await _dbContext.PidEquipment.FindAsync(id);
+        if (equipment is not null)
+        {
+            equipment.IsActive = true;
+            equipment.UpdatedAt = DateTime.UtcNow;
+            await _dbContext.SaveChangesAsync();
+        }
+    }
+
+    /// <summary>
+    /// Marks the given row as inactive and stamps UpdatedAt; does nothing when the row does not exist.
+    /// </summary>
+    public async Task DeactivateAsync(long id)
+    {
+        var equipment = await _dbContext.PidEquipment.FindAsync(id);
+        if (equipment is not null)
+        {
+            equipment.IsActive = false;
+            equipment.UpdatedAt = DateTime.UtcNow;
+            await _dbContext.SaveChangesAsync();
+        }
+    }
+
+    /// <summary>Counts all equipment rows.</summary>
+    public Task<int> GetTotalCountAsync() => _dbContext.PidEquipment.CountAsync();
+    /// <summary>Counts rows with Confidence &gt;= 0.7.</summary>
+    public Task<int> GetConfidenceItemsCountAsync() => _dbContext.PidEquipment.CountAsync(e => e.Confidence >= 0.7);
+    /// <summary>Counts rows with Confidence &lt; 0.5.</summary>
+    public Task<int> GetLowConfidenceItemsCountAsync() => _dbContext.PidEquipment.CountAsync(e => e.Confidence < 0.5);
+    /// <summary>Counts the distinct PidDrawingNo values across all rows.</summary>
+    public Task<int> GetDrawingCountAsync() => _dbContext.PidEquipment.Select(e => e.PidDrawingNo).Distinct().CountAsync();
+
+    /// <summary>
+    /// Returns how many equipment rows fall into each confidence band:
+    /// high (&gt;= 0.7), medium (0.5 to below 0.7) and low (&lt; 0.5).
+    /// </summary>
+    /// <remarks>
+    /// The counting is done by the database; no entity rows are loaded into memory.
+    /// The three counts are awaited one after another because a DbContext does not support
+    /// concurrent operations.
+    /// </remarks>
+    public async Task<IDictionary<string, int>> GetConfidenceDistributionAsync()
+    {
+        var equipment = _dbContext.PidEquipment;
+
+        var high = await equipment.CountAsync(e => e.Confidence >= 0.7);
+        var medium = await equipment.CountAsync(e => e.Confidence >= 0.5 && e.Confidence < 0.7);
+        var low = await equipment.CountAsync(e => e.Confidence < 0.5);
+
+        return new Dictionary<string, int>
+        {
+            ["High (>=0.7)"] = high,
+            ["Medium (0.5-0.7)"] = medium,
+            ["Low (<0.5)"] = low
+        };
+    }
+
+    /// <summary>
+    /// Renders the given rows as CSV text: one header line followed by one line per item.
+    /// </summary>
+    /// <remarks>
+    /// Text fields are escaped with <see cref="Csv"/>. Confidence is written with the invariant
+    /// culture so a server locale that uses a decimal comma cannot split it into two columns.
+    /// The work is synchronous; the method stays <c>async</c> so failures still surface through
+    /// the returned task exactly as before.
+    /// </remarks>
+    public async Task<string> ExportToCsvAsync(IEnumerable<PidEquipment> items)
+    {
+        var invariant = System.Globalization.CultureInfo.InvariantCulture;
+        var sb = new StringBuilder();
+        sb.AppendLine("TagNo,EquipmentName,InstrumentType,LineNumber,PidDrawingNo,Confidence,IsActive,ExtractedAt,ExperionTagId");
+        foreach (var i in items)
+        {
+            var confidence = i.Confidence.ToString(invariant);
+            sb.AppendLine($"{Csv(i.TagNo)},{Csv(i.EquipmentName)},{Csv(i.InstrumentType)},{Csv(i.LineNumber)},{Csv(i.PidDrawingNo)},{confidence},{i.IsActive},{i.ExtractedAt:O},{i.ExperionTagId}");
+        }
+        return sb.ToString();
+    }
+
+    /// <summary>
+    /// Escapes one value for a CSV cell. Null or empty becomes an empty cell. A value containing
+    /// a comma, a double quote or a line break (LF or CR) is wrapped in double quotes with
+    /// embedded quotes doubled; any other value is returned unchanged.
+    /// </summary>
+    private static string Csv(string? v)
+    {
+        if (string.IsNullOrEmpty(v)) return "";
+
+        // '\r' counts as a line break too: left unquoted it splits the record in many CSV readers.
+        var needsQuoting = v.Contains(',') || v.Contains('"') || v.Contains('\n') || v.Contains('\r');
+
+        return needsQuoting
+            ? $"\"{v.Replace("\"", "\"\"")}\"" : v;
+    }
+
+    /// <summary>
+    /// Builds an Excel workbook with a single "P&amp;ID Equipment" sheet (header row plus one row
+    /// per item) and returns it as a byte array.
+    /// </summary>
+    public async Task<byte[]> ExportToExcelAsync(IEnumerable<PidEquipment> items)
+    {
+        using var package = new OfficeOpenXml.ExcelPackage();
+        var sheet = package.Workbook.Worksheets.Add("P&ID Equipment");
+
+        // Header row (row 1), columns 1..9
+        string[] headers =
+        [
+            "태그번호", "장비명", "계기유형", "라인번호", "도면번호",
+            "신뢰도", "상태", "추출일시", "Experion 태그"
+        ];
+        for (var col = 0; col < headers.Length; col++)
+            sheet.Cells[1, col + 1].Value = headers[col];
+
+        // Data rows start at row 2; cell values are listed in header order.
+        var rowIndex = 2;
+        foreach (var item in items)
+        {
+            object[] values =
+            [
+                item.TagNo,
+                item.EquipmentName ?? "",
+                item.InstrumentType ?? "",
+                item.LineNumber ?? "",
+                item.PidDrawingNo ?? "",
+                item.Confidence,
+                item.IsActive ? "활성" : "비활성",
+                item.ExtractedAt,
+                item.ExperionTag?.TagName ?? ""
+            ];
+
+            for (var col = 0; col < values.Length; col++)
+                sheet.Cells[rowIndex, col + 1].Value = values[col];
+
+            rowIndex++;
+        }
+
+        return package.GetAsByteArray();
+    }
+}
+
+// ── Models used internally for parsing ───────────────────────────────────────
+/// <summary>
+/// One tag entry deserialized from the MCP tag-extraction response.
+/// Confidence defaults to 0.5 when the response does not supply it.
+/// </summary>
+public class ExtractedItem
+{
+    public string TagNo { get; set; } = "";
+    public string? EquipmentName { get; set; }
+    public string? InstrumentType { get; set; }
+    public string? LineNumber { get; set; }
+    public string? PidDrawingNo { get; set; }
+    public double Confidence { get; set; } = 0.5;
+}
+
+/// <summary>
+/// One suggestion deserialized from the MCP tag-matching response: a P&amp;ID tag, the
+/// Experion tag proposed for it (may be absent) and the confidence of the suggestion.
+/// </summary>
+public class MappingItem
+{
+    public string PidTag { get; set; } = "";
+    public string? ExperionTag { get; set; }
+    public double Confidence { get; set; }
+}
--- a/src/Core/Application/Services/PidGraphService.cs
+++ b/src/Core/Application/Services/PidGraphService.cs
@@ -0,0 +1,97 @@
+using System.Text.Json;
+using ExperionCrawler.Infrastructure.Mcp;
+using ExperionCrawler.Core.Application.DTOs;
+
+namespace ExperionCrawler.Core.Application.Services;
+
+/// <summary>
+/// Builds P&amp;ID graphs from drawing files and runs impact analysis on a built graph.
+/// </summary>
+public interface IPidGraphService
+{
+    /// <summary>Builds a graph from the drawing at <paramref name="filepath"/>.</summary>
+    /// <param name="filepath">Path of the drawing file to process.</param>
+    /// <param name="progressHandler">Optional callback receiving a progress value and a status message.</param>
+    /// <param name="ct">Cancellation token.</param>
+    Task<PidGraphBuildResult> BuildPidGraphAsync(string filepath, Action<double, string>? progressHandler = null, CancellationToken ct = default);
+    /// <summary>Analyzes the impact of node <paramref name="nodeId"/> within graph <paramref name="graphId"/>.</summary>
+    Task<PidImpactResult> AnalyzeImpactAsync(string graphId, string nodeId, CancellationToken ct = default);
+}
+
+/// <summary>
+/// <see cref="IPidGraphService"/> implementation that delegates the work to MCP server tools
+/// ("build_pid_graph_parallel" and "analyze_pid_impact") and deserializes their JSON replies.
+/// </summary>
+/// <remarks>
+/// Neither public method throws: any failure is logged and reported through the result's
+/// <c>Success</c>/<c>Error</c> members.
+/// NOTE(review): that includes OperationCanceledException, so a cancelled call comes back as a
+/// failed result instead of propagating — confirm this is what callers expect.
+/// </remarks>
+public class PidGraphService : IPidGraphService
+{
+    // Shared instance: JsonSerializerOptions caches serialization metadata, so building a new
+    // one on every call throws that cache away (analyzer rule CA1869).
+    private static readonly JsonSerializerOptions JsonOptions = new()
+    {
+        PropertyNameCaseInsensitive = true
+    };
+
+    private readonly McpClient _mcpClient;
+    private readonly ILogger<PidGraphService> _logger;
+
+    public PidGraphService(McpClient mcpClient, ILogger<PidGraphService> logger)
+    {
+        _mcpClient = mcpClient;
+        _logger = logger;
+    }
+
+    /// <summary>
+    /// Asks the MCP server to build a P&amp;ID graph for the drawing at <paramref name="filepath"/>.
+    /// </summary>
+    /// <param name="filepath">Path of the drawing, forwarded to the tool as "filepath".</param>
+    /// <param name="progressHandler">
+    /// Optional callback invoked with fixed checkpoints (10, 30, 70, 90) and a status message.
+    /// The whole build is one tool call, so 70 and 90 are reported only after that call returns.
+    /// </param>
+    /// <param name="ct">Cancellation token passed to the MCP call.</param>
+    /// <returns>The deserialized result, or a result with <c>Success = false</c> on any failure.</returns>
+    public async Task<PidGraphBuildResult> BuildPidGraphAsync(string filepath, Action<double, string>? progressHandler = null, CancellationToken ct = default)
+    {
+        try
+        {
+            progressHandler?.Invoke(10, "MCP 서버에 추출 요청 전송 중...");
+
+            var args = new Dictionary<string, object>
+            {
+                ["filepath"] = filepath
+            };
+
+            progressHandler?.Invoke(30, "도면 기하학적 데이터 추출 중 (Phase 1)...");
+            var jsonResponse = await _mcpClient.CallToolAsync("build_pid_graph_parallel", args, ct);
+
+            progressHandler?.Invoke(70, "지능형 태그 매핑 및 위상 분석 중 (Phase 2 & 3)...");
+            var result = JsonSerializer.Deserialize<PidGraphBuildResult>(jsonResponse, JsonOptions);
+
+            progressHandler?.Invoke(90, "최종 그래프 구조 생성 및 저장 중...");
+            return result ?? throw new InvalidOperationException("Failed to deserialize MCP response");
+        }
+        catch (Exception ex)
+        {
+            _logger.LogError(ex, "Error building PID graph for file {Filepath}", filepath);
+            return new PidGraphBuildResult { Success = false, Error = ex.Message };
+        }
+    }
+
+    /// <summary>
+    /// Asks the MCP server which nodes are impacted when starting from <paramref name="nodeId"/>
+    /// in the graph identified by <paramref name="graphId"/>.
+    /// </summary>
+    /// <param name="graphId">Graph identifier, forwarded as "graph_id".</param>
+    /// <param name="nodeId">Start node identifier, forwarded as "start_node_id".</param>
+    /// <param name="ct">Cancellation token passed to the MCP call.</param>
+    /// <returns>The deserialized result, or a result with <c>Success = false</c> on any failure.</returns>
+    public async Task<PidImpactResult> AnalyzeImpactAsync(string graphId, string nodeId, CancellationToken ct = default)
+    {
+        try
+        {
+            var args = new Dictionary<string, object>
+            {
+                ["graph_id"] = graphId,
+                ["start_node_id"] = nodeId
+            };
+
+            var jsonResponse = await _mcpClient.CallToolAsync("analyze_pid_impact", args, ct);
+            var result = JsonSerializer.Deserialize<PidImpactResult>(jsonResponse, JsonOptions);
+
+            return result ?? throw new InvalidOperationException("Failed to deserialize MCP response");
+        }
+        catch (Exception ex)
+        {
+            _logger.LogError(ex, "Error analyzing impact for graph {GraphId} node {NodeId}", graphId, nodeId);
+            return new PidImpactResult { Success = false, Error = ex.Message };
+        }
+    }
+}
+
+/// <summary>
+/// Outcome of a graph build. When <see cref="Success"/> is false, <see cref="Error"/> carries
+/// the failure message; the other members are filled from the MCP tool's JSON reply.
+/// </summary>
+public class PidGraphBuildResult
+{
+    public bool Success { get; set; }
+    public string? GraphId { get; set; }
+    public string? GraphPath { get; set; }
+    // Node and edge counts as reported in the reply.
+    public int Nodes { get; set; }
+    public int Edges { get; set; }
+    public string? Error { get; set; }
+}
+
+/// <summary>
+/// Outcome of an impact analysis. When <see cref="Success"/> is false, <see cref="Error"/>
+/// carries the failure message; the other members are filled from the MCP tool's JSON reply.
+/// </summary>
+public class PidImpactResult
+{
+    public bool Success { get; set; }
+    public string? StartNode { get; set; }
+    // NOTE(review): the int value is presumably a distance/level from StartNode — confirm with the MCP tool.
+    public Dictionary<string, int>? ImpactedNodes { get; set; }
+    // Each path is an ordered list of node identifiers.
+    public List<List<string>>? Paths { get; set; }
+    public string? Error { get; set; }
+}
--- a/src/Core/Application/Services/TagMappingService.cs
+++ b/src/Core/Application/Services/TagMappingService.cs
@@ -0,0 +1,150 @@
+using ExperionCrawler.Core.Application.DTOs;
+using ExperionCrawler.Core.Application.Interfaces;
+using ExperionCrawler.Core.Domain.Entities;
+using ExperionCrawler.Infrastructure.Database;
+using Microsoft.EntityFrameworkCore;
+
+namespace ExperionCrawler.Core.Application.Services;
+
+public class TagMappingService : ITagMappingService
+{
+    private readonly ExperionDbContext _dbContext;
+    
+    /// <summary>Stores the EF Core context used for all mapping queries and updates.</summary>
+    public TagMappingService(ExperionDbContext dbContext) => _dbContext = dbContext;
+    
+    /// <summary>
+    /// Returns one page of equipment rows left-joined with their Experion tag, highest
+    /// confidence first. Unmapped rows are included with null Experion* members.
+    /// </summary>
+    /// <param name="page">1-based page number; values below 1 are treated as 1.</param>
+    /// <param name="pageSize">Rows per page; values below 1 are treated as 1.</param>
+    /// <returns>The total number of rows and the rows of the requested page.</returns>
+    public async Task<(int Total, IEnumerable<TagMappingResult> Items)> GetMappingsAsync(int page, int pageSize)
+    {
+        // A page of 0 or less would produce a negative Skip, which the query cannot execute.
+        page = Math.Max(page, 1);
+        pageSize = Math.Max(pageSize, 1);
+
+        var query = from pe in _dbContext.PidEquipment
+                    join rt in _dbContext.RealtimePoints
+                        on pe.ExperionTagId equals rt.Id into joined
+                    from rt in joined.DefaultIfEmpty()
+                    select new TagMappingResult
+                    {
+                        PidEquipmentId = pe.Id,
+                        TagNo = pe.TagNo,
+                        EquipmentName = pe.EquipmentName,
+                        InstrumentType = pe.InstrumentType,
+                        LineNumber = pe.LineNumber,
+                        PidDrawingNo = pe.PidDrawingNo,
+                        Confidence = pe.Confidence,
+                        IsActive = pe.IsActive,
+                        ExperionTagId = pe.ExperionTagId,
+                        ExperionTagName = rt == null ? null : rt.TagName,
+                        ExperionNodeId = rt == null ? null : rt.NodeId
+                    };
+
+        var total = await query.CountAsync();
+
+        // Many rows share the same confidence value; without a unique tiebreaker the database
+        // may order ties differently per query, repeating or skipping rows across pages.
+        var items = await query
+            .OrderByDescending(e => e.Confidence)
+            .ThenBy(e => e.PidEquipmentId)
+            .Skip((page - 1) * pageSize)
+            .Take(pageSize)
+            .ToListAsync();
+
+        return (total, items);
+    }
+    
+    /// <summary>
+    /// Loads the equipment row with the given id, together with its Experion tag, and projects
+    /// it to a <see cref="TagMappingResult"/>. Returns <c>null</c> when the row does not exist.
+    /// </summary>
+    public async Task<TagMappingResult?> GetMappingByIdAsync(long id)
+    {
+        var equipment = await _dbContext.PidEquipment
+            .Include(e => e.ExperionTag)
+            .FirstOrDefaultAsync(e => e.Id == id);
+
+        return equipment is null
+            ? null
+            : new TagMappingResult
+            {
+                PidEquipmentId = equipment.Id,
+                TagNo = equipment.TagNo,
+                EquipmentName = equipment.EquipmentName,
+                InstrumentType = equipment.InstrumentType,
+                LineNumber = equipment.LineNumber,
+                PidDrawingNo = equipment.PidDrawingNo,
+                Confidence = equipment.Confidence,
+                IsActive = equipment.IsActive,
+                ExperionTagId = equipment.ExperionTagId,
+                ExperionTagName = equipment.ExperionTag?.TagName,
+                ExperionNodeId = equipment.ExperionTag?.NodeId
+            };
+    }
+    
+    /// <summary>
+    /// Links the requested equipment row to the requested Experion tag, stamps UpdatedAt, saves,
+    /// and returns the resulting mapping.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">The equipment row or the realtime tag does not exist.</exception>
+    public async Task<TagMappingResult> CreateMappingAsync(CreateMappingRequest request)
+    {
+        var target = await _dbContext.PidEquipment.FindAsync(request.PidEquipmentId)
+            ?? throw new InvalidOperationException("P&ID 장비를 찾을 수 없습니다.");
+
+        var point = await _dbContext.RealtimePoints.FindAsync(request.ExperionTagId)
+            ?? throw new InvalidOperationException("실시간 태그를 찾을 수 없습니다.");
+
+        target.ExperionTagId = point.Id;
+        target.UpdatedAt = DateTime.UtcNow;
+        await _dbContext.SaveChangesAsync();
+
+        var created = new TagMappingResult
+        {
+            PidEquipmentId = target.Id,
+            TagNo = target.TagNo,
+            EquipmentName = target.EquipmentName,
+            InstrumentType = target.InstrumentType,
+            LineNumber = target.LineNumber,
+            PidDrawingNo = target.PidDrawingNo,
+            Confidence = target.Confidence,
+            IsActive = target.IsActive,
+            ExperionTagId = target.ExperionTagId,
+            ExperionTagName = point.TagName,
+            ExperionNodeId = point.NodeId
+        };
+        return created;
+    }
+    
+    /// <summary>
+    /// Applies a partial update to the equipment row with the given id: re-points it to another
+    /// Experion tag and/or changes its active flag, for whichever request members have a value.
+    /// UpdatedAt is stamped and changes are saved even when neither member is set.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">The equipment row or the requested tag does not exist.</exception>
+    public async Task UpdateMappingAsync(long id, UpdateMappingRequest request)
+    {
+        var target = await _dbContext.PidEquipment.FindAsync(id)
+            ?? throw new InvalidOperationException("P&ID 장비를 찾을 수 없습니다.");
+
+        if (request.ExperionTagId is int requestedTagId)
+        {
+            var point = await _dbContext.RealtimePoints.FindAsync(requestedTagId)
+                ?? throw new InvalidOperationException("실시간 태그를 찾을 수 없습니다.");
+
+            target.ExperionTagId = point.Id;
+        }
+
+        if (request.IsActive is bool active)
+        {
+            target.IsActive = active;
+        }
+
+        target.UpdatedAt = DateTime.UtcNow;
+        await _dbContext.SaveChangesAsync();
+    }
+    
+    /// <summary>
+    /// Removes the Experion tag link from the equipment row with the given id, stamps UpdatedAt
+    /// and saves.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">The equipment row does not exist.</exception>
+    public async Task ClearMappingAsync(long id)
+    {
+        var target = await _dbContext.PidEquipment.FindAsync(id)
+            ?? throw new InvalidOperationException("P&ID 장비를 찾을 수 없습니다.");
+
+        target.ExperionTagId = null;
+        target.UpdatedAt = DateTime.UtcNow;
+
+        await _dbContext.SaveChangesAsync();
+    }
+    
+    /// <summary>Counts equipment rows that have no Experion tag linked.</summary>
+    public async Task<int> GetUnmappedCountAsync()
+        => await _dbContext.PidEquipment.CountAsync(e => e.ExperionTagId == null);
+    
+    /// <summary>Counts equipment rows that have an Experion tag linked.</summary>
+    public async Task<int> GetMappedCountAsync()
+        => await _dbContext.PidEquipment.CountAsync(e => e.ExperionTagId != null);
+    
+    /// <summary>
+    /// Returns the names, sorted ascending, of all realtime tags that are not linked to any
+    /// equipment row.
+    /// </summary>
+    /// <remarks>
+    /// The "not linked" test runs as a subquery in the database, so the list of mapped tag ids is
+    /// never pulled into memory or sent back as a large IN list.
+    /// </remarks>
+    public async Task<IEnumerable<string>> GetAvailableTagsAsync()
+    {
+        return await _dbContext.RealtimePoints
+            .Where(t => !_dbContext.PidEquipment.Any(e => e.ExperionTagId == t.Id))
+            .Select(t => t.TagName)
+            .OrderBy(name => name)
+            .ToListAsync();
+    }
+}
--- a/src/Core/Domain/Entities/PidAuditLog.cs
+++ b/src/Core/Domain/Entities/PidAuditLog.cs
@@ -0,0 +1,27 @@
+using System.ComponentModel.DataAnnotations;
+using System.ComponentModel.DataAnnotations.Schema;
+
+namespace ExperionCrawler.Core.Domain.Entities;
+
+/// <summary>Audit log entry for a P&amp;ID extraction or edit operation.</summary>
+[Table("pid_audit_log")]
+public class PidAuditLog
+{
+    public long Id { get; set; }
+    
+    // There is no user authentication system, so the Source field stands in for the actor.
+    [MaxLength(50)]
+    public string Source { get; set; } = string.Empty;
+    
+    // Name of the operation that was performed.
+    [MaxLength(50)]
+    public string Action { get; set; } = string.Empty;
+    
+    // Tag number of the item the operation was applied to.
+    [MaxLength(50)]
+    public string TargetTagNo { get; set; } = string.Empty;
+    
+    // Value before and after the change; both are optional.
+    public string? OldValue { get; set; }
+    
+    public string? NewValue { get; set; }
+    
+    // Defaults to the UTC time at which the object is created.
+    public DateTime LoggedAt { get; set; } = DateTime.UtcNow;
+}
--- a/src/Core/Domain/Entities/PidEquipment.cs
+++ b/src/Core/Domain/Entities/PidEquipment.cs
@@ -0,0 +1,41 @@
+using System.ComponentModel.DataAnnotations;
+using System.ComponentModel.DataAnnotations.Schema;
+
+namespace ExperionCrawler.Core.Domain.Entities;
+
+/// <summary>Equipment or instrument information extracted from a P&amp;ID drawing.</summary>
+[Table("pid_equipment")]
+public class PidEquipment
+{
+    public long Id { get; set; }
+    
+    // Tag number as extracted from the drawing (required, up to 50 characters).
+    [Required]
+    [MaxLength(50)]
+    public string TagNo { get; set; } = string.Empty;
+    
+    [MaxLength(200)]
+    public string? EquipmentName { get; set; }
+    
+    [MaxLength(10)]
+    public string? InstrumentType { get; set; }
+    
+    [MaxLength(100)]
+    public string? LineNumber { get; set; }
+    
+    [MaxLength(50)]
+    public string? PidDrawingNo { get; set; }
+    
+    // Extraction confidence score.
+    public double Confidence { get; set; }
+    
+    public bool IsActive { get; set; } = true;
+    
+    // Defaults to the UTC time at which the object is created.
+    public DateTime ExtractedAt { get; set; } = DateTime.UtcNow;
+    
+    public DateTime? UpdatedAt { get; set; }
+    
+    // Foreign key - the existing RealtimePoint.Id is of type int
+    public int? ExperionTagId { get; set; }
+    
+    // Navigation property for the foreign key
+    public RealtimePoint? ExperionTag { get; set; }
+}
--- a/src/Infrastructure/Database/ExperionDbContext.cs
+++ b/src/Infrastructure/Database/ExperionDbContext.cs
@@ -21,6 +21,10 @@ public class ExperionDbContext : DbContext
    public DbSet<HistoryRecord>  HistoryRecords  => Set<HistoryRecord>();
    public DbSet<FastSession>    FastSessions    => Set<FastSession>();
    public DbSet<FastRecord>     FastRecords     => Set<FastRecord>();
+    
+    // DbSets for the P&ID tables
+    public DbSet<PidEquipment> PidEquipment => Set<PidEquipment>();
+    public DbSet<PidAuditLog>  PidAuditLog  => Set<PidAuditLog>();

    protected override void OnModelCreating(ModelBuilder modelBuilder)
    {
@@ -72,6 +76,72 @@ public class ExperionDbContext : DbContext
            e.HasKey(x => new { x.SessionId, x.RecordedAt, x.TagName });
            e.HasIndex(x => x.SessionId);
        });
+        
+        // P&ID entity configuration
+        modelBuilder.Entity<PidEquipment>(entity =>
+        {
+            entity.ToTable("pid_equipment");
+            entity.HasKey(e => e.Id);
+
+            entity.Property(e => e.TagNo)
+                .IsRequired()
+                .HasMaxLength(50);
+
+            entity.Property(e => e.EquipmentName)
+                .HasMaxLength(200);
+
+            entity.Property(e => e.InstrumentType)
+                .HasMaxLength(10);
+
+            entity.Property(e => e.LineNumber)
+                .HasMaxLength(100);
+
+            entity.Property(e => e.PidDrawingNo)
+                .HasMaxLength(50);
+
+            // NOTE(review): Confidence is a double; precision/scale normally applies to decimal
+            // columns, so this setting may have no effect — confirm against the generated migration.
+            entity.Property(e => e.Confidence)
+                .HasPrecision(4, 3);
+
+            entity.Property(e => e.IsActive)
+                .HasDefaultValue(true);
+
+            entity.Property(e => e.ExtractedAt)
+                .HasDefaultValueSql("NOW()");
+
+            // UpdatedAt is written by application code (DateTime.UtcNow) on every change.
+            // It must not be marked ValueGeneratedOnAddOrUpdate: EF Core then treats it as
+            // store-computed and ignores the application's value on UPDATE, and the database
+            // has nothing that refreshes it, so the column would never change after insert.
+            // The SQL default still fills it on insert when no value is supplied.
+            entity.Property(e => e.UpdatedAt)
+                .HasDefaultValueSql("NOW()");
+
+            entity.HasIndex(e => e.TagNo);
+            entity.HasIndex(e => e.InstrumentType);
+            entity.HasIndex(e => e.ExtractedAt);
+
+            // Deleting a realtime point clears the link instead of deleting the equipment row.
+            entity.HasOne(e => e.ExperionTag)
+                  .WithMany()
+                  .HasForeignKey(e => e.ExperionTagId)
+                  .OnDelete(DeleteBehavior.SetNull);
+        });
+        
+        // Audit log table: length limits, database-side defaults and an index on the timestamp.
+        modelBuilder.Entity<PidAuditLog>(entity =>
+        {
+            entity.ToTable("pid_audit_log");
+            entity.HasKey(e => e.Id);
+            
+            // NOTE(review): PidAuditLog.Source is initialized to string.Empty, so EF presumably
+            // sends "" rather than falling back to this "WebUI" default — confirm.
+            entity.Property(e => e.Source)
+                .HasMaxLength(50)
+                .HasDefaultValue("WebUI");
+            
+            entity.Property(e => e.Action)
+                .HasMaxLength(50);
+            
+            entity.Property(e => e.TargetTagNo)
+                .HasMaxLength(50);
+            
+            entity.Property(e => e.LoggedAt)
+                .HasDefaultValueSql("NOW()");
+            
+            entity.HasIndex(e => e.LoggedAt);
+        });
    }
 }

--- a/src/Infrastructure/Mcp/McpClient.cs
+++ b/src/Infrastructure/Mcp/McpClient.cs
@@ -23,7 +23,7 @@ public class McpClient
        _httpClient = httpClient ?? new HttpClient
        {
            BaseAddress = new Uri(BaseUrl),
-            Timeout = TimeSpan.FromSeconds(120)
+            Timeout = TimeSpan.FromSeconds(1800)
        };
    }

@@ -31,8 +31,10 @@ public class McpClient
    {
        try
        {
-            var response = await _httpClient.GetAsync("/health");
-            return response.IsSuccessStatusCode;
+            // FastMCP는 /health 대신 /mcp 엔드포인트를 제공함
+            // 406은 Accept 헤더 문제이지만, MCP 서버가 실행 중이라는 의미
+            var response = await _httpClient.GetAsync("/mcp");
+            return response.IsSuccessStatusCode || response.StatusCode == System.Net.HttpStatusCode.NotAcceptable;
        }
        catch
        {
@@ -40,7 +42,7 @@ public class McpClient
        }
    }

-    public async Task<List<McpTool>> ListToolsAsync()
+    public async Task<List<McpTool>> ListToolsAsync(CancellationToken ct = default)
    {
        var request = new
        {
@@ -49,14 +51,14 @@ public class McpClient
            method = "tools/list"
        };

-        var response = await SendRequestAsync(request);
+        var response = await SendRequestAsync(request, ct);
        if (response?.result?.tools == null)
            return [];

        return [.. response.result.tools];
    }

-    public async Task<string> CallToolAsync(string toolName, Dictionary<string, object> arguments)
+    public async Task<string> CallToolAsync(string toolName, Dictionary<string, object> arguments, CancellationToken ct = default)
    {
        var request = new
        {
@@ -68,7 +70,7 @@ public class McpClient

        try
        {
-            var response = await SendRequestAsync(request);
+            var response = await SendRequestAsync(request, ct);
            var content = response?.result?.content;
            if (content == null || content.Length == 0)
                return "호출 결과 없음";
@@ -89,38 +91,65 @@ public class McpClient
        }
    }

-    public Task<string> RunSqlAsync(string sql) =>
-        CallToolAsync("run_sql", new Dictionary<string, object> { ["sql"] = sql });
+    public Task<string> RunSqlAsync(string sql, CancellationToken ct = default) =>
+        CallToolAsync("run_sql", new Dictionary<string, object> { ["sql"] = sql }, ct);

    public Task<string> QueryPvHistoryAsync(
-        List<string> tagNames, string timeFrom, string timeTo, int limit = 100) =>
+        List<string> tagNames, string timeFrom, string timeTo, int limit = 100, CancellationToken ct = default) =>
        CallToolAsync("query_pv_history", new Dictionary<string, object>
        {
            ["tag_names"] = tagNames,
            ["time_from"] = timeFrom,
            ["time_to"] = timeTo,
            ["limit"] = limit
-        });
+        }, ct);

-    public Task<string> GetTagMetadataAsync(string query, int limit = 10) =>
+    public Task<string> GetTagMetadataAsync(string query, int limit = 10, CancellationToken ct = default) =>
        CallToolAsync("get_tag_metadata", new Dictionary<string, object>
        {
            ["query"] = query,
            ["limit"] = limit
-        });
+        }, ct);

-    public Task<string> ListDrawingsAsync(string? unitNo = null)
+    public Task<string> ListDrawingsAsync(string? unitNo = null, CancellationToken ct = default)
    {
        var args = new Dictionary<string, object>();
        if (!string.IsNullOrEmpty(unitNo))
            args["unit_no"] = unitNo;
-        return CallToolAsync("list_drawings", args);
+        return CallToolAsync("list_drawings", args, ct);
    }

-    public Task<string> QueryWithNlAsync(string question) =>
-        CallToolAsync("query_with_nl", new Dictionary<string, object> { ["question"] = question });
+    public Task<string> QueryWithNlAsync(string question, CancellationToken ct = default) =>
+        CallToolAsync("query_with_nl", new Dictionary<string, object> { ["question"] = question }, ct);

-    private async Task<McpResponse?> SendRequestAsync(object request)
+    /// <summary>Calls the "extract_pid_tags" MCP tool with the given text and source type.</summary>
+    public Task<string> ExtractPidTagsAsync(string text, string sourceType, CancellationToken ct = default)
+    {
+        var toolArgs = new Dictionary<string, object> { ["text"] = text, ["source_type"] = sourceType };
+        return CallToolAsync("extract_pid_tags", toolArgs, ct);
+    }
+
+    /// <summary>Calls the "match_pid_tags" MCP tool; both tag sequences are copied into lists first.</summary>
+    public Task<string> MatchPidTagsAsync(IEnumerable<string> pidTags, IEnumerable<string> experionTags, CancellationToken ct = default)
+    {
+        var toolArgs = new Dictionary<string, object> { ["pid_tags"] = pidTags.ToList(), ["experion_tags"] = experionTags.ToList() };
+        return CallToolAsync("match_pid_tags", toolArgs, ct);
+    }
+
+    /// <summary>Calls the "parse_pid_dxf" MCP tool for the given file path.</summary>
+    public Task<string> ParsePidDxfAsync(string filepath, CancellationToken ct = default)
+        => CallToolAsync("parse_pid_dxf", new Dictionary<string, object> { ["filepath"] = filepath }, ct);
+
+    /// <summary>Calls the "parse_pid_pdf" MCP tool for the file, passing the OCR switch as "use_ocr".</summary>
+    public Task<string> ParsePidPdfAsync(string filepath, bool useOcr = true, CancellationToken ct = default) =>
+        CallToolAsync("parse_pid_pdf",
+            new Dictionary<string, object> { ["filepath"] = filepath, ["use_ocr"] = useOcr }, ct);
+
+    /// <summary>Calls the "parse_pid_drawing" MCP tool for the given file path.</summary>
+    public Task<string> ParsePidDrawingAsync(string filepath, CancellationToken ct = default)
+        => CallToolAsync("parse_pid_drawing", new Dictionary<string, object> { ["filepath"] = filepath }, ct);
+
+    private async Task<McpResponse?> SendRequestAsync(object request, CancellationToken ct)
    {
        var json = JsonSerializer.Serialize(request);
        var content = new StringContent(json, Encoding.UTF8, "application/json");
@@ -129,15 +158,15 @@ public class McpClient
        {
            Content = content
        };
-        // MCP 프로토콜: JSON-RPC 통신에는 application/json Accept 헤더 필요
+        // MCP 프로토콜: streamable-http 전송에는 application/json Accept 헤더 필요
        httpRequest.Headers.Add("Accept", "application/json");
        httpRequest.Headers.Add("mcp-protocol-version", "2025-03-26");

-        var response = await _httpClient.SendAsync(httpRequest);
+        var response = await _httpClient.SendAsync(httpRequest, ct);
        if (!response.IsSuccessStatusCode)
            return null;

-        var body = await response.Content.ReadAsStringAsync();
+        var body = await response.Content.ReadAsStringAsync(ct);
        return JsonSerializer.Deserialize<McpResponse>(body, _jsonOptions);
    }
 }
--- a/src/Infrastructure/Mcp/McpServerHostedService.cs
+++ b/src/Infrastructure/Mcp/McpServerHostedService.cs
@@ -0,0 +1,100 @@
+using System.Diagnostics;
+
+namespace ExperionCrawler.Infrastructure.Mcp;
+
+/// <summary>Starts the Python MCP server ("uv run server.py --http") with the app and kills it on shutdown.</summary>
+public class McpServerHostedService : IHostedService
+{
+    private readonly McpClient _mcpClient;
+    private readonly ILogger<McpServerHostedService> _logger;
+    private readonly string _workingDirectory;
+    private Process? _process; // assigned only after the child process has actually started
+
+    /// <summary>Resolves "McpServer:WorkingDirectory" (default "../../mcp-server") to an absolute path.</summary>
+    public McpServerHostedService(
+        McpClient mcpClient,
+        ILogger<McpServerHostedService> logger,
+        IConfiguration config)
+    {
+        _mcpClient = mcpClient;
+        _logger = logger;
+        var dir = config["McpServer:WorkingDirectory"] ?? "../../mcp-server";
+        _workingDirectory = Path.IsPathRooted(dir)
+            ? dir
+            : Path.GetFullPath(Path.Combine(Directory.GetCurrentDirectory(), dir));
+    }
+
+    /// <summary>Launches the server if none answers the ping, then polls it for up to 30 seconds.</summary>
+    public async Task StartAsync(CancellationToken cancellationToken)
+    {
+        if (!Directory.Exists(_workingDirectory))
+        {
+            _logger.LogWarning("[McpServer] 디렉터리 없음: {Dir} — MCP 서버 시작 스킵", _workingDirectory);
+            return;
+        }
+
+        // Reuse a server that is already answering instead of spawning a second one.
+        if (await _mcpClient.PingAsync())
+        {
+            _logger.LogInformation("[McpServer] 이미 실행 중 (localhost:5001) — 기존 프로세스 사용");
+            return;
+        }
+
+        _logger.LogInformation("[McpServer] Python MCP 서버 시작 중... ({Dir})", _workingDirectory);
+        var process = new Process
+        {
+            StartInfo = new ProcessStartInfo("uv", "run server.py --http")
+            {
+                WorkingDirectory = _workingDirectory,
+                UseShellExecute = false,
+            }
+        };
+        try { process.Start(); }
+        catch (Exception ex)
+        {
+            // Stop here: HasExited on a never-started Process throws InvalidOperationException.
+            _logger.LogError(ex, "[McpServer] 프로세스 시작 실패 (uv 설치 여부 확인)");
+            process.Dispose();
+            return;
+        }
+        _process = process;
+
+        try
+        {
+            // Give the server a moment to bind its port, then health-check once per second.
+            await Task.Delay(500, cancellationToken);
+            for (int i = 0; i < 30; i++)
+            {
+                await Task.Delay(1000, cancellationToken);
+                if (process.HasExited)
+                {
+                    _logger.LogWarning("[McpServer] 프로세스가 예기치 않게 종료됨 (exit code: {Code})", process.ExitCode);
+                    return;
+                }
+                if (await _mcpClient.PingAsync())
+                {
+                    _logger.LogInformation("[McpServer] 준비 완료 (localhost:5001, {Sec}초 소요)", i + 1);
+                    return;
+                }
+            }
+            _logger.LogWarning("[McpServer] 30초 내 응답 없음 — 백그라운드에서 계속 기다림");
+        }
+        catch (OperationCanceledException) { /* startup cancelled: stop waiting */ }
+    }
+
+    /// <summary>Kills the server's process tree if this service started it and it is still running.</summary>
+    public Task StopAsync(CancellationToken cancellationToken)
+    {
+        if (_process != null && !_process.HasExited)
+        {
+            try
+            {
+                _logger.LogInformation("[McpServer] 앱 종료 — MCP 서버 프로세스 종료 중...");
+                _process.Kill(entireProcessTree: true);
+            }
+            catch (Exception ex) { _logger.LogWarning(ex, "[McpServer] 프로세스 종료 중 오류"); }
+        }
+        _logger.LogInformation("[McpServer] 앱 종료 완료");
+        return Task.CompletedTask;
+    }
+}
--- a/src/Infrastructure/OpcUa/ExperionFastService.cs
+++ b/src/Infrastructure/OpcUa/ExperionFastService.cs
@@ -58,7 +58,14 @@ public class ExperionFastService : IExperionFastService, IHostedService, IDispos
    {
        _cts?.Cancel();
        if (_monitorTask != null)
-            await _monitorTask.WaitAsync(TimeSpan.FromSeconds(5)).ConfigureAwait(false);
+        {
+            try
+            {
+                // 종료 시 대기 시간을 2초로 단축하여 빠른 셧다운 유도
+                await _monitorTask.WaitAsync(TimeSpan.FromSeconds(2), cancellationToken).ConfigureAwait(false);
+            }
+            catch (Exception ex) { _logger.LogDebug(ex, "[Fast] StopAsync 대기 중 타임아웃 또는 취소 발생"); }
+        }
    }

    public void Dispose() => _cts?.Dispose();
--- a/src/Infrastructure/OpcUa/ExperionOpcServerService.cs
+++ b/src/Infrastructure/OpcUa/ExperionOpcServerService.cs
@@ -87,11 +87,26 @@ public class ExperionOpcServerService : IExperionOpcServerService, IHostedServic
        }
    }

-    Task IHostedService.StopAsync(CancellationToken ct)
+    async Task IHostedService.StopAsync(CancellationToken ct)
    {
        // 앱 종료 시: 서버 인스턴스 정리만, 플래그 파일은 유지 → 재기동 후 자동 시작
-        StopInternal(deleteFlag: false);
-        return Task.CompletedTask;
+        if (_server != null)
+        {
+            try
+            {
+                _logger.LogInformation("[OpcServer] 앱 종료 — OPC UA 서버 비동기 중지 중...");
+                await _server.StopAsync(ct).ConfigureAwait(false);
+            }
+            catch (Exception ex)
+            {
+                _logger.LogWarning(ex, "[OpcServer] StopAsync 중 오류 발생");
+                StopInternal(deleteFlag: false);
+            }
+        }
+        else
+        {
+            StopInternal(deleteFlag: false);
+        }
    }

    // ── IExperionOpcServerService ────────────────────────────────────────────
--- a/src/Infrastructure/OpcUa/ExperionRealtimeService.cs
+++ b/src/Infrastructure/OpcUa/ExperionRealtimeService.cs
@@ -92,7 +92,14 @@ public class ExperionRealtimeService : IExperionRealtimeService, IHostedService,
        var tasks = new[] { _monitorTask, _flushTask }
            .Where(t => t != null).Select(t => t!).ToArray();
        if (tasks.Length > 0)
-            await Task.WhenAll(tasks).WaitAsync(TimeSpan.FromSeconds(5)).ConfigureAwait(false);
+        {
+            try
+            {
+                // 종료 시 대기 시간을 2초로 단축하여 빠른 셧다운 유도
+                await Task.WhenAll(tasks).WaitAsync(TimeSpan.FromSeconds(2), cancellationToken).ConfigureAwait(false);
+            }
+            catch (Exception ex) { _logger.LogDebug(ex, "[Realtime] StopAsync 대기 중 타임아웃 또는 취소 발생"); }
+        }
        _running = false;
        _logger.LogInformation("[Realtime] 구독 중지 완료 (앱 종료 — 자동 재시작 플래그 유지)");
    }
--- a/Show More
+++ b/Show More