373 lines
12 KiB
Markdown
373 lines
12 KiB
Markdown
# Actual Implementation: P&ID Parser (Distributed Processing)
|
|
|
|
This document contains the actual implementation of the P&ID Parser based on the design plan.
|
|
|
|
## 1. Python Implementation
|
|
|
|
### 1.1 `dxf_preprocessor.py`
|
|
```python
|
|
import ezdxf
|
|
import json
|
|
from datetime import datetime
|
|
import os
|
|
|
|
class DXFPreprocessor:
|
|
"""
|
|
DXF 파일을 로드하여 핵심 엔티티를 추출하고 중간 JSON 포맷으로 저장합니다.
|
|
"""
|
|
def __init__(self):
|
|
self.entities = []
|
|
|
|
def load_and_parse(self, file_path):
|
|
try:
|
|
if not os.path.exists(file_path):
|
|
print(f"Error: File not found {file_path}")
|
|
return False
|
|
|
|
doc = ezdxf.readfile(file_path)
|
|
msp = doc.modelspace()
|
|
|
|
for entity in msp:
|
|
# 추출 대상 엔티티 타입 정의
|
|
if entity.dxftype() in ['TEXT', 'MTEXT', 'LINE', 'CIRCLE', 'LWPOLYLINE']:
|
|
data = {
|
|
"type": entity.dxftype(),
|
|
"layer": entity.dxf.layer,
|
|
"content": "",
|
|
"coordinates": {"x": 0.0, "y": 0.0, "z": 0.0},
|
|
"attributes": {"color": entity.dxf.color, "lineweight": entity.dxf.lineweight}
|
|
}
|
|
|
|
# 텍스트 내용 추출
|
|
if entity.dxftype() in ['TEXT', 'MTEXT']:
|
|
data["content"] = entity.dxf.text if entity.dxftype() == 'TEXT' else entity.text
|
|
|
|
# 좌표 정보 추출 (단순화)
|
|
try:
|
|
if entity.dxftype() == 'LINE':
|
|
data["coordinates"] = {"x": entity.dxf.start.x, "y": entity.dxf.start.y, "z": entity.dxf.start.z}
|
|
elif entity.dxftype() == 'CIRCLE':
|
|
data["coordinates"] = {"x": entity.dxf.center.x, "y": entity.dxf.center.y, "z": entity.dxf.center.z}
|
|
elif entity.dxftype() == 'LWPOLYLINE':
|
|
data["coordinates"] = {"x": entity.dxf.vertices[0].x, "y": entity.dxf.vertices[0].y, "z": 0.0}
|
|
except Exception:
|
|
pass # 좌표 추출 실패 시 기본값 유지
|
|
|
|
self.entities.append(data)
|
|
return True
|
|
except Exception as e:
|
|
print(f"Error parsing DXF: {e}")
|
|
return False
|
|
|
|
def generate_intermediate_json(self, output_path, filename):
|
|
data = {
|
|
"metadata": {
|
|
"filename": filename,
|
|
"timestamp": datetime.now().isoformat()
|
|
},
|
|
"entities": self.entities
|
|
}
|
|
with open(output_path, 'w', encoding='utf-8') as f:
|
|
json.dump(data, f, indent=2, ensure_ascii=False)
|
|
print(f"Intermediate JSON saved to: {output_path}")
|
|
|
|
if __name__ == "__main__":
|
|
import sys
|
|
if len(sys.argv) < 2:
|
|
print("Usage: python dxf_preprocessor.py <input_dxf_path>")
|
|
else:
|
|
input_path = sys.argv[1]
|
|
output_path = input_path.replace(".dxf", "_intermediate.json")
|
|
preprocessor = DXFPreprocessor()
|
|
if preprocessor.load_and_parse(input_path):
|
|
preprocessor.generate_intermediate_json(output_path, os.path.basename(input_path))
|
|
```
|
|
|
|
### 1.2 `extractors/base_extractor.py`
|
|
```python
|
|
import json
|
|
import re
|
|
import sys
|
|
import os
|
|
|
|
class BaseExtractor:
|
|
"""
|
|
모든 특화된 추출기(Specialized Extractors)의 기본 클래스입니다.
|
|
"""
|
|
def __init__(self, input_json_path):
|
|
self.input_json_path = input_json_path
|
|
self.data = None
|
|
self.results = []
|
|
|
|
def load_input_json(self):
|
|
try:
|
|
with open(self.input_json_path, 'r', encoding='utf-8') as f:
|
|
self.data = json.load(f)
|
|
return True
|
|
except Exception as e:
|
|
print(f"Error loading JSON: {e}")
|
|
return False
|
|
|
|
def apply_regex_pattern(self, pattern):
|
|
if not self.data:
|
|
return
|
|
|
|
regex = re.compile(pattern)
|
|
for entity in self.data.get("entities", []):
|
|
content = entity.get("content", "")
|
|
if content:
|
|
match = regex.search(content)
|
|
if match:
|
|
# 매칭된 정보를 결과 리스트에 추가
|
|
self.results.append({
|
|
"tag": match.group(0),
|
|
"type": entity["type"],
|
|
"layer": entity["layer"],
|
|
"content": content,
|
|
"coordinates": entity["coordinates"]
|
|
})
|
|
|
|
def save_output_json(self, output_path):
|
|
output_data = {
|
|
"source_file": self.data["metadata"]["filename"],
|
|
"extracted_count": len(self.results),
|
|
"results": self.results
|
|
}
|
|
with open(output_path, 'w', encoding='utf-8') as f:
|
|
json.dump(output_data, f, indent=2, ensure_ascii=False)
|
|
print(f"Extraction results saved to: {output_path}")
|
|
|
|
if __name__ == "__main__":
|
|
# This block is replaced by specific extractor scripts
|
|
pass
|
|
```
|
|
|
|
### 1.3 `extractors/transmitter_extractor.py` (Example of Specialized Extractor)
|
|
```python
|
|
import sys
|
|
from base_extractor import BaseExtractor
|
|
|
|
class TransmitterExtractor(BaseExtractor):
|
|
def run(self):
|
|
# Pattern: (FIT|FT|LT|PT|TE) - 123
|
|
pattern = r"(FIT|FT|LT|PT|TE)\s?-\s?\d+"
|
|
self.apply_regex_pattern(pattern)
|
|
|
|
if __name__ == "__main__":
|
|
if len(sys.argv) < 2:
|
|
print("Usage: python transmitter_extractor.py <input_json_path>")
|
|
else:
|
|
input_path = sys.argv[1]
|
|
output_path = input_path.replace(".json", "_transmitter.json")
|
|
extractor = TransmitterExtractor(input_path)
|
|
if extractor.load_input_json():
|
|
extractor.run()
|
|
extractor.save_output_json(output_path)
|
|
```
|
|
|
|
### 1.4 `extraction_orchestrator.py`
|
|
```python
|
|
import subprocess
|
|
import json
|
|
import os
|
|
import glob
|
|
|
|
class ExtractionOrchestrator:
|
|
"""
|
|
서브 프로세스들을 병렬로 실행하고 결과를 통합합니다.
|
|
"""
|
|
def __init__(self, extractor_scripts):
|
|
self.extractor_scripts = extractor_scripts
|
|
self.processes = []
|
|
|
|
def run_parallel_extractors(self, input_json_path):
|
|
print(f"Starting parallel extraction for: {input_json_path}")
|
|
for script in self.extractor_scripts:
|
|
# 각 스크립트를 별도 프로세스로 실행
|
|
p = subprocess.Popen(['python', script, input_json_path])
|
|
self.processes.append(p)
|
|
|
|
for p in self.processes:
|
|
p.wait()
|
|
print("All extraction processes completed.")
|
|
|
|
def aggregate_results(self, input_json_path, output_master_path):
|
|
master_results = []
|
|
# 모든 _*.json 파일들을 찾아 병합
|
|
pattern = os.path.dirname(input_json_path) + "/*_*.json"
|
|
result_files = glob.glob(pattern)
|
|
|
|
for file_path in result_files:
|
|
if os.path.basename(file_path).startswith("intermediate"):
|
|
continue # 원본 중간 파일은 제외
|
|
|
|
try:
|
|
with open(file_path, 'r', encoding='utf-8') as f:
|
|
res_data = json.load(f)
|
|
master_results.extend(res_data["results"])
|
|
except Exception as e:
|
|
print(
|
|
f"Error aggregating {file_path}: {e}"
|
|
)
|
|
|
|
final_output = {
|
|
"source_file": os.path.basename(input_json_path),
|
|
"total_extracted": len(master_results),
|
|
"results": master_results
|
|
}
|
|
|
|
with open(output_master_path, 'w', encoding='utf-8') as f:
|
|
json.dump(final_output, f, indent=2, ensure_ascii=False)
|
|
print(f"Master extraction result saved to: {output_master_path}")
|
|
|
|
if __name__ == "__main__":
|
|
# Example usage
|
|
scripts = [
|
|
'extractors/transmitter_extractor.py',
|
|
# 'extractors/valve_extractor.py', ...
|
|
]
|
|
orchestrator = ExtractionOrchestrator(scripts)
|
|
input_json = "path/to/intermediate.json"
|
|
orchestrator.run_parallel_extractors(input_json)
|
|
orchestrator.aggregate_results(input_json, "master_result.json")
|
|
```
|
|
|
|
### 1.5 `database_integrator.py`
|
|
```python
|
|
import json
|
|
import requests
|
|
|
|
class DatabaseIntegrator:
|
|
"""
|
|
병합된 결과를 .NET Backend API로 전송합니다.
|
|
"""
|
|
def __init__(self, api_url):
|
|
self.api_url = api_url
|
|
|
|
def send_to_backend(self, master_json_path):
|
|
try:
|
|
with open(master_json_append_path, 'r', encoding='utf-8') as f:
|
|
data = json.load(f)
|
|
|
|
response = requests.post(self.api_url, json=data)
|
|
if response.status_code == 200:
|
|
print("Successfully sent data to backend.")
|
|
else:
|
|
print(f"Failed to send data. Status: {response.status_code}, Error: {response.text}")
|
|
except Exception as e:
|
|
print(f"Error during integration: {e}")
|
|
|
|
if __name__ == "__main__":
|
|
API_ENDPOINT = "http://localhost:5000/api/pid/extraction"
|
|
integrator = DatabaseIntegrator(API_ENDPOINT)
|
|
integrator.send_to_backend("master_result.json")
|
|
```
|
|
|
|
## 2. C# Backend Implementation
|
|
|
|
### 2.1 `PidExtractionController.cs`
|
|
```csharp
|
|
using Microsoft.AspNetCore.Mvc;
|
|
using ExperionCrawler.Core.Application.DTOs;
|
|
using ExperionCrawler.Core.Application.Services;
|
|
|
|
namespace ExperionCrawler.Web.Controllers
|
|
{
|
|
[ApiController]
|
|
[Route("api/[controller]")]
|
|
public class PidExtractionController : ControllerBase
|
|
{
|
|
private readonly IPidProcessingService _pidService;
|
|
|
|
public PidExtractionController(IPidProcessingService pidService)
|
|
{
|
|
_pidService = pidService;
|
|
}
|
|
|
|
[HttpPost("extraction")]
|
|
public async Task<IActionResult> PostExtractionResult([FromBody] ExtractionDto dto)
|
|
{
|
|
if (dto == null) return BadRequest("Invalid data.");
|
|
|
|
try
|
|
{
|
|
await _pidService.ProcessAndSave(dto);
|
|
return Ok(new { message = "Extraction data processed successfully." });
|
|
}
|
|
catch (Exception ex)
|
|
{
|
|
return StatusCode(500, $"Internal server error: {ex.Message}");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
### 2.2 `PidProcessingService.cs`
|
|
```csharp
|
|
using ExperionCrawler.Core.Application.DTOs;
|
|
using ExperionCrawler.Core.Application.Interfaces;
|
|
using ExperionCrawler.Core.Domain.Entities;
|
|
|
|
namespace ExperionCrawler.Core.Application.Services
|
|
{
|
|
public class PidProcessingService : IPidProcessingService
|
|
{
|
|
private readonly IPidRepository _repository;
|
|
|
|
public PidProcessing
|
|
{
|
|
_repository = repository;
|
|
}
|
|
|
|
public async Task ProcessAndSave(ExtractionDto dto)
|
|
{
|
|
// 1. Validate DTO
|
|
if (string.IsNullOrEmpty(dto.SourceFile)) throw new ArgumentException("Source file name is required.");
|
|
|
|
// 2. Map DTO to Domain Entity
|
|
foreach (var item in dto.Results)
|
|
{
|
|
var equipment = new PidEquipment
|
|
{
|
|
TagName = item.Tag,
|
|
Layer = item.Layer,
|
|
Description = item.Content,
|
|
SourceFile = dto.SourceFile,
|
|
CreatedAt = DateTime.UtcNow
|
|
};
|
|
|
|
// 3. Save to Database
|
|
await _repository.SaveAsync(equipment);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
### 2.3 `PidRepository.cs`
|
|
```csharp
|
|
using ExperionCrawler.Core.Application.Interfaces;
|
|
using ExperionCrawler.Core.Domain.Entities;
|
|
using ExperionCrawler.Infrastructure.Database;
|
|
|
|
namespace ExperionCrawler.Infrastructure.Repositories
|
|
{
|
|
public class PidRepository : IPidRepository
|
|
{
|
|
private readonly ExperionDbContext _context;
|
|
|
|
public PidRepository(ExperionDbContext context)
|
|
{
|
|
_context = context;
|
|
}
|
|
|
|
public async Task SaveAsync(PidEquipment entity)
|
|
{
|
|
await _context.PidEquipments.AddAsync(entity);
|
|
await _context.SaveChangesAsync();
|
|
}
|
|
}
|
|
}
|
|
```
|