12 KiB
12 KiB
Actual Implementation: P&ID Parser (Distributed Processing)
This document contains the actual implementation of the P&ID Parser based on the design plan.
1. Python Implementation
1.1 dxf_preprocessor.py
import ezdxf
import json
from datetime import datetime
import os
class DXFPreprocessor:
"""
DXF 파일을 로드하여 핵심 엔티티를 추출하고 중간 JSON 포맷으로 저장합니다.
"""
def __init__(self):
self.entities = []
def load_and_parse(self, file_path):
try:
if not os.path.exists(file_path):
print(f"Error: File not found {file_path}")
return False
doc = ezdxf.readfile(file_path)
msp = doc.modelspace()
for entity in msp:
# 추출 대상 엔티티 타입 정의
if entity.dxftype() in ['TEXT', 'MTEXT', 'LINE', 'CIRCLE', 'LWPOLYLINE']:
data = {
"type": entity.dxftype(),
"layer": entity.dxf.layer,
"content": "",
"coordinates": {"x": 0.0, "y": 0.0, "z": 0.0},
"attributes": {"color": entity.dxf.color, "lineweight": entity.dxf.lineweight}
}
# 텍스트 내용 추출
if entity.dxftype() in ['TEXT', 'MTEXT']:
data["content"] = entity.dxf.text if entity.dxftype() == 'TEXT' else entity.text
# 좌표 정보 추출 (단순화)
try:
if entity.dxftype() == 'LINE':
data["coordinates"] = {"x": entity.dxf.start.x, "y": entity.dxf.start.y, "z": entity.dxf.start.z}
elif entity.dxftype() == 'CIRCLE':
data["coordinates"] = {"x": entity.dxf.center.x, "y": entity.dxf.center.y, "z": entity.dxf.center.z}
elif entity.dxftype() == 'LWPOLYLINE':
data["coordinates"] = {"x": entity.dxf.vertices[0].x, "y": entity.dxf.vertices[0].y, "z": 0.0}
except Exception:
pass # 좌표 추출 실패 시 기본값 유지
self.entities.append(data)
return True
except Exception as e:
print(f"Error parsing DXF: {e}")
return False
def generate_intermediate_json(self, output_path, filename):
data = {
"metadata": {
"filename": filename,
"timestamp": datetime.now().isoformat()
},
"entities": self.entities
}
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
print(f"Intermediate JSON saved to: {output_path}")
if __name__ == "__main__":
import sys
if len(sys.argv) < 2:
print("Usage: python dxf_preprocessor.py <input_dxf_path>")
else:
input_path = sys.argv[1]
output_path = input_path.replace(".dxf", "_intermediate.json")
preprocessor = DXFPreprocessor()
if preprocessor.load_and_parse(input_path):
preprocessor.generate_intermediate_json(output_path, os.path.basename(input_path))
1.2 extractors/base_extractor.py
import json
import re
import sys
import os
class BaseExtractor:
"""
모든 특화된 추출기(Specialized Extractors)의 기본 클래스입니다.
"""
def __init__(self, input_json_path):
self.input_json_path = input_json_path
self.data = None
self.results = []
def load_input_json(self):
try:
with open(self.input_json_path, 'r', encoding='utf-8') as f:
self.data = json.load(f)
return True
except Exception as e:
print(f"Error loading JSON: {e}")
return False
def apply_regex_pattern(self, pattern):
if not self.data:
return
regex = re.compile(pattern)
for entity in self.data.get("entities", []):
content = entity.get("content", "")
if content:
match = regex.search(content)
if match:
# 매칭된 정보를 결과 리스트에 추가
self.results.append({
"tag": match.group(0),
"type": entity["type"],
"layer": entity["layer"],
"content": content,
"coordinates": entity["coordinates"]
})
def save_output_json(self, output_path):
output_data = {
"source_file": self.data["metadata"]["filename"],
"extracted_count": len(self.results),
"results": self.results
}
with open(output_path, 'w', encoding='utf-8') as f:
json.dump(output_data, f, indent=2, ensure_ascii=False)
print(f"Extraction results saved to: {output_path}")
if __name__ == "__main__":
# This block is replaced by specific extractor scripts
pass
1.3 extractors/transmitter_extractor.py (Example of Specialized Extractor)
import sys
from base_extractor import BaseExtractor
class TransmitterExtractor(BaseExtractor):
def run(self):
# Pattern: (FIT|FT|LT|PT|TE) - 123
pattern = r"(FIT|FT|LT|PT|TE)\s?-\s?\d+"
self.apply_regex_pattern(pattern)
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: python transmitter_extractor.py <input_json_path>")
else:
input_path = sys.argv[1]
output_path = input_path.replace(".json", "_transmitter.json")
extractor = TransmitterExtractor(input_path)
if extractor.load_input_json():
extractor.run()
extractor.save_output_json(output_path)
1.4 extraction_orchestrator.py
import subprocess
import json
import os
import glob
class ExtractionOrchestrator:
"""
서브 프로세스들을 병렬로 실행하고 결과를 통합합니다.
"""
def __init__(self, extractor_scripts):
self.extractor_scripts = extractor_scripts
self.processes = []
def run_parallel_extractors(self, input_json_path):
print(f"Starting parallel extraction for: {input_json_path}")
for script in self.extractor_scripts:
# 각 스크립트를 별도 프로세스로 실행
p = subprocess.Popen(['python', script, input_json_path])
self.processes.append(p)
for p in self.processes:
p.wait()
print("All extraction processes completed.")
def aggregate_results(self, input_json_path, output_master_path):
master_results = []
# 모든 _*.json 파일들을 찾아 병합
pattern = os.path.dirname(input_json_path) + "/*_*.json"
result_files = glob.glob(pattern)
for file_path in result_files:
if os.path.basename(file_path).startswith("intermediate"):
continue # 원본 중간 파일은 제외
try:
with open(file_path, 'r', encoding='utf-8') as f:
res_data = json.load(f)
master_results.extend(res_data["results"])
except Exception as e:
print(
f"Error aggregating {file_path}: {e}"
)
final_output = {
"source_file": os.path.basename(input_json_path),
"total_extracted": len(master_results),
"results": master_results
}
with open(output_master_path, 'w', encoding='utf-8') as f:
json.dump(final_output, f, indent=2, ensure_ascii=False)
print(f"Master extraction result saved to: {output_master_path}")
if __name__ == "__main__":
# Example usage
scripts = [
'extractors/transmitter_extractor.py',
# 'extractors/valve_extractor.py', ...
]
orchestrator = ExtractionOrchestrator(scripts)
input_json = "path/to/intermediate.json"
orchestrator.run_parallel_extractors(input_json)
orchestrator.aggregate_results(input_json, "master_result.json")
1.5 database_integrator.py
import json
import requests
class DatabaseIntegrator:
"""
병합된 결과를 .NET Backend API로 전송합니다.
"""
def __init__(self, api_url):
self.api_url = api_url
def send_to_backend(self, master_json_path):
try:
with open(master_json_append_path, 'r', encoding='utf-8') as f:
data = json.load(f)
response = requests.post(self.api_url, json=data)
if response.status_code == 200:
print("Successfully sent data to backend.")
else:
print(f"Failed to send data. Status: {response.status_code}, Error: {response.text}")
except Exception as e:
print(f"Error during integration: {e}")
if __name__ == "__main__":
API_ENDPOINT = "http://localhost:5000/api/pid/extraction"
integrator = DatabaseIntegrator(API_ENDPOINT)
integrator.send_to_backend("master_result.json")
2. C# Backend Implementation
2.1 PidExtractionController.cs
using Microsoft.AspNetCore.Mvc;
using ExperionCrawler.Core.Application.DTOs;
using ExperionCrawler.Core.Application.Services;
namespace ExperionCrawler.Web.Controllers
{
[ApiController]
[Route("api/[controller]")]
public class PidExtractionController : ControllerBase
{
private readonly IPidProcessingService _pidService;
public PidExtractionController(IPidProcessingService pidService)
{
_pidService = pidService;
}
[HttpPost("extraction")]
public async Task<IActionResult> PostExtractionResult([FromBody] ExtractionDto dto)
{
if (dto == null) return BadRequest("Invalid data.");
try
{
await _pidService.ProcessAndSave(dto);
return Ok(new { message = "Extraction data processed successfully." });
}
catch (Exception ex)
{
return StatusCode(500, $"Internal server error: {ex.Message}");
}
}
}
}
2.2 PidProcessingService.cs
using ExperionCrawler.Core.Application.DTOs;
using ExperionCrawler.Core.Application.Interfaces;
using ExperionCrawler.Core.Domain.Entities;
namespace ExperionCrawler.Core.Application.Services
{
public class PidProcessingService : IPidProcessingService
{
private readonly IPidRepository _repository;
public PidProcessing
{
_repository = repository;
}
public async Task ProcessAndSave(ExtractionDto dto)
{
// 1. Validate DTO
if (string.IsNullOrEmpty(dto.SourceFile)) throw new ArgumentException("Source file name is required.");
// 2. Map DTO to Domain Entity
foreach (var item in dto.Results)
{
var equipment = new PidEquipment
{
TagName = item.Tag,
Layer = item.Layer,
Description = item.Content,
SourceFile = dto.SourceFile,
CreatedAt = DateTime.UtcNow
};
// 3. Save to Database
await _repository.SaveAsync(equipment);
}
}
}
}
2.3 PidRepository.cs
using ExperionCrawler.Core.Application.Interfaces;
using ExperionCrawler.Core.Domain.Entities;
using ExperionCrawler.Infrastructure.Database;
namespace ExperionCrawler.Infrastructure.Repositories
{
public class PidRepository : IPidRepository
{
private readonly ExperionDbContext _context;
public PidRepository(ExperionDbContext context)
{
_context = context;
}
public async Task SaveAsync(PidEquipment entity)
{
await _context.PidEquipments.AddAsync(entity);
await _context.SaveChangesAsync();
}
}
}