190 lines
7.2 KiB
Python
190 lines
7.2 KiB
Python
import ezdxf
|
|
import re
|
|
import json
|
|
import logging
|
|
from typing import List, Optional, Tuple, Union
|
|
from pydantic import BaseModel, Field
|
|
from shapely.geometry import box, Point
|
|
|
|
# Logging configuration.
# NOTE: basicConfig() runs at import time and configures the root logger
# (INFO level, timestamped format) if it has no handlers yet.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Module-level logger used by the extractor below.
logger = logging.getLogger(__name__)
|
|
|
|
# --- Data Models ---
|
|
|
|
class BoundingBox(BaseModel):
    """Axis-aligned rectangle described by its extreme coordinates.

    The model performs no cross-field validation: ``center`` is stored as
    given by the caller and is not derived from the min/max values here.
    """

    # Lower-left corner.
    min_x: float
    min_y: float
    # Upper-right corner.
    max_x: float
    max_y: float
    # (x, y) of the box midpoint, supplied by whoever builds the model.
    center: Tuple[float, float]
|
|
|
|
class GeometricEntity(BaseModel):
    """Serializable record describing one entity extracted from a drawing."""

    # Identifier of the source entity (the extractor passes the DXF handle).
    entity_id: str
    # One of: TEXT, MTEXT, LINE, LWPOLYLINE, CIRCLE, ARC
    entity_type: str
    # Name of the layer the entity lives on.
    layer: str
    # Axis-aligned bounding box of the entity.
    bbox: BoundingBox
    # Text content exactly as stored in the drawing; None for non-text entities.
    raw_value: Optional[str] = None
    # Text content with formatting codes stripped; None for non-text entities.
    clean_value: Optional[str] = None
    # Characteristic 2D points (vertices, end points or centre), possibly empty.
    coordinates: List[Union[Tuple[float, float], List[float]]] = Field(default_factory=list)
    # Free-form extra attributes (the extractor stores color and lineweight).
    properties: dict = Field(default_factory=dict)
|
|
|
|
# --- Extractor Implementation ---
|
|
|
|
class PidGeometricExtractor:
    """Extract text and simple geometry from a DXF modelspace into JSON records.

    The drawing is loaded with ``ezdxf`` on construction.  ``extract_and_save``
    walks the modelspace and writes one ``GeometricEntity`` record for every
    entity that ``get_bbox`` can handle (TEXT, MTEXT, LINE, LWPOLYLINE,
    CIRCLE, ARC).
    """

    # Rough glyph width as a fraction of the text height, used to estimate the
    # width of text whose real extents are not computed.
    _CHAR_WIDTH_RATIO = 0.6
    # Character height assumed for an MTEXT that does not store one.
    _DEFAULT_MTEXT_HEIGHT = 2.5

    # MTEXT stacking code: "\S<content>;" -- the content is real text, keep it.
    _MTEXT_STACK_RE = re.compile(r'\\S([^;\\]*);?')
    # MTEXT codes followed by a numeric argument and an optional ';'
    # (\H additionally accepts the relative-height suffix 'x').
    _MTEXT_NUMERIC_RE = re.compile(r'\\(?:H[-+]?\d*\.?\d*x?|[ACTW][-+]?\d*\.?\d*);?')
    # MTEXT codes without any argument: paragraph break and underline start.
    _MTEXT_BREAK_RE = re.compile(r'\\[PL]')
    _BRACES_RE = re.compile(r'[{}]')
    # "%%" control codes; they occur in both upper and lower case.
    _PERCENT_CODE_RE = re.compile(r'%%[UOSR]', re.IGNORECASE)
    _WHITESPACE_RE = re.compile(r'\s+')

    def __init__(self, file_path: str):
        """Load the DXF document at ``file_path`` and keep its modelspace.

        Raises:
            IOError: if the file cannot be read or parsed; the original
                exception is attached as ``__cause__``.
        """
        try:
            self.doc = ezdxf.readfile(file_path)
            self.msp = self.doc.modelspace()
        except Exception as e:
            raise IOError(f"Failed to load DXF file: {e}") from e

    def clean_text(self, text: str) -> str:
        """Return ``text`` without DXF control codes and MTEXT formatting.

        Handles the inline codes \\P, \\L, \\W, \\A, \\C, \\H, \\T (removed),
        \\S (marker removed, stacked content kept), curly braces and the
        ``%%U``/``%%O``/``%%S``/``%%R`` control codes (any case).  Runs of
        whitespace collapse to a single space and the result is stripped.
        An empty or ``None`` input yields ``""``.
        """
        if not text:
            return ""

        # 1. MTEXT inline formatting.  Codes are replaced by a space so that
        #    words separated only by a code (e.g. a paragraph break) stay apart.
        text = self._MTEXT_STACK_RE.sub(r' \1 ', text)
        text = self._MTEXT_NUMERIC_RE.sub(' ', text)
        text = self._MTEXT_BREAK_RE.sub(' ', text)

        # 2. Grouping braces { }.
        text = self._BRACES_RE.sub(' ', text)

        # 3. "%%" control codes.
        text = self._PERCENT_CODE_RE.sub(' ', text)

        # 4. Collapse repeated whitespace left behind by the substitutions.
        return self._WHITESPACE_RE.sub(' ', text).strip()

    def get_bbox(self, entity) -> "Optional[BoundingBox]":
        """Compute an axis-aligned bounding box for a supported entity.

        Text boxes are estimates: they start at the insert point and ignore
        rotation and alignment.  CIRCLE and ARC both use the full circle.

        Returns:
            The bounding box, or ``None`` for unsupported entity types, for an
            LWPOLYLINE without points, or when the computation fails (the
            failure is logged).
        """
        try:
            kind = entity.dxftype()

            if kind == 'TEXT':
                origin = entity.dxf.insert
                height = entity.dxf.height
                # Approximate width: character count * height * ratio.
                width = len(entity.dxf.text) * height * self._CHAR_WIDTH_RATIO
                return self._create_bbox(origin.x, origin.y, origin.x + width, origin.y + height)

            if kind == 'MTEXT':
                origin = entity.dxf.insert
                # dxf.get() returns the given default for unset attributes;
                # plain attribute access may yield None, which breaks the
                # numeric comparison below.
                height = entity.dxf.get('char_height', self._DEFAULT_MTEXT_HEIGHT)
                width = entity.dxf.get('width', 0)
                if not width or width <= 0:
                    # No column width stored: estimate from the visible text,
                    # not from the raw content with its formatting codes.
                    visible = self.clean_text(entity.text)
                    width = len(visible) * height * self._CHAR_WIDTH_RATIO
                return self._create_bbox(origin.x, origin.y, origin.x + width, origin.y + height)

            if kind == 'LINE':
                start = entity.dxf.start
                end = entity.dxf.end
                return self._create_bbox(
                    min(start.x, end.x), min(start.y, end.y),
                    max(start.x, end.x), max(start.y, end.y),
                )

            if kind == 'LWPOLYLINE':
                points = entity.get_points()
                if not points:
                    return None
                # The first two items of every point are x and y.
                xs = [pt[0] for pt in points]
                ys = [pt[1] for pt in points]
                return self._create_bbox(min(xs), min(ys), max(xs), max(ys))

            if kind in ('CIRCLE', 'ARC'):
                center = entity.dxf.center
                radius = entity.dxf.radius
                return self._create_bbox(
                    center.x - radius, center.y - radius,
                    center.x + radius, center.y + radius,
                )

        except Exception as e:
            logger.error(
                "Error calculating bbox for %s (%s): %s",
                entity.dxftype(), entity.dxf.handle, e,
                exc_info=True,
            )

        # Unsupported entity type or failed computation.
        return None

    def _create_bbox(self, min_x, min_y, max_x, max_y) -> "BoundingBox":
        """Build a ``BoundingBox`` and fill in its centre point."""
        return BoundingBox(
            min_x=min_x,
            min_y=min_y,
            max_x=max_x,
            max_y=max_y,
            center=((min_x + max_x) / 2, (min_y + max_y) / 2),
        )

    def _raw_text(self, entity, kind: str) -> str:
        """Return the stored text of a TEXT/MTEXT entity, ``""`` otherwise."""
        if kind == 'TEXT':
            return entity.dxf.text
        if kind == 'MTEXT':
            return entity.text
        return ""

    def _coordinates(self, entity, kind: str) -> list:
        """Return the characteristic 2D points of ``entity`` (may be empty)."""
        if hasattr(entity, 'get_points'):
            # Only x and y are kept; any further per-point values are dropped.
            return [(pt[0], pt[1]) for pt in entity.get_points()]
        if kind == 'LINE':
            return [
                (entity.dxf.start.x, entity.dxf.start.y),
                (entity.dxf.end.x, entity.dxf.end.y),
            ]
        if kind in ('CIRCLE', 'ARC'):
            return [(entity.dxf.center.x, entity.dxf.center.y)]
        return []

    def _entity_record(self, entity) -> Optional[dict]:
        """Convert one entity into a plain dict, or ``None`` if it has no bbox."""
        bbox_obj = self.get_bbox(entity)
        if not bbox_obj:
            return None

        kind = entity.dxftype()
        raw_text = self._raw_text(entity, kind)

        entity_data = GeometricEntity(
            entity_id=entity.dxf.handle,
            entity_type=kind,
            layer=entity.dxf.layer,
            bbox=bbox_obj,
            raw_value=raw_text or None,
            clean_value=self.clean_text(raw_text) if raw_text else None,
            coordinates=self._coordinates(entity, kind),
            properties={
                "color": entity.dxf.color,
                "lineweight": entity.dxf.lineweight if hasattr(entity.dxf, 'lineweight') else None,
            },
        )
        return entity_data.model_dump()

    def extract_and_save(self, output_path: str) -> str:
        """Extract all supported modelspace entities and write them as JSON.

        Entities without a bounding box are skipped.  An entity that raises
        while being processed is logged and skipped so that one bad entity
        does not abort the whole extraction.

        Returns:
            ``output_path``, unchanged.

        Raises:
            Exception: any error raised while opening or writing the output
                file is logged and re-raised.
        """
        results = []
        logger.info(
            "Starting DXF extraction from %s",
            getattr(self.doc, 'filename', 'unknown file'),
        )

        for entity in self.msp:
            try:
                record = self._entity_record(entity)
            except Exception as e:
                logger.error(
                    "Unexpected error processing entity %s (%s): %s",
                    entity.dxftype(), entity.dxf.handle, e,
                )
                continue
            if record is not None:
                results.append(record)

        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(results, f, ensure_ascii=False, indent=4)
            logger.info("Successfully saved %d entities to %s", len(results), output_path)
        except Exception as e:
            logger.error("Failed to save extraction results to %s: %s", output_path, e)
            raise

        return output_path
|
|
|
|
# --- Proximity Utilities ---
|
|
|
|
def is_near(bbox_a: "BoundingBox", bbox_b: "BoundingBox", threshold: float = 5.0) -> bool:
    """Return True when two bounding boxes are at most ``threshold`` apart.

    Both boxes are converted to shapely rectangles and compared with
    shapely's ``distance``.

    Args:
        bbox_a: First bounding box.
        bbox_b: Second bounding box.
        threshold: Maximum allowed distance, in drawing units (inclusive).
    """
    rect_a = box(bbox_a.min_x, bbox_a.min_y, bbox_a.max_x, bbox_a.max_y)
    rect_b = box(bbox_b.min_x, bbox_b.min_y, bbox_b.max_x, bbox_b.max_y)
    return rect_a.distance(rect_b) <= threshold
|
|
|
|
def is_inside(point: Tuple[float, float], bbox: "BoundingBox") -> bool:
    """Return True when ``point`` lies inside ``bbox`` or on its border.

    Only the first two items of ``point`` (x, y) are examined.
    """
    x, y = point[0], point[1]
    return (bbox.min_x <= x <= bbox.max_x) and (bbox.min_y <= y <= bbox.max_y)
|