"""Extract geometric entities from a P&ID DXF drawing into a JSON file."""

import json
import logging
import re
from typing import List, Optional, Tuple, Union

import ezdxf
from pydantic import BaseModel, Field
from shapely.geometry import box, Point

logger = logging.getLogger(__name__)

# Rough glyph width relative to text height, used to estimate text extents.
_CHAR_WIDTH_RATIO = 0.6
# Fallback MTEXT character height when the entity does not provide one.
_DEFAULT_MTEXT_CHAR_HEIGHT = 2.5

# Stacked text "\S<content>;": keep the content, drop the marker/terminator.
_MTEXT_STACK = re.compile(r"\\S([^;\\{}]*);")
# Codes that carry a numeric argument (optionally decimal, optional "x"
# suffix) and an optional ";" terminator, e.g. "\A1;", "\C1;", "\H2.5x;".
_MTEXT_ARG_CODE = re.compile(r"\\[ACHTW](?:(?:\d+(?:\.\d*)?|\.\d+)x?)?;?")
# Codes without an argument. Digits following them are real text and must
# not be consumed ("TAG\P100" is "TAG" + line break + "100").
_MTEXT_BARE_CODE = re.compile(r"\\[PLS]")
_BRACES = re.compile(r"[{}]")
# "%%" control codes; matched case-insensitively ("%%u" as well as "%%U").
_PERCENT_CODE = re.compile(r"%%[UOSR]", re.IGNORECASE)
_WHITESPACE = re.compile(r"\s+")


# --- Data Models ---
class BoundingBox(BaseModel):
    """Axis-aligned bounding box together with its centre point."""

    min_x: float
    min_y: float
    max_x: float
    max_y: float
    center: Tuple[float, float]


class GeometricEntity(BaseModel):
    """One extracted DXF entity in a JSON-serialisable form."""

    entity_id: str
    entity_type: str  # TEXT, MTEXT, LINE, LWPOLYLINE, CIRCLE, ARC
    layer: str
    bbox: BoundingBox
    raw_value: Optional[str] = None
    clean_value: Optional[str] = None
    coordinates: List[Union[Tuple[float, float], List[float]]] = Field(default_factory=list)
    properties: dict = Field(default_factory=dict)


# --- Extractor Implementation ---
class PidGeometricExtractor:
    """Read a DXF file and export its modelspace entities with bounding boxes."""

    def __init__(self, file_path: str):
        """Load the DXF document and keep a handle on its modelspace.

        Raises:
            IOError: if the file cannot be read or parsed; the original
                exception is attached as ``__cause__``.
        """
        try:
            self.doc = ezdxf.readfile(file_path)
            self.msp = self.doc.modelspace()
        except Exception as e:
            raise IOError(f"Failed to load DXF file: {e}") from e

    def clean_text(self, text: str) -> str:
        """Return *text* with DXF control codes and MTEXT formatting removed.

        Handled: the MTEXT codes ``\\P \\L \\S \\A \\C \\H \\W \\T``, grouping
        braces, and the ``%%U %%O %%S %%R`` control codes. Each removed code
        becomes a space, then runs of whitespace are collapsed and the result
        is stripped. A falsy input yields an empty string.
        """
        if not text:
            return ""

        # 1. Stacked text: keep what is stacked, drop "\S" and the ";".
        text = _MTEXT_STACK.sub(r" \1 ", text)
        # 2. Codes with a numeric argument, then codes without one.
        text = _MTEXT_ARG_CODE.sub(" ", text)
        text = _MTEXT_BARE_CODE.sub(" ", text)
        # 3. Grouping braces.
        text = _BRACES.sub(" ", text)
        # 4. "%%" control codes.
        text = _PERCENT_CODE.sub(" ", text)
        # 5. Collapse repeated whitespace.
        return _WHITESPACE.sub(" ", text).strip()

    def _estimate_text_width(self, text: Optional[str], height: float) -> float:
        """Estimate rendered width as visible-character count * height * ratio."""
        # Use the cleaned text so formatting codes do not inflate the box.
        return len(self.clean_text(text or "")) * height * _CHAR_WIDTH_RATIO

    def get_bbox(self, entity) -> Optional[BoundingBox]:
        """Compute an approximate bounding box for a supported entity.

        Supported types are TEXT, MTEXT, LINE, LWPOLYLINE, CIRCLE and ARC.

        Returns:
            The bounding box, or ``None`` for unsupported entity types, for
            polylines without points, or when the computation fails (the
            failure is logged, not raised).
        """
        kind = entity.dxftype()
        try:
            if kind == "TEXT":
                # NOTE: approximation -- the box grows right/up from the insert
                # point; rotation and alignment are not taken into account.
                origin = entity.dxf.insert
                height = entity.dxf.height
                width = self._estimate_text_width(entity.dxf.text, height)
                return self._create_bbox(
                    origin.x, origin.y, origin.x + width, origin.y + height
                )

            if kind == "MTEXT":
                # NOTE: approximation -- one line of text growing right/up from
                # the insert point; attachment point and line count are ignored.
                origin = entity.dxf.insert
                height = (
                    getattr(entity.dxf, "char_height", None)
                    or _DEFAULT_MTEXT_CHAR_HEIGHT
                )
                # The width may be unset (None); comparing None with 0 would
                # raise and silently drop the entity, so guard explicitly.
                declared = getattr(entity.dxf, "width", None)
                if declared and declared > 0:
                    width = declared
                else:
                    width = self._estimate_text_width(entity.text, height)
                return self._create_bbox(
                    origin.x, origin.y, origin.x + width, origin.y + height
                )

            if kind == "LINE":
                start = entity.dxf.start
                end = entity.dxf.end
                return self._create_bbox(
                    min(start.x, end.x),
                    min(start.y, end.y),
                    max(start.x, end.x),
                    max(start.y, end.y),
                )

            if kind == "LWPOLYLINE":
                points = entity.get_points()
                if not points:
                    return None
                # Only the vertices are considered; bulged (arc) segments are
                # not expanded.
                xs = [p[0] for p in points]
                ys = [p[1] for p in points]
                return self._create_bbox(min(xs), min(ys), max(xs), max(ys))

            if kind in ("CIRCLE", "ARC"):
                # For ARC this is the box of the full circle, i.e. an upper
                # bound of the arc's real extents.
                center = entity.dxf.center
                radius = entity.dxf.radius
                return self._create_bbox(
                    center.x - radius,
                    center.y - radius,
                    center.x + radius,
                    center.y + radius,
                )
        except Exception as e:
            # Best effort: one malformed entity must not abort the extraction.
            logger.warning(
                "Error calculating bbox for %s (%s): %s",
                kind,
                entity.dxf.handle,
                e,
            )
        return None

    def _create_bbox(
        self, min_x: float, min_y: float, max_x: float, max_y: float
    ) -> BoundingBox:
        """Build a ``BoundingBox`` and derive its centre from the corners."""
        return BoundingBox(
            min_x=min_x,
            min_y=min_y,
            max_x=max_x,
            max_y=max_y,
            center=((min_x + max_x) / 2, (min_y + max_y) / 2),
        )

    @staticmethod
    def _raw_text(entity) -> str:
        """Return the unprocessed text of TEXT/MTEXT entities, else ``""``."""
        kind = entity.dxftype()
        if kind == "TEXT":
            return entity.dxf.text
        if kind == "MTEXT":
            return entity.text
        return ""

    @staticmethod
    def _coordinates(entity) -> List[Tuple[float, float]]:
        """Return the defining 2D points of an entity (empty for text)."""
        if hasattr(entity, "get_points"):
            # Only the first two items (x, y) of each point are kept; any
            # further items in the point tuples are ignored.
            return [(p[0], p[1]) for p in entity.get_points()]
        kind = entity.dxftype()
        if kind == "LINE":
            return [
                (entity.dxf.start.x, entity.dxf.start.y),
                (entity.dxf.end.x, entity.dxf.end.y),
            ]
        if kind in ("CIRCLE", "ARC"):
            return [(entity.dxf.center.x, entity.dxf.center.y)]
        return []

    def _to_record(self, entity, bbox_obj: BoundingBox) -> GeometricEntity:
        """Convert one DXF entity plus its bounding box into a data model."""
        raw_text = self._raw_text(entity)
        return GeometricEntity(
            entity_id=entity.dxf.handle,
            entity_type=entity.dxftype(),
            layer=entity.dxf.layer,
            bbox=bbox_obj,
            raw_value=raw_text if raw_text else None,
            clean_value=self.clean_text(raw_text) if raw_text else None,
            coordinates=self._coordinates(entity),
            properties={
                "color": entity.dxf.color,
                "lineweight": getattr(entity.dxf, "lineweight", None),
            },
        )

    def extract_and_save(self, output_path: str) -> str:
        """Extract geometric data from the modelspace and save it as JSON.

        Entities for which no bounding box can be computed are skipped.

        Returns:
            ``output_path``, unchanged.
        """
        results = []
        for entity in self.msp:
            bbox_obj = self.get_bbox(entity)
            if not bbox_obj:
                continue
            results.append(self._to_record(entity, bbox_obj).model_dump())

        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=4)

        return output_path


# --- Proximity Utilities ---
def is_near(bbox_a: BoundingBox, bbox_b: BoundingBox, threshold: float = 5.0) -> bool:
    """Return True if the shortest distance between two boxes is <= threshold.

    The distance is computed with shapely rectangles built from the boxes.
    """
    box_a = box(bbox_a.min_x, bbox_a.min_y, bbox_a.max_x, bbox_a.max_y)
    box_b = box(bbox_b.min_x, bbox_b.min_y, bbox_b.max_x, bbox_b.max_y)
    return box_a.distance(box_b) <= threshold


def is_inside(point: Tuple[float, float], bbox: BoundingBox) -> bool:
    """Return True if *point* lies inside *bbox* (edges included)."""
    x, y = point[0], point[1]
    return (bbox.min_x <= x <= bbox.max_x) and (bbox.min_y <= y <= bbox.max_y)