ExperionCrawler/dxf-graph/test_drawing_split.py

#!/usr/bin/env python3
"""DXF 도면 분할 테스트 스크립트

목표: DXF의 X/Y 축 엔티티 밀도 분포를 분석하여
      sparse region(엔티티가 거의 없는 구간)을 감지하고,
      이를 도면 경계로 사용하여 도면을 분할한다.

사용법:
    python test_drawing_split.py [DXF_FILE_PATH]
    (기본값: src/Web/uploads/pid/No-10_Plant_PID.dxf)
"""

import math
import os
import sys
from typing import List, Tuple

import ezdxf


def load_dxf(filepath: str):
    """Open a DXF file and hand back its document and modelspace.

    Prints the path being loaded and the number of entities found by
    iterating the modelspace once.

    Args:
        filepath: Path passed to ``ezdxf.readfile``.

    Returns:
        A ``(doc, msp)`` tuple: the object returned by ``ezdxf.readfile``
        and the result of calling ``modelspace()`` on it.
    """
    print(f"DXF 로드: {filepath}")
    document = ezdxf.readfile(filepath)
    modelspace = document.modelspace()

    # Count by walking the modelspace; only the iteration protocol is needed.
    entity_total = 0
    for _ in modelspace:
        entity_total += 1
    print(f"총 엔티티 수: {entity_total}")

    return document, modelspace


def _has_dxf_attr(dxf, name: str) -> bool:
    """Report whether ``dxf`` exposes ``name``; any probe error counts as "no"."""
    try:
        return hasattr(dxf, name)
    except Exception:
        # hasattr() only absorbs AttributeError. A namespace that signals a
        # missing attribute with another exception type would otherwise abort
        # the whole probe chain for this entity.
        # NOTE(review): ezdxf's DXF namespace appears to raise its own
        # DXFAttributeError for unsupported attributes - confirm.
        return False


def collect_centers(msp) -> List[Tuple[float, float]]:
    """Collect one representative (x, y) point per entity.

    The point is chosen from the entity's ``dxf`` namespace, in this order:

    1. ``insert``            -> the insertion point,
    2. ``start`` and ``end`` -> the midpoint of the two,
    3. ``center``            -> the center point.

    Entities that expose none of these, or whose coordinates cannot be read,
    are skipped (best effort), so the result may be shorter than ``msp``.

    Args:
        msp: Iterable of entities, each expected to carry a ``dxf`` attribute
            namespace whose point attributes have ``x`` and ``y``.

    Returns:
        List of ``(x, y)`` tuples in iteration order of ``msp``.
    """
    centers = []
    for entity in msp:
        try:
            dxf = entity.dxf
            if _has_dxf_attr(dxf, 'insert'):
                point = (dxf.insert.x, dxf.insert.y)
            elif _has_dxf_attr(dxf, 'start') and _has_dxf_attr(dxf, 'end'):
                point = (
                    (dxf.start.x + dxf.end.x) / 2,
                    (dxf.start.y + dxf.end.y) / 2,
                )
            elif _has_dxf_attr(dxf, 'center'):
                point = (dxf.center.x, dxf.center.y)
            else:
                # No usable anchor attribute on this entity type.
                continue
        except Exception:
            # Deliberate best effort: an entity whose attribute exists but
            # holds no usable point (e.g. an unset value) is skipped rather
            # than aborting the whole scan.
            continue
        centers.append(point)
    return centers


def compute_density_histogram(
    centers: List[Tuple[float, float]],
    axis: str,
    bucket_size: float = 200.0
) -> dict:
    """Count entity centers per fixed-width bucket along one axis.

    Each coordinate is assigned to the bucket ``[k * bucket_size,
    (k + 1) * bucket_size)`` that contains it, and the bucket is keyed by its
    lower edge. Only buckets that received at least one point appear in the
    result.

    Args:
        centers: ``(x, y)`` points.
        axis: ``'x'`` selects the first component; any other value selects
            the second.
        bucket_size: Width of each bucket.

    Returns:
        ``{bucket_start: count}`` ordered by ascending ``bucket_start``;
        an empty dict when ``centers`` is empty.
    """
    component = 0 if axis == 'x' else 1

    buckets = {}
    for center in centers:
        # floor(), not int(): int() truncates toward zero, which would fold
        # the whole (-bucket_size, 0) span into the bucket starting at 0 and
        # make that bucket twice as wide as every other one.
        bucket = math.floor(center[component] / bucket_size) * bucket_size
        buckets[bucket] = buckets.get(bucket, 0) + 1

    return dict(sorted(buckets.items()))


def print_histogram(buckets: dict, title: str, scale: float = 10.0):
    """Render a bucket histogram on stdout as horizontal bars.

    A header with ``title`` is always printed. Each bucket is then printed in
    ascending key order as a bar of one block character per ``scale``
    entities (capped at 80 characters) followed by the raw count, and a
    footer reports the largest count and the scale.

    Args:
        buckets: ``{bucket_start: count}`` mapping.
        title: Text placed in the header line.
        scale: Number of entities represented by one bar character.
    """
    print(f"\n=== {title} ===")
    if buckets:
        peak = max(buckets.values())
        for position in sorted(buckets):
            hits = buckets[position]
            bar_width = int(hits / scale)
            if bar_width > 80:
                # Keep very tall bars within a console line.
                bar_width = 80
            print(f"  {position:8.0f}: {'█' * bar_width} ({hits})")
        print(f"  (최대: {peak}, 스케일: 1글자 = {scale:.0f}개)")
    else:
        print("  (데이터 없음)")


def find_sparse_regions(
    buckets: dict,
    bucket_size: float,
    threshold_ratio: float = 0.15,
    min_sparse_width: float = None
) -> List[Tuple[float, float]]:
    """Locate runs of low-density buckets in a density histogram.

    A bucket is sparse when its count is strictly below
    ``threshold_ratio`` times the mean count of the buckets present in
    ``buckets``. Walking the keys in ascending order, a run opens at the
    first sparse bucket and closes at the key of the next non-sparse bucket;
    a run still open after the last key closes at ``last_key + bucket_size``.
    Runs narrower than the minimum width are discarded.

    Args:
        buckets: ``{bucket_start: count}`` mapping.
        bucket_size: Width of one bucket.
        threshold_ratio: Fraction of the mean count below which a bucket is
            considered sparse.
        min_sparse_width: Minimum width a run must have to be reported;
            defaults to ``bucket_size * 1.5`` when ``None``.

    Returns:
        ``(start, end)`` coordinate pairs in ascending order.
    """
    if not buckets:
        return []

    width_floor = bucket_size * 1.5 if min_sparse_width is None else min_sparse_width
    cutoff = (sum(buckets.values()) / len(buckets)) * threshold_ratio

    ordered = sorted(buckets)
    found = []
    run_start = None  # start key of the sparse run currently open, if any

    for position in ordered:
        if buckets[position] < cutoff:
            if run_start is None:
                run_start = position
            continue

        # A non-sparse bucket closes whatever run was open.
        if run_start is not None:
            if position - run_start >= width_floor:
                found.append((run_start, position))
            run_start = None

    # A run reaching the final bucket extends to that bucket's far edge.
    if run_start is not None:
        tail_end = ordered[-1] + bucket_size
        if tail_end - run_start >= width_floor:
            found.append((run_start, tail_end))

    return found


def find_gaps_in_buckets(
    buckets: dict,
    bucket_size: float,
    min_gap_buckets: int = 1
) -> List[Tuple[float, float]]:
    """Detect stretches with no data at all between occupied buckets.

    Consecutive keys of ``buckets`` (in ascending order) are compared. When
    two neighbouring keys are ``d`` apart, ``d / bucket_size - 1`` buckets
    between them are missing from the histogram. The pair is reported as a
    gap when at least ``min_gap_buckets`` buckets are missing.

    Args:
        buckets: ``{bucket_start: count}`` mapping.
        bucket_size: Width of one bucket.
        min_gap_buckets: Minimum number of missing buckets required for a
            gap to be reported.

    Returns:
        ``(start, end)`` pairs where ``start`` is the key of the occupied
        bucket before the gap and ``end`` is the key of the occupied bucket
        after it, in ascending order.
    """
    if not buckets:
        return []

    sorted_keys = sorted(buckets)
    # Keys this far apart (or farther) have at least min_gap_buckets empty
    # buckets between them. The comparison is inclusive so that exactly
    # min_gap_buckets empty buckets already qualify.
    min_distance = bucket_size * (min_gap_buckets + 1)

    return [
        (current, next_key)
        for current, next_key in zip(sorted_keys, sorted_keys[1:])
        if next_key - current >= min_distance
    ]


def _axis_intervals(axis_range, sparse):
    """Split one axis range at the midpoints of its sparse regions."""
    lower, upper = axis_range
    cuts = {lower, upper}
    for start, end in sparse:
        mid = (start + end) / 2
        # A midpoint outside the data range cannot separate any entities.
        if lower < mid < upper:
            cuts.add(mid)

    ordered = sorted(cuts)
    if len(ordered) == 1:
        # Degenerate axis: every entity shares the same coordinate.
        return [(lower, upper)]
    return list(zip(ordered, ordered[1:]))


def _in_interval(value, interval, is_last):
    """Half-open membership test; the final interval also includes its upper end."""
    low, high = interval
    if is_last:
        return low <= value <= high
    return low <= value < high


def compute_drawing_regions(
    centers: List[Tuple[float, float]],
    x_sparse: List[Tuple[float, float]],
    y_sparse: List[Tuple[float, float]],
    x_range: Tuple[float, float],
    y_range: Tuple[float, float]
) -> List[dict]:
    """Partition the plane into drawing regions using sparse-region midpoints.

    Each axis is cut at the midpoint of every sparse region that lies
    strictly inside that axis's range; an axis without such midpoints is a
    single interval covering the whole range. The X and Y intervals are then
    crossed into rectangular cells, and every cell containing at least one
    center becomes a region.

    Intervals are half-open ``[min, max)`` except the last interval of each
    axis, which is closed. Points lying exactly on the upper bound of the
    range (normally the data maximum) are therefore still assigned to a
    region.

    Args:
        centers: ``(x, y)`` entity centers.
        x_sparse: ``(start, end)`` sparse regions along X.
        y_sparse: ``(start, end)`` sparse regions along Y.
        x_range: ``(min, max)`` extent along X.
        y_range: ``(min, max)`` extent along Y.

    Returns:
        One dict per non-empty cell, ordered by X interval then Y interval,
        with keys ``drawing_no`` (1-based, sequential), ``x_min``, ``x_max``,
        ``y_min``, ``y_max``, ``entity_count``, ``width`` and ``height``.
    """
    x_intervals = _axis_intervals(x_range, x_sparse)
    y_intervals = _axis_intervals(y_range, y_sparse)
    last_x = len(x_intervals) - 1
    last_y = len(y_intervals) - 1

    regions = []
    for i, x_interval in enumerate(x_intervals):
        # Filter by X once per column so the Y pass only scans candidates.
        column = [
            cy for cx, cy in centers
            if _in_interval(cx, x_interval, i == last_x)
        ]
        if not column:
            continue

        x_min, x_max = x_interval
        for j, y_interval in enumerate(y_intervals):
            count = sum(
                1 for cy in column
                if _in_interval(cy, y_interval, j == last_y)
            )
            if count == 0:
                continue

            y_min, y_max = y_interval
            regions.append({
                'drawing_no': len(regions) + 1,
                'x_min': x_min,
                'x_max': x_max,
                'y_min': y_min,
                'y_max': y_max,
                'entity_count': count,
                'width': x_max - x_min,
                'height': y_max - y_min,
            })

    return regions


def main():
    """Run the drawing-split analysis on one DXF file and print a report.

    The file path comes from the first command-line argument, falling back
    to a built-in default path. The process exits with status 1 when the
    file does not exist or when no entity centers could be collected.

    Steps: load the DXF, collect entity centers, compute the overall X/Y
    extent, build and print per-axis density histograms, detect sparse
    regions and empty gaps, merge them, derive the drawing regions, and
    finally compare the number of assigned entities with the total.

    Returns:
        The list of region dicts returned by ``compute_drawing_regions``.
    """
    # Resolve the input path.
    default_path = 'src/Web/uploads/pid/No-10_Plant_PID.dxf'
    filepath = sys.argv[1] if len(sys.argv) > 1 else default_path

    if not os.path.exists(filepath):
        print(f"파일을 찾을 수 없습니다: {filepath}")
        sys.exit(1)

    # 1. Load the DXF (the document itself is not needed afterwards).
    _, modelspace = load_dxf(filepath)

    # 2. Collect entity centers.
    centers = collect_centers(modelspace)
    print(f"수집된 중심 좌표: {len(centers)}개")
    if not centers:
        print("오류: 중심 좌표를 수집할 수 없습니다.")
        sys.exit(1)

    # 3. Overall extent of the collected centers.
    x_values = [point[0] for point in centers]
    y_values = [point[1] for point in centers]
    x_range = (min(x_values), max(x_values))
    y_range = (min(y_values), max(y_values))

    print(f"\n전체 X 범위: {x_range[0]:.1f} ~ {x_range[1]:.1f} (너비 {x_range[1]-x_range[0]:.1f})")
    print(f"전체 Y 범위: {y_range[0]:.1f} ~ {y_range[1]:.1f} (높이 {y_range[1]-y_range[0]:.1f})")

    # 4. Density histograms per axis.
    bucket_size = 200.0
    x_buckets = compute_density_histogram(centers, 'x', bucket_size)
    y_buckets = compute_density_histogram(centers, 'y', bucket_size)

    print_histogram(x_buckets, f'X 축 밀도 (버킷={bucket_size:.0f})', scale=50.0)
    print_histogram(y_buckets, f'Y 축 밀도 (버킷={bucket_size:.0f})', scale=50.0)

    # 5. Density-based sparse regions.
    threshold_ratio = 0.15
    x_sparse_density = find_sparse_regions(x_buckets, bucket_size, threshold_ratio)
    y_sparse_density = find_sparse_regions(y_buckets, bucket_size, threshold_ratio)

    # 6. Completely empty gaps between occupied buckets.
    x_gaps = find_gaps_in_buckets(x_buckets, bucket_size)
    y_gaps = find_gaps_in_buckets(y_buckets, bucket_size)

    # 7. Merge both kinds of split candidates, dropping duplicates.
    x_sparse = sorted(set(x_sparse_density + x_gaps))
    y_sparse = sorted(set(y_sparse_density + y_gaps))

    print(f"\n=== Sparse Region 감지 (밀도 임계값: 평균의 {threshold_ratio*100:.0f}%) ===")
    print(f"X 축 sparse region (밀도): {len(x_sparse_density)}개")
    for start, end in x_sparse_density:
        print(f"  X: {start:.0f} ~ {end:.0f} (너비 {end-start:.0f})")
    print(f"Y 축 sparse region (밀도): {len(y_sparse_density)}개")
    for start, end in y_sparse_density:
        print(f"  Y: {start:.0f} ~ {end:.0f} (너비 {end-start:.0f})")

    print("\n=== 버킷 Gap 감지 ===")
    print(f"X 축 gap: {len(x_gaps)}개")
    for start, end in x_gaps:
        print(f"  X: {start:.0f} ~ {end:.0f} (너비 {end-start:.0f})")
    print(f"Y 축 gap: {len(y_gaps)}개")
    for start, end in y_gaps:
        print(f"  Y: {start:.0f} ~ {end:.0f} (너비 {end-start:.0f})")

    print("\n=== 합산 분할 기준 ===")
    print(f"X 축 분할: {len(x_sparse)}개 sparse/gap")
    print(f"Y 축 분할: {len(y_sparse)}개 sparse/gap")

    # 8. Derive the drawing regions.
    regions = compute_drawing_regions(centers, x_sparse, y_sparse, x_range, y_range)

    print(f"\n=== 도면 분할 결과: {len(regions)}개 영역 ===")
    for region in regions:
        print(f"  도면 #{region['drawing_no']}: "
              f"X={region['x_min']:.0f}~{region['x_max']:.0f}, "
              f"Y={region['y_min']:.0f}~{region['y_max']:.0f}, "
              f"엔티티={region['entity_count']}")

    # 9. Sanity check: every collected center should belong to some region.
    assigned = sum(region['entity_count'] for region in regions)
    print(f"\n검증: 도면별 엔티티 합계 = {assigned} / 전체 = {len(centers)}")

    unassigned = len(centers) - assigned
    if unassigned == 0:
        print("✅ 모든 엔티티가 도면 영역에 할당됨")
    else:
        print(f"⚠️ {unassigned}개 엔티티가 미할당됨")

    return regions


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()