opencode 로 바꾸고 작업전 커밋

2026-05-08 17:22:10 +09:00
parent 15c17522c8
commit e923aab43b
202 changed files with 1336027 additions and 115 deletions
--- a/.rooBackup/2026-05-05-152500/test_dxf_extract_pid3.py
+++ b/.rooBackup/2026-05-05-152500/test_dxf_extract_pid3.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""
+Extract P&ID tags from a DXF file.
+Uses ezdxf + the OpenAI API, one call per prompt chunk (only one chunk is currently defined).
+"""
+
+import ezdxf
+import json
+import re
+from ezdxf.tools.text import plain_mtext
+from openai import OpenAI
+
+# DXF 파일 경로
+filepath = '/home/windpacer/projects/ExperionCrawler/src/Web/uploads/pid/p-9100.dxf'
+
+# DXF 파일 읽기
+doc = ezdxf.readfile(filepath)
+msp = doc.modelspace()
+
+# 텍스트 추출
+texts = []
+for entity in msp:
+    if entity.dxftype() == 'TEXT':
+        texts.append(entity.dxf.text)
+    elif entity.dxftype() == 'MTEXT':
+        try:
+            plain = plain_mtext(entity.dxf.text)
+            if plain.strip():
+                texts.append(plain)
+        except Exception:
+            pass
+
+text = '\n'.join(texts)
+print(f'총 텍스트: {len(text)}자')
+
+# OpenAI 클라이언트 생성 (타임아웃 1800초 = 30분)
+llm = OpenAI(
+    base_url='http://localhost:8000/v1',
+    api_key='dummy',
+    timeout=1800
+)
+
+# 청크별 프롬프트
+chunks = [
+    {
+        'name': 'System Tags',
+        'system': (
+            'You are a P&ID expert. Extract system tags only.\n'
+            'Return ONLY a JSON array.\n'
+            '\n'
+            'Instrument types to extract: LI, PI, TI, FIQ, FICQ, TICA, PICA, LICA, FIC, TIC, PIC, LIC\n'
+            'Format: [{"tagNo":"FICQ-10101","confidence":0.95},...]\n'
+        ),
+        'user': 'Extract ALL tags of LI, PI, TI, FIQ, FICQ, TICA, PICA, LICA, FIC, TIC, PIC, LIC from the text below:\n\n{text}'
+    }
+]
+
+# 결과 저장
+all_tags = []
+seen_tags = set()
+
+for chunk in chunks:
+    print(f'\n=== {chunk["name"]} ===')
+    
+    # 프롬프트 생성
+    system = chunk['system']
+    user = chunk['user'].format(text=text[:100000])
+    
+    # LLM 호출
+    resp = llm.chat.completions.create(
+        model='Qwen3.6-27B-FP8',
+        messages=[
+            {'role': 'system', 'content': system},
+            {'role': 'user', 'content': user},
+        ],
+        max_tokens=65536,
+        temperature=0.1,
+        extra_body={'chat_template_kwargs': {'enable_thinking': False}},
+    )
+    
+    raw = (resp.choices[0].message.content or '').strip()
+    finish = resp.choices[0].finish_reason
+    print(f'응답 길이: {len(raw)}자, finish_reason: {finish}')
+    
+    # 잘린 경우 복구
+    if finish == 'length' and not raw.rstrip().endswith(']'):
+        last = raw.rfind('}')
+        if last != -1:
+            raw = raw[:last+1] + ']'
+            print('잘린 JSON 복구')
+    
+    # 배열 추출
+    depth = 0
+    start = -1
+    best = ''
+    for i, c in enumerate(raw):
+        if c == '[':
+            if depth == 0:
+                start = i
+            depth += 1
+        elif c == ']':
+            depth -= 1
+            if depth == 0 and start >= 0:
+                cand = raw[start:i+1]
+                if len(cand) > len(best):
+                    best = cand
+    
+    # JSON 파싱 및 결과 저장
+    try:
+        data = json.loads(best) if best else []
+        print(f'추출된 태그 수: {len(data)}개')
+        
+        for item in data:
+            tag_no = item.get('tagNo')
+            if tag_no and tag_no not in seen_tags:
+                seen_tags.add(tag_no)
+                all_tags.append({
+                    'tagNo': tag_no,
+                    'confidence': item.get('confidence', 0.95)
+                })
+        
+        print(f'중복 제거 후 총 태그 수: {len(all_tags)}개')
+    except json.JSONDecodeError as e:
+        print(f'파싱 실패: {e}')
+        print(repr(best[-300:] if best else ''))
+
+# 최종 결과
+print(f'\n=== 최종 결과 ===')
+print(f'총 추출 태그 수: {len(all_tags)}개')
+print()
+for t in all_tags[:30]:
+    print(f'  {t["tagNo"]}')
+if len(all_tags) > 30:
+    print(f'  ... (총 {len(all_tags)}개)')
+
+# JSON 파일로 저장
+output_path = '/home/windpacer/projects/ExperionCrawler/src/Web/uploads/pid/p-9100_extracted3.json'
+with open(output_path, 'w', encoding='utf-8') as f:
+    json.dump(all_tags, f, indent=2, ensure_ascii=False)
+print(f'\n결과 저장: {output_path}')