using System.Globalization;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
using ExperionCrawler.Core.Application.DTOs;
using ExperionCrawler.Core.Application.Interfaces;
using ExperionCrawler.Core.Domain.Entities;
using ExperionCrawler.Infrastructure.Database;
using ExperionCrawler.Infrastructure.Mcp;
using Microsoft.EntityFrameworkCore;
using netDxf;
using UglyToad.PdfPig;

namespace ExperionCrawler.Core.Application.Services;

/// <summary>
/// Extracts P&amp;ID tags from DXF/PDF drawings through the MCP client, links them to
/// realtime points, persists them as <see cref="PidEquipment"/> rows, and offers
/// query, update, statistics and export operations over those rows.
/// </summary>
public class PidExtractorService : IPidExtractorService
{
    /// <summary>Confidence at or above which an item/mapping counts as "high".</summary>
    private const double HighConfidenceThreshold = 0.7;

    /// <summary>Confidence below which an item counts as "low".</summary>
    private const double LowConfidenceThreshold = 0.5;

    /// <summary>Shared serializer options; property names in MCP responses are matched case-insensitively.</summary>
    private static readonly JsonSerializerOptions JsonOptions = new() { PropertyNameCaseInsensitive = true };

    // Lines are kept when they contain a P&ID tag pattern:
    // - single-letter equipment tags: P-10101, T-10100, E-10119, C-10111
    // - multi-letter instrument tags: FCV-101, FICQ-6113, PSV-6203
    // - compound tags: VG-6203-15A-F1A-n, CD-10513-40A
    private static readonly Regex PidTagPattern =
        new(@"[A-Z]{1,6}-\d{2,6}(-[A-Z0-9]+)*", RegexOptions.Compiled);

    private readonly McpClient _mcp;
    private readonly ExperionDbContext _dbContext;
    private readonly ILogger<PidExtractorService> _logger;

    public PidExtractorService(McpClient mcp, ExperionDbContext dbContext, ILogger<PidExtractorService> logger)
    {
        _mcp = mcp;
        _dbContext = dbContext;
        _logger = logger;
    }

    /// <summary>
    /// Opens <paramref name="filePath"/> and runs <see cref="ExtractFromStreamAsync"/> on it,
    /// using the file name (without directory) to select the parser.
    /// </summary>
    public async Task<PidExtractionResult> ExtractFromFileAsync(string filePath, bool useImageMode = false)
    {
        await using var stream = File.OpenRead(filePath);
        return await ExtractFromStreamAsync(stream, Path.GetFileName(filePath), useImageMode);
    }

    /// <summary>
    /// Extracts text from a .dxf or .pdf stream, asks the MCP client for P&amp;ID tags and for
    /// tag-to-realtime-point mappings, and stores one <see cref="PidEquipment"/> row per extracted item.
    /// </summary>
    /// <param name="stream">Drawing content.</param>
    /// <param name="fileName">Name whose extension (.dxf / .pdf) selects the text extractor.</param>
    /// <param name="useImageMode">Accepted for interface compatibility; not used by this implementation.</param>
    /// <returns>Total stored rows plus the counts of high (&gt;= 0.7) and low (&lt; 0.5) confidence rows.</returns>
    /// <exception cref="NotSupportedException">The extension is neither .dxf nor .pdf.</exception>
    public async Task<PidExtractionResult> ExtractFromStreamAsync(Stream stream, string fileName, bool useImageMode = false)
    {
        ArgumentNullException.ThrowIfNull(stream);
        ArgumentNullException.ThrowIfNull(fileName);

        var ext = Path.GetExtension(fileName).ToLowerInvariant();
        string text = ext switch
        {
            ".dxf" => ExtractDxfText(stream),
            ".pdf" => ExtractPdfText(stream),
            _ => throw new NotSupportedException("지원 형식: .dxf .pdf (스캔본 이미지는 Vision 모드 필요)")
        };

        if (string.IsNullOrWhiteSpace(text))
        {
            return new PidExtractionResult(0, 0, 0);
        }

        // Tag extraction through the MCP client.
        var sourceType = ext.TrimStart('.');
        var json = await _mcp.ExtractPidTagsAsync(text, sourceType);
        var extractedItems = ParseJson(json);
        if (extractedItems.Count == 0)
        {
            _logger.LogWarning("P&ID 추출 결과 0건 — 파일: {FileName}", fileName);
            return new PidExtractionResult(0, 0, 0);
        }

        // Mapping suggestions (P&ID tag -> realtime point tag name) through the MCP client.
        var pidTagNos = extractedItems.Select(i => i.TagNo).Distinct().ToList();
        var experionTagNames = await _dbContext.RealtimePoints.Select(r => r.TagName).ToListAsync();
        var mappingJson = await _mcp.MatchPidTagsAsync(pidTagNos, experionTagNames);
        var mappings = ParseMappingJson(mappingJson);

        // Persist one row per extracted item.
        var dbItems = new List<PidEquipment>(extractedItems.Count);
        foreach (var item in extractedItems)
        {
            mappings.TryGetValue(item.TagNo, out var matched);

            // Fallback when no mapping was accepted: match the part of the tag number before
            // the first '.', either exactly or as a "<base>." prefix of the point's tag name.
            var baseTag = item.TagNo.Split('.')[0];
            var basePrefix = baseTag + ".";
            var experionTag = matched is not null
                ? await _dbContext.RealtimePoints.FirstOrDefaultAsync(r => r.TagName == matched)
                : await _dbContext.RealtimePoints.FirstOrDefaultAsync(
                    t => t.TagName == baseTag || t.TagName.StartsWith(basePrefix));

            var now = DateTime.UtcNow;
            dbItems.Add(new PidEquipment
            {
                TagNo = item.TagNo,
                EquipmentName = item.EquipmentName,
                InstrumentType = item.InstrumentType,
                LineNumber = item.LineNumber,
                PidDrawingNo = item.PidDrawingNo,
                Confidence = item.Confidence,
                ExperionTagId = experionTag?.Id,
                ExtractedAt = now,
                UpdatedAt = now
            });
        }

        await _dbContext.PidEquipment.AddRangeAsync(dbItems);
        await _dbContext.SaveChangesAsync();

        _logger.LogInformation("P&ID 추출 완료: {Total}건 저장 (파일: {FileName})", dbItems.Count, fileName);

        return new PidExtractionResult(
            TotalCount: dbItems.Count,
            ConfidenceItems: dbItems.Count(i => i.Confidence >= HighConfidenceThreshold),
            LowConfidenceItems: dbItems.Count(i => i.Confidence < LowConfidenceThreshold));
    }

    /// <summary>
    /// Copies the stream to a temporary .dxf file, loads it with netDxf, collects the values of
    /// text entities, multi-line text entities and block attribute definitions, and returns only
    /// the lines that contain a P&amp;ID tag pattern. The temporary file is always removed.
    /// </summary>
    private string ExtractDxfText(Stream stream)
    {
        // Build the path ourselves: Path.GetTempFileName() would create an extra zero-byte
        // file on disk that nothing here would ever delete.
        var tmp = Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.dxf");
        try
        {
            using (var fs = File.Create(tmp))
            {
                stream.CopyTo(fs);
            }

            var doc = DxfDocument.Load(tmp);
            var sb = new StringBuilder();

            foreach (var txt in doc.Entities.Texts)
            {
                sb.AppendLine(txt.Value);
            }

            foreach (var mtxt in doc.Entities.MTexts)
            {
                sb.AppendLine(mtxt.PlainText());
            }

            foreach (var blk in doc.Blocks)
            {
                foreach (var attr in blk.AttributeDefinitions.Values)
                {
                    sb.AppendLine(attr.Value);
                }
            }

            // Forward only tag-related lines to the MCP server.
            return FilterDxfText(sb.ToString());
        }
        finally
        {
            if (File.Exists(tmp))
            {
                File.Delete(tmp);
            }
        }
    }

    /// <summary>
    /// Keeps only the lines of the DXF text that contain a P&amp;ID tag pattern.
    /// Dropping unrelated text reduces the load on the MCP server and helps avoid
    /// JSON parsing errors in its response.
    /// </summary>
    private string FilterDxfText(string text)
    {
        var tagLines = text
            .Split('\n')
            .Select(line => line.Trim())
            .Where(line => PidTagPattern.IsMatch(line));

        return string.Join("\n", tagLines);
    }

    /// <summary>Concatenates the text of every page of the PDF, one page per line.</summary>
    private string ExtractPdfText(Stream stream)
    {
        using var pdf = PdfDocument.Open(stream);
        var sb = new StringBuilder();
        foreach (var page in pdf.GetPages())
        {
            sb.AppendLine(page.Text);
        }

        return sb.ToString();
    }

    /// <summary>
    /// Parses the tag-extraction response. Two shapes are accepted:
    /// <c>{"success": ..., "count": ..., "tags": [...]}</c> or a bare array <c>[...]</c>.
    /// Any other shape, or malformed JSON, is logged and yields an empty list.
    /// Entries that are null or carry no tag number are dropped.
    /// </summary>
    private List<ExtractedItem> ParseJson(string json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            _logger.LogWarning("P&ID JSON 파싱 실패: 'tags' 필드 또는 배열 형식 없음");
            return [];
        }

        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;

            // The kind must be checked before TryGetProperty: calling it on a
            // non-object element throws InvalidOperationException.
            JsonElement tags;
            if (root.ValueKind == JsonValueKind.Array)
            {
                tags = root;
            }
            else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("tags", out var nested))
            {
                tags = nested;
            }
            else
            {
                _logger.LogWarning("P&ID JSON 파싱 실패: 'tags' 필드 또는 배열 형식 없음");
                return [];
            }

            var items = JsonSerializer.Deserialize<List<ExtractedItem>>(tags.GetRawText(), JsonOptions) ?? [];

            // A JSON null entry or an explicit "tagNo": null would otherwise fail later
            // when the tag number is used as a lookup key.
            var dropped = items.RemoveAll(i => i is null || string.IsNullOrWhiteSpace(i.TagNo));
            if (dropped > 0)
            {
                _logger.LogWarning("P&ID 추출 항목 중 태그번호 없는 {Count}건 제외", dropped);
            }

            return items;
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "P&ID JSON 파싱 실패: {Msg} / raw: {Raw}", ex.Message, json[..Math.Min(200, json.Length)]);
            return [];
        }
    }

    /// <summary>
    /// Parses the mapping response (<c>{"success": ..., "count": ..., "mappings": [...]}</c> or a
    /// bare array) into a P&amp;ID-tag to realtime-tag-name dictionary. Only mappings with
    /// confidence &gt;= 0.7 and a non-empty target are kept; when the same P&amp;ID tag appears
    /// more than once, the highest-confidence mapping wins. Failures are logged and yield an
    /// empty dictionary.
    /// </summary>
    private Dictionary<string, string> ParseMappingJson(string json)
    {
        if (string.IsNullOrWhiteSpace(json))
        {
            return new Dictionary<string, string>();
        }

        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;

            JsonElement arrayEl = default;
            if (root.ValueKind == JsonValueKind.Array)
            {
                arrayEl = root;
            }
            else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("mappings", out var nested))
            {
                arrayEl = nested;
            }

            if (arrayEl.ValueKind != JsonValueKind.Array)
            {
                return new Dictionary<string, string>();
            }

            var list = JsonSerializer.Deserialize<List<MappingItem>>(arrayEl.GetRawText(), JsonOptions) ?? [];

            // Group first: a plain ToDictionary would throw on a repeated PidTag and
            // lose every mapping in the response.
            return list
                .Where(m => m is not null
                            && m.PidTag is not null
                            && m.Confidence >= HighConfidenceThreshold
                            && !string.IsNullOrEmpty(m.ExperionTag))
                .GroupBy(m => m.PidTag)
                .ToDictionary(g => g.Key, g => g.MaxBy(m => m.Confidence)!.ExperionTag!);
        }
        catch (Exception ex)
        {
            _logger.LogWarning(ex, "P&ID 매핑 JSON 파싱 실패: {Msg} / raw: {Raw}", ex.Message, json[..Math.Min(200, json.Length)]);
            return new Dictionary<string, string>();
        }
    }

    /// <summary>
    /// Returns one page of equipment rows, newest extraction first, optionally filtered to tag
    /// numbers containing <paramref name="tagNo"/>, together with the total number of matches.
    /// </summary>
    /// <param name="tagNo">Substring filter on the tag number; null or empty disables filtering.</param>
    /// <param name="page">1-based page number; values below 1 are treated as 1.</param>
    /// <param name="pageSize">Rows per page; negative values are treated as 0.</param>
    public async Task<(int Total, IEnumerable<PidEquipment> Items)> GetEquipmentAsync(
        string? tagNo, int page, int pageSize)
    {
        var query = _dbContext.PidEquipment.AsQueryable();
        if (!string.IsNullOrEmpty(tagNo))
        {
            query = query.Where(e => e.TagNo.Contains(tagNo));
        }

        var total = await query.CountAsync();

        // Clamp so that a bad page/pageSize cannot produce a negative Skip/Take.
        var safePage = Math.Max(page, 1);
        var safeSize = Math.Max(pageSize, 0);
        var items = await query
            .OrderByDescending(e => e.ExtractedAt)
            .Skip((safePage - 1) * safeSize)
            .Take(safeSize)
            .ToListAsync();

        return (total, items);
    }

    /// <summary>Loads one equipment row with its linked realtime point, or null when not found.</summary>
    public async Task<PidEquipment?> GetByIdAsync(long id) =>
        await _dbContext.PidEquipment
            .Include(e => e.ExperionTag)
            .FirstOrDefaultAsync(e => e.Id == id);

    /// <summary>Sets the confidence of the row with the given id; does nothing when the row does not exist.</summary>
    public async Task UpdateConfidenceAsync(long id, double confidence)
    {
        var entity = await _dbContext.PidEquipment.FindAsync(id);
        if (entity is null)
        {
            return;
        }

        entity.Confidence = confidence;
        entity.UpdatedAt = DateTime.UtcNow;
        await _dbContext.SaveChangesAsync();
    }

    /// <summary>Marks the row with the given id as active; does nothing when the row does not exist.</summary>
    public Task ActivateAsync(long id) => SetActiveAsync(id, true);

    /// <summary>Marks the row with the given id as inactive; does nothing when the row does not exist.</summary>
    public Task DeactivateAsync(long id) => SetActiveAsync(id, false);

    /// <summary>Shared implementation of activate/deactivate: sets the flag and the update timestamp.</summary>
    private async Task SetActiveAsync(long id, bool isActive)
    {
        var entity = await _dbContext.PidEquipment.FindAsync(id);
        if (entity is null)
        {
            return;
        }

        entity.IsActive = isActive;
        entity.UpdatedAt = DateTime.UtcNow;
        await _dbContext.SaveChangesAsync();
    }

    /// <summary>Number of stored equipment rows.</summary>
    public Task<int> GetTotalCountAsync() =>
        _dbContext.PidEquipment.CountAsync();

    /// <summary>Number of rows with confidence &gt;= 0.7.</summary>
    public Task<int> GetConfidenceItemsCountAsync() =>
        _dbContext.PidEquipment.CountAsync(e => e.Confidence >= HighConfidenceThreshold);

    /// <summary>Number of rows with confidence &lt; 0.5.</summary>
    public Task<int> GetLowConfidenceItemsCountAsync() =>
        _dbContext.PidEquipment.CountAsync(e => e.Confidence < LowConfidenceThreshold);

    /// <summary>Number of distinct drawing-number values among the stored rows.</summary>
    public Task<int> GetDrawingCountAsync() =>
        _dbContext.PidEquipment.Select(e => e.PidDrawingNo).Distinct().CountAsync();

    /// <summary>
    /// Counts rows per confidence band: high (&gt;= 0.7), medium (0.5 to &lt; 0.7) and low (&lt; 0.5).
    /// </summary>
    public async Task<Dictionary<string, int>> GetConfidenceDistributionAsync()
    {
        // Only the confidence column is needed; avoid materialising and tracking whole entities.
        var confidences = await _dbContext.PidEquipment.Select(e => e.Confidence).ToListAsync();

        int high = 0, medium = 0, low = 0;
        foreach (var c in confidences)
        {
            if (c >= HighConfidenceThreshold)
            {
                high++;
            }
            else if (c >= LowConfidenceThreshold)
            {
                medium++;
            }
            else if (c < LowConfidenceThreshold)
            {
                // Explicit comparison so that NaN lands in no band, as with three independent counts.
                low++;
            }
        }

        return new Dictionary<string, int>
        {
            ["High (>=0.7)"] = high,
            ["Medium (0.5-0.7)"] = medium,
            ["Low (<0.5)"] = low
        };
    }

    /// <summary>
    /// Renders the items as CSV text with a header row. Values are formatted with the invariant
    /// culture so that decimal separators can never collide with the field delimiter.
    /// </summary>
    public Task<string> ExportToCsvAsync(IEnumerable<PidEquipment> items)
    {
        ArgumentNullException.ThrowIfNull(items);

        var sb = new StringBuilder();
        sb.AppendLine("TagNo,EquipmentName,InstrumentType,LineNumber,PidDrawingNo,Confidence,IsActive,ExtractedAt,ExperionTagId");
        foreach (var i in items)
        {
            sb.AppendLine(
                CultureInfo.InvariantCulture,
                $"{Csv(i.TagNo)},{Csv(i.EquipmentName)},{Csv(i.InstrumentType)},{Csv(i.LineNumber)},{Csv(i.PidDrawingNo)},{i.Confidence},{i.IsActive},{i.ExtractedAt:O},{i.ExperionTagId}");
        }

        return Task.FromResult(sb.ToString());
    }

    /// <summary>
    /// Escapes one CSV field: null/empty becomes an empty field; a value containing a comma,
    /// a double quote or a line break is wrapped in quotes with embedded quotes doubled.
    /// </summary>
    private static string Csv(string? v)
    {
        if (string.IsNullOrEmpty(v))
        {
            return "";
        }

        var needsQuoting = v.Contains(',') || v.Contains('"') || v.Contains('\n') || v.Contains('\r');
        return needsQuoting
            ? $"\"{v.Replace("\"", "\"\"")}\""
            : v;
    }

    /// <summary>
    /// Renders the items as an Excel workbook with a single "P&amp;ID Equipment" sheet
    /// (header row followed by one row per item) and returns the workbook bytes.
    /// </summary>
    public Task<byte[]> ExportToExcelAsync(IEnumerable<PidEquipment> items)
    {
        ArgumentNullException.ThrowIfNull(items);

        using var package = new OfficeOpenXml.ExcelPackage();
        var worksheet = package.Workbook.Worksheets.Add("P&ID Equipment");

        // Header row.
        string[] headers =
        [
            "태그번호", "장비명", "계기유형", "라인번호", "도면번호",
            "신뢰도", "상태", "추출일시", "Experion 태그"
        ];
        for (var col = 0; col < headers.Length; col++)
        {
            worksheet.Cells[1, col + 1].Value = headers[col];
        }

        var row = 2;
        foreach (var item in items)
        {
            worksheet.Cells[row, 1].Value = item.TagNo;
            worksheet.Cells[row, 2].Value = item.EquipmentName ?? "";
            worksheet.Cells[row, 3].Value = item.InstrumentType ?? "";
            worksheet.Cells[row, 4].Value = item.LineNumber ?? "";
            worksheet.Cells[row, 5].Value = item.PidDrawingNo ?? "";
            worksheet.Cells[row, 6].Value = item.Confidence;
            worksheet.Cells[row, 7].Value = item.IsActive ? "활성" : "비활성";
            worksheet.Cells[row, 8].Value = item.ExtractedAt;
            // Without a number format a date cell is displayed as a raw serial number.
            worksheet.Cells[row, 8].Style.Numberformat.Format = "yyyy-mm-dd hh:mm:ss";
            worksheet.Cells[row, 9].Value = item.ExperionTag?.TagName ?? "";
            row++;
        }

        return Task.FromResult(package.GetAsByteArray());
    }
}

// ── Models used for parsing MCP responses ─────────────────────────────────────

/// <summary>One tag entry of the tag-extraction response.</summary>
public class ExtractedItem
{
    public string TagNo { get; set; } = "";
    public string? EquipmentName { get; set; }
    public string? InstrumentType { get; set; }
    public string? LineNumber { get; set; }
    public string? PidDrawingNo { get; set; }

    /// <summary>Defaults to 0.5 when the response omits the value.</summary>
    public double Confidence { get; set; } = 0.5;
}

/// <summary>One entry of the tag-mapping response: a P&amp;ID tag and its suggested realtime tag name.</summary>
public class MappingItem
{
    public string PidTag { get; set; } = "";
    public string? ExperionTag { get; set; }
    public double Confidence { get; set; }
}