fix: P&ID 배관번호 분류 오류 수정 (power_equipment → pipings)

- _PID_LINENO_FULL_RE: 7필드 고정 regex → 5~7필드 통합 (9차 P-9107-25A-F-n 등 미매칭 수정)
- _extract_pid_dxf_fast: 레이어 이름 하드코딩 제거 → FULL_RE 매칭 우선, LINENO 계열 레이어 힌트 보조
- MatchCategoryAsync: 배관번호 regex(_pipeLineNoRe) 체크를 prefix 룰보다 먼저 실행 → P-9117-20A-F-n 등이 power_equipment로 오분류되던 문제 수정
- pump extractor 프롬프트: 배관번호 SKIP/INCLUDE 예시 추가
- DB 기존 레코드 435건 pipings로 재분류 (직접 SQL)
- .claude/settings.json: LLM 모델명 하드코딩 제거

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
windpacer
2026-05-17 10:36:30 +09:00
parent 0ccec38c18
commit 960bda4a3c
4 changed files with 885 additions and 309 deletions

View File

@@ -6,7 +6,7 @@
"/home/windpacer/projects/ExperionCrawler/mcp-server/server.py" "/home/windpacer/projects/ExperionCrawler/mcp-server/server.py"
], ],
"env": {}, "env": {},
"description": "ExperionCrawler RAG — Qdrant(코드베이스+OPC UA 문서) + GLM-4.7-Flash" "description": "ExperionCrawler RAG — Qdrant(코드베이스+OPC UA 문서), 현재 LLM은 mcp-server/llm-model.json 참조"
} }
} }
} }

File diff suppressed because it is too large Load Diff

View File

@@ -59,13 +59,17 @@ Examples: PG-101, TG-201, LG-301, PG-10101, TG-10201
# 펌프: P-10101, VP-10117, DP-10101 등 # 펌프: P-10101, VP-10117, DP-10101 등
_PUMP_PROMPT = _PROMPT_HEADER + """ _PUMP_PROMPT = _PROMPT_HEADER + """
Extract ONLY pumps and compressors. Extract ONLY pumps and compressors (simple equipment tags, NO pipe size suffix).
Target equipment types: P (pump), VP (vertical pump), DP (dual pump), Target equipment types: P (pump), VP (vertical pump), DP (dual pump),
C (compressor), CP (centrifugal pump), BP (booster pump), C (compressor), CP (centrifugal pump), BP (booster pump), SP (sump pump),
and their variants. and their variants.
Examples: P-10101, VP-10117, DP-10101, C-10201, CP-10301, BP-10401 Examples (4~5 digit loop numbers): P-10101, VP-10117, DP-10101, C-10201, P-9101, P-9116, VP-9201
IMPORTANT: Do NOT extract pipeline/line numbers that have a pipe size suffix (e.g. 25A, 50A, 100A).
SKIP (pipeline, not a pump): P-10101-25A-F1A-n, P-9107-25A-F-n, CHR-9641-50A-F-C50
INCLUDE (pump tag): P-10101, VP-10117, P-9101
""" """
# 프롬프트 매핑 # 프롬프트 매핑

View File

@@ -17,6 +17,8 @@ public class PidExtractorService : IPidExtractorService
private readonly McpClient _mcp; private readonly McpClient _mcp;
private readonly ExperionDbContext _dbContext; private readonly ExperionDbContext _dbContext;
private readonly ILogger<PidExtractorService> _logger; private readonly ILogger<PidExtractorService> _logger;
private readonly SemaphoreSlim _cacheLock = new(1, 1);
private List<PidPrefixRule>? _cachedRules;
public PidExtractorService(McpClient mcp, ExperionDbContext dbContext, ILogger<PidExtractorService> logger) public PidExtractorService(McpClient mcp, ExperionDbContext dbContext, ILogger<PidExtractorService> logger)
{ {
@@ -62,15 +64,27 @@ public class PidExtractorService : IPidExtractorService
var mappingJson = await _mcp.MatchPidTagsAsync(pidTagNos, experionTagNames); var mappingJson = await _mcp.MatchPidTagsAsync(pidTagNos, experionTagNames);
var mappings = ParseMappingJson(mappingJson); var mappings = ParseMappingJson(mappingJson);
// 중복 체크: 기존 DB에 있는 TagNo는 제외 (대소문자 구분 없음)
var existingTagNos = new HashSet<string>(
await _dbContext.PidEquipment.Select(e => e.TagNo).ToListAsync(),
StringComparer.OrdinalIgnoreCase);
var newItems = extractedItems.Where(i => !existingTagNos.Contains(i.TagNo)).ToList();
int skippedCount = extractedItems.Count - newItems.Count;
if (skippedCount > 0)
_logger.LogInformation("P&ID 중복 제외: {Skipped}건 스킵 (이미 존재)", skippedCount);
// DB 저장 // DB 저장
var dbItems = new List<PidEquipment>(); var dbItems = new List<PidEquipment>();
foreach (var item in extractedItems) foreach (var item in newItems)
{ {
mappings.TryGetValue(item.TagNo, out var matched); mappings.TryGetValue(item.TagNo, out var matched);
var experionTag = matched != null var experionTag = matched != null
? await _dbContext.RealtimePoints.FirstOrDefaultAsync(r => r.TagName == matched) ? await _dbContext.RealtimePoints.FirstOrDefaultAsync(r => r.TagName == matched)
: await FindFallbackTagAsync(item.TagNo); : await FindFallbackTagAsync(item.TagNo);
var category = await MatchCategoryAsync(item.TagNo);
dbItems.Add(new PidEquipment dbItems.Add(new PidEquipment
{ {
TagNo = item.TagNo, TagNo = item.TagNo,
@@ -80,20 +94,27 @@ public class PidExtractorService : IPidExtractorService
PidDrawingNo = item.PidDrawingNo, PidDrawingNo = item.PidDrawingNo,
Confidence = item.Confidence, Confidence = item.Confidence,
ExperionTagId = experionTag?.Id, ExperionTagId = experionTag?.Id,
Category = category,
ExtractedAt = DateTime.UtcNow, ExtractedAt = DateTime.UtcNow,
UpdatedAt = DateTime.UtcNow UpdatedAt = DateTime.UtcNow
}); });
} }
await _dbContext.PidEquipment.AddRangeAsync(dbItems); if (dbItems.Count > 0)
await _dbContext.SaveChangesAsync(); {
await _dbContext.PidEquipment.AddRangeAsync(dbItems);
await _dbContext.SaveChangesAsync();
}
_logger.LogInformation("P&ID 추출 완료: {Total}건 저장 (파일: {FileName})", dbItems.Count, fileName); _logger.LogInformation(
"P&ID 추출 완료: {Total}건 저장, {Skipped}건 중복 스킵 (파일: {FileName})",
dbItems.Count, skippedCount, fileName);
return new PidExtractionResult( return new PidExtractionResult(
TotalCount: dbItems.Count, TotalCount: dbItems.Count,
ConfidenceItems: dbItems.Count(i => i.Confidence >= 0.7), ConfidenceItems: dbItems.Count(i => i.Confidence >= 0.7),
LowConfidenceItems: dbItems.Count(i => i.Confidence < 0.5)); LowConfidenceItems: dbItems.Count(i => i.Confidence < 0.5),
SkippedDuplicates: skippedCount);
} }
private string ExtractDxfText(Stream stream) private string ExtractDxfText(Stream stream)
@@ -143,7 +164,7 @@ public class PidExtractorService : IPidExtractorService
// - 단일 글자 장비 태그 포함: P-10101, T-10100, E-10119, C-10111 // - 단일 글자 장비 태그 포함: P-10101, T-10100, E-10119, C-10111
// - 다중 글자 계측 태그: FCV-101, FICQ-6113, PSV-6203 // - 다중 글자 계측 태그: FCV-101, FICQ-6113, PSV-6203
// - 복합 태그: VG-6203-15A-F1A-n, CD-10513-40A // - 복합 태그: VG-6203-15A-F1A-n, CD-10513-40A
if (Regex.IsMatch(trimmed, @"[A-Z]{1,6}-\d{2,6}(-[A-Z0-9]+)*")) if (Regex.IsMatch(trimmed, @"[A-Z]{1,6}-\d{2,6}(-[A-Z0-9]+)*", RegexOptions.IgnoreCase))
{ {
filteredLines.Add(trimmed); filteredLines.Add(trimmed);
} }
@@ -265,6 +286,15 @@ public class PidExtractorService : IPidExtractorService
await _dbContext.SaveChangesAsync(); await _dbContext.SaveChangesAsync();
} }
/// <summary>
/// Removes the equipment row identified by <paramref name="id"/>.
/// </summary>
/// <param name="id">Key value handed to <c>FindAsync</c> to locate the row.</param>
/// <returns>
/// <c>true</c> when the row was found, removed and the change saved;
/// <c>false</c> when no row with that key exists.
/// </returns>
public async Task<bool> DeleteAsync(long id)
{
    var equipment = await _dbContext.PidEquipment.FindAsync(id);
    if (equipment is not null)
    {
        _dbContext.PidEquipment.Remove(equipment);
        await _dbContext.SaveChangesAsync();
        return true;
    }

    // Nothing to delete: report it to the caller instead of throwing.
    return false;
}
public Task<int> GetTotalCountAsync() => _dbContext.PidEquipment.CountAsync(); public Task<int> GetTotalCountAsync() => _dbContext.PidEquipment.CountAsync();
public Task<int> GetConfidenceItemsCountAsync() => _dbContext.PidEquipment.CountAsync(e => e.Confidence >= 0.7); public Task<int> GetConfidenceItemsCountAsync() => _dbContext.PidEquipment.CountAsync(e => e.Confidence >= 0.7);
public Task<int> GetLowConfidenceItemsCountAsync() => _dbContext.PidEquipment.CountAsync(e => e.Confidence < 0.5); public Task<int> GetLowConfidenceItemsCountAsync() => _dbContext.PidEquipment.CountAsync(e => e.Confidence < 0.5);
@@ -281,13 +311,16 @@ public class PidExtractorService : IPidExtractorService
}; };
} }
public Task<string> ExportToCsvAsync(IEnumerable<PidEquipment> items) public async Task<string> ExportToCsvAsync(IEnumerable<PidEquipment> items)
{ {
var sb = new StringBuilder(); return await Task.Run(() =>
sb.AppendLine("TagNo,EquipmentName,InstrumentType,LineNumber,PidDrawingNo,Confidence,IsActive,ExtractedAt,ExperionTagId"); {
foreach (var i in items) var sb = new StringBuilder();
sb.AppendLine($"{Csv(i.TagNo)},{Csv(i.EquipmentName)},{Csv(i.InstrumentType)},{Csv(i.LineNumber)},{Csv(i.PidDrawingNo)},{i.Confidence},{i.IsActive},{i.ExtractedAt:O},{i.ExperionTagId}"); sb.AppendLine("TagNo,EquipmentName,InstrumentType,LineNumber,PidDrawingNo,Confidence,IsActive,ExtractedAt,ExperionTagId,Category,Role,From,To");
return Task.FromResult(sb.ToString()); foreach (var i in items)
sb.AppendLine($"{Csv(i.TagNo)},{Csv(i.EquipmentName)},{Csv(i.InstrumentType)},{Csv(i.LineNumber)},{Csv(i.PidDrawingNo)},{i.Confidence},{i.IsActive},{i.ExtractedAt:O},{i.ExperionTagId},{Csv(i.Category)},{Csv(i.Role)},{Csv(i.FromTag)},{Csv(i.ToTag)}");
return sb.ToString();
});
} }
private static string Csv(string? v) private static string Csv(string? v)
@@ -297,38 +330,216 @@ public class PidExtractorService : IPidExtractorService
? $"\"{v.Replace("\"", "\"\"")}\"" : v; ? $"\"{v.Replace("\"", "\"\"")}\"" : v;
} }
public Task<byte[]> ExportToExcelAsync(IEnumerable<PidEquipment> items) public async Task<byte[]> ExportToExcelAsync(IEnumerable<PidEquipment> items)
{ {
using var package = new OfficeOpenXml.ExcelPackage(); return await Task.Run(async () =>
var worksheet = package.Workbook.Worksheets.Add("P&ID Equipment");
// 헤더
worksheet.Cells[1, 1].Value = "태그번호";
worksheet.Cells[1, 2].Value = "장비명";
worksheet.Cells[1, 3].Value = "계기유형";
worksheet.Cells[1, 4].Value = "라인번호";
worksheet.Cells[1, 5].Value = "도면번호";
worksheet.Cells[1, 6].Value = "신뢰도";
worksheet.Cells[1, 7].Value = "상태";
worksheet.Cells[1, 8].Value = "추출일시";
worksheet.Cells[1, 9].Value = "Experion 태그";
int row = 2;
foreach (var item in items)
{ {
worksheet.Cells[row, 1].Value = item.TagNo; using var package = new OfficeOpenXml.ExcelPackage();
worksheet.Cells[row, 2].Value = item.EquipmentName ?? "";
worksheet.Cells[row, 3].Value = item.InstrumentType ?? "";
worksheet.Cells[row, 4].Value = item.LineNumber ?? "";
worksheet.Cells[row, 5].Value = item.PidDrawingNo ?? "";
worksheet.Cells[row, 6].Value = item.Confidence;
worksheet.Cells[row, 7].Value = item.IsActive ? "활성" : "비활성";
worksheet.Cells[row, 8].Value = item.ExtractedAt;
worksheet.Cells[row, 9].Value = item.ExperionTag?.TagName ?? "";
row++;
}
return Task.FromResult(package.GetAsByteArray()); var rules = await GetRulesCachedAsync();
var prefixToDesc = rules
.ToDictionary(r => r.Prefix.ToLowerInvariant(), r => r.Description ?? r.Prefix);
var grouped = items
.GroupBy(i => string.IsNullOrEmpty(i.Category) ? "__unmatched__" : i.Category!)
.ToDictionary(g => g.Key, g => g.ToList());
var sheetOrder = new[]
{
PidEquipment.CategoryInstrument,
PidEquipment.CategoryPowerEquipment,
PidEquipment.CategoryStorageEquipment,
PidEquipment.CategoryProcessEquipment,
PidEquipment.CategoryUtilityEquipment,
PidEquipment.CategoryPipings,
"__unmatched__"
};
var sheetNames = new Dictionary<string, string>
{
[PidEquipment.CategoryInstrument] = "Instrument",
[PidEquipment.CategoryPowerEquipment] = "Power Equipment",
[PidEquipment.CategoryStorageEquipment] = "Storage Equipment",
[PidEquipment.CategoryProcessEquipment] = "Process Equipment",
[PidEquipment.CategoryUtilityEquipment] = "Utility Equipment",
[PidEquipment.CategoryPipings] = "Pipings",
["__unmatched__"] = "Unmatched"
};
foreach (var cat in sheetOrder)
{
if (!grouped.TryGetValue(cat, out var groupItems) || groupItems.Count == 0)
continue;
var sheetName = sheetNames[cat];
var worksheet = package.Workbook.Worksheets.Add(sheetName);
worksheet.Cells[1, 1].Value = "태그번호";
worksheet.Cells[1, 2].Value = "장비명";
worksheet.Cells[1, 3].Value = "장비타입";
worksheet.Cells[1, 4].Value = "라인번호";
worksheet.Cells[1, 5].Value = "도면번호";
worksheet.Cells[1, 6].Value = "신뢰도";
worksheet.Cells[1, 7].Value = "상태";
worksheet.Cells[1, 8].Value = "추출일시";
worksheet.Cells[1, 9].Value = "Experion 태그";
worksheet.Cells[1, 10].Value = "카테고리";
worksheet.Cells[1, 11].Value = "Role";
worksheet.Cells[1, 12].Value = "From";
worksheet.Cells[1, 13].Value = "To";
using var headerRange = worksheet.Cells[1, 1, 1, 13];
headerRange.Style.Font.Bold = true;
headerRange.Style.Fill.PatternType = OfficeOpenXml.Style.ExcelFillStyle.Solid;
headerRange.Style.Fill.BackgroundColor.SetColor(System.Drawing.Color.LightGray);
int row = 2;
foreach (var item in groupItems)
{
worksheet.Cells[row, 1].Value = item.TagNo;
worksheet.Cells[row, 2].Value = item.EquipmentName ?? "";
worksheet.Cells[row, 3].Value = item.InstrumentType ?? "";
worksheet.Cells[row, 4].Value = item.LineNumber ?? "";
worksheet.Cells[row, 5].Value = item.PidDrawingNo ?? "";
worksheet.Cells[row, 6].Value = item.Confidence;
worksheet.Cells[row, 7].Value = item.IsActive ? "활성" : "비활성";
worksheet.Cells[row, 8].Value = item.ExtractedAt;
worksheet.Cells[row, 9].Value = item.ExperionTag?.TagName ?? "";
worksheet.Cells[row, 10].Value = item.Category ?? "";
worksheet.Cells[row, 11].Value = item.Role ?? "";
worksheet.Cells[row, 12].Value = item.FromTag ?? "";
worksheet.Cells[row, 13].Value = item.ToTag ?? "";
row++;
}
worksheet.Cells.AutoFitColumns();
}
return package.GetAsByteArray();
});
}
// ── Prefix Rule Cache ──────────────────────────────────────────────────────
/// <summary>
/// Returns the prefix rules, querying the database only when no list is cached yet
/// and reusing the cached list on later calls until the cache is cleared.
/// </summary>
/// <returns>
/// The rules ordered by prefix length (longest first) and then by <c>SortOrder</c>.
/// </returns>
private async Task<List<PidPrefixRule>> GetRulesCachedAsync()
{
    // Fast path: a populated cache is returned without taking the lock.
    if (_cachedRules is { } alreadyCached)
    {
        return alreadyCached;
    }

    await _cacheLock.WaitAsync();
    try
    {
        // Check again under the lock: another caller may have loaded the rules
        // while this one was waiting.
        if (_cachedRules is { } loadedMeanwhile)
        {
            return loadedMeanwhile;
        }

        var loaded = await _dbContext.PidPrefixRules
            .OrderByDescending(r => r.Prefix.Length)
            .ThenBy(r => r.SortOrder)
            .ToListAsync();

        _cachedRules = loaded;
        return loaded;
    }
    finally
    {
        _cacheLock.Release();
    }
}
/// <summary>
/// Clears the cached prefix-rule list so the next <c>GetRulesCachedAsync</c> call
/// reloads the rules from the database.
/// </summary>
private void InvalidateRulesCache() => Interlocked.Exchange(ref _cachedRules, null);
// Pipe line-number pattern: SERVICE-LINENUM-SIZE-... where the third field is the pipe
// size (digits followed by a single letter, e.g. 25A) and at least one more field follows.
// Matching is case-insensitive, in line with the prefix-rule lookup and the duplicate
// check, so a lower-case tag such as "p-9107-25a-f-n" is still recognised as piping
// instead of falling through to the prefix rules.
private static readonly Regex _pipeLineNoRe = new(
    @"^[A-Z][A-Z0-9]{0,3}-\d{3,6}-\d{1,4}[A-Za-z]-",
    RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
/// <summary>
/// Determines the category for a tag number.
/// </summary>
/// <param name="tagNo">Tag number to classify.</param>
/// <returns>
/// <c>PidEquipment.CategoryPipings</c> when the tag matches the pipe line-number pattern;
/// otherwise the category of the first cached prefix rule whose prefix the tag starts with
/// (case-insensitive), or <c>null</c> when no rule applies.
/// </returns>
private async Task<string?> MatchCategoryAsync(string tagNo)
{
    // The pipe pattern is tested before any prefix rule: a line number such as
    // P-9117-20A-F-n also starts with an equipment prefix and would otherwise be
    // classified by that rule.
    if (!_pipeLineNoRe.IsMatch(tagNo))
    {
        var candidates = await GetRulesCachedAsync();
        foreach (var rule in candidates)
        {
            if (tagNo.StartsWith(rule.Prefix, StringComparison.OrdinalIgnoreCase))
            {
                return rule.Category;
            }
        }

        return null;
    }

    return PidEquipment.CategoryPipings;
}
// ── Prefix Rule CRUD ───────────────────────────────────────────────────────
/// <summary>
/// Reads all prefix rules from the database.
/// </summary>
/// <returns>The rules ordered by <c>SortOrder</c>, then by <c>Prefix</c>.</returns>
public async Task<List<PidPrefixRule>> GetPrefixRulesAsync()
{
    var orderedRules = _dbContext.PidPrefixRules
        .OrderBy(r => r.SortOrder)
        .ThenBy(r => r.Prefix);

    return await orderedRules.ToListAsync();
}
/// <summary>
/// Creates a prefix rule from the request, saves it and clears the rule cache.
/// </summary>
/// <param name="request">Values for the new rule; prefix and description are trimmed before storing.</param>
/// <returns>The rule instance that was added and saved.</returns>
public async Task<PidPrefixRule> CreatePrefixRuleAsync(CreatePidPrefixRuleRequest request)
{
    var prefix = request.Prefix.Trim();
    var description = request.Description?.Trim();

    var created = new PidPrefixRule
    {
        Prefix = prefix,
        Category = request.Category,
        Description = description,
        SortOrder = request.SortOrder,
        CreatedAt = DateTime.UtcNow,
        UpdatedAt = DateTime.UtcNow
    };

    _dbContext.PidPrefixRules.Add(created);
    await _dbContext.SaveChangesAsync();

    // Cached rules are stale once the table changes.
    InvalidateRulesCache();
    return created;
}
/// <summary>
/// Overwrites an existing prefix rule with the request values, saves it and clears the rule cache.
/// </summary>
/// <param name="id">Key of the rule to update.</param>
/// <param name="request">New values; prefix and description are trimmed before storing.</param>
/// <returns>The updated rule, or <c>null</c> when no rule with that key exists.</returns>
public async Task<PidPrefixRule?> UpdatePrefixRuleAsync(int id, UpdatePidPrefixRuleRequest request)
{
    var existing = await _dbContext.PidPrefixRules.FindAsync(id);
    if (existing is null)
    {
        return null;
    }

    existing.Prefix = request.Prefix.Trim();
    existing.Category = request.Category;
    existing.Description = request.Description?.Trim();
    existing.SortOrder = request.SortOrder;
    existing.UpdatedAt = DateTime.UtcNow;

    await _dbContext.SaveChangesAsync();

    // Cached rules are stale once a rule changes.
    InvalidateRulesCache();
    return existing;
}
/// <summary>
/// Deletes the prefix rule with the given key and clears the rule cache.
/// </summary>
/// <param name="id">Key of the rule to delete.</param>
/// <returns><c>true</c> when the rule was found and removed; <c>false</c> when it does not exist.</returns>
public async Task<bool> DeletePrefixRuleAsync(int id)
{
    var target = await _dbContext.PidPrefixRules.FindAsync(id);
    if (target is not null)
    {
        _dbContext.PidPrefixRules.Remove(target);
        await _dbContext.SaveChangesAsync();

        // Cached rules are stale once a rule is removed.
        InvalidateRulesCache();
        return true;
    }

    return false;
}
/// <summary>
/// Assigns a category to existing equipment rows whose category is still null,
/// walking the table in batches of 1000 rows.
/// </summary>
/// <remarks>
/// Rows for which no category can be determined keep a null category. The query
/// therefore advances by primary key rather than re-reading "the first N rows with a
/// null category": without a cursor the same unmatched rows would be returned on
/// every iteration and the loop would never finish.
/// </remarks>
/// <returns>The number of rows that received a category.</returns>
public async Task<int> ApplyCategoriesToExistingAsync()
{
    const int batchSize = 1000;
    int total = 0;

    // NOTE(review): assumes the PidEquipment key property is named Id
    // (DeleteAsync looks rows up by a long key) — confirm against the entity.
    long lastId = long.MinValue;

    while (true)
    {
        var cursor = lastId;
        var batch = await _dbContext.PidEquipment
            .Where(e => e.Category == null && e.Id > cursor)
            .OrderBy(e => e.Id)
            .Take(batchSize)
            .ToListAsync();
        if (batch.Count == 0) break;

        // Move the cursor past this batch whether or not its rows could be matched.
        lastId = batch[batch.Count - 1].Id;

        foreach (var item in batch)
        {
            var category = await MatchCategoryAsync(item.TagNo);
            if (category == null) continue;

            item.Category = category;
            item.UpdatedAt = DateTime.UtcNow;
            total++;
        }

        await _dbContext.SaveChangesAsync();
    }

    return total;
}
} }