fix: P&ID 배관번호 분류 오류 수정 (power_equipment → pipings)
- _PID_LINENO_FULL_RE: 7필드 고정 regex → 5~7필드 통합 (9차 P-9107-25A-F-n 등 미매칭 수정)
- _extract_pid_dxf_fast: 레이어 이름 하드코딩 제거 → FULL_RE 매칭 우선, LINENO 계열 레이어 힌트 보조
- MatchCategoryAsync: 배관번호 regex(_pipeLineNoRe) 체크를 prefix 룰보다 먼저 실행 → P-9117-20A-F-n 등이 power_equipment로 오분류되던 문제 수정
- pump extractor 프롬프트: 배관번호 SKIP/INCLUDE 예시 추가
- DB 기존 레코드 435건 pipings로 재분류 (직접 SQL)
- .claude/settings.json: LLM 모델명 하드코딩 제거

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
"/home/windpacer/projects/ExperionCrawler/mcp-server/server.py"
|
||||
],
|
||||
"env": {},
|
||||
"description": "ExperionCrawler RAG — Qdrant(코드베이스+OPC UA 문서) + GLM-4.7-Flash"
|
||||
"description": "ExperionCrawler RAG — Qdrant(코드베이스+OPC UA 문서), 현재 LLM은 mcp-server/llm-model.json 참조"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -59,13 +59,17 @@ Examples: PG-101, TG-201, LG-301, PG-10101, TG-10201
|
||||
|
||||
# 펌프: P-10101, VP-10117, DP-10101 등
|
||||
_PUMP_PROMPT = _PROMPT_HEADER + """
|
||||
Extract ONLY pumps and compressors.
|
||||
Extract ONLY pumps and compressors (simple equipment tags, NO pipe size suffix).
|
||||
|
||||
Target equipment types: P (pump), VP (vertical pump), DP (dual pump),
|
||||
C (compressor), CP (centrifugal pump), BP (booster pump),
|
||||
C (compressor), CP (centrifugal pump), BP (booster pump), SP (sump pump),
|
||||
and their variants.
|
||||
|
||||
Examples: P-10101, VP-10117, DP-10101, C-10201, CP-10301, BP-10401
|
||||
Examples (4~5 digit loop numbers): P-10101, VP-10117, DP-10101, C-10201, P-9101, P-9116, VP-9201
|
||||
|
||||
IMPORTANT: Do NOT extract pipeline/line numbers that have a pipe size suffix (e.g. 25A, 50A, 100A).
|
||||
SKIP (pipeline, not a pump): P-10101-25A-F1A-n, P-9107-25A-F-n, CHR-9641-50A-F-C50
|
||||
INCLUDE (pump tag): P-10101, VP-10117, P-9101
|
||||
"""
|
||||
|
||||
# 프롬프트 매핑
|
||||
|
||||
@@ -17,6 +17,8 @@ public class PidExtractorService : IPidExtractorService
|
||||
private readonly McpClient _mcp;
|
||||
private readonly ExperionDbContext _dbContext;
|
||||
private readonly ILogger<PidExtractorService> _logger;
|
||||
private readonly SemaphoreSlim _cacheLock = new(1, 1);
|
||||
private List<PidPrefixRule>? _cachedRules;
|
||||
|
||||
public PidExtractorService(McpClient mcp, ExperionDbContext dbContext, ILogger<PidExtractorService> logger)
|
||||
{
|
||||
@@ -62,15 +64,27 @@ public class PidExtractorService : IPidExtractorService
|
||||
var mappingJson = await _mcp.MatchPidTagsAsync(pidTagNos, experionTagNames);
|
||||
var mappings = ParseMappingJson(mappingJson);
|
||||
|
||||
// 중복 체크: 기존 DB에 있는 TagNo는 제외 (대소문자 구분 없음)
|
||||
var existingTagNos = new HashSet<string>(
|
||||
await _dbContext.PidEquipment.Select(e => e.TagNo).ToListAsync(),
|
||||
StringComparer.OrdinalIgnoreCase);
|
||||
var newItems = extractedItems.Where(i => !existingTagNos.Contains(i.TagNo)).ToList();
|
||||
int skippedCount = extractedItems.Count - newItems.Count;
|
||||
|
||||
if (skippedCount > 0)
|
||||
_logger.LogInformation("P&ID 중복 제외: {Skipped}건 스킵 (이미 존재)", skippedCount);
|
||||
|
||||
// DB 저장
|
||||
var dbItems = new List<PidEquipment>();
|
||||
foreach (var item in extractedItems)
|
||||
foreach (var item in newItems)
|
||||
{
|
||||
mappings.TryGetValue(item.TagNo, out var matched);
|
||||
var experionTag = matched != null
|
||||
? await _dbContext.RealtimePoints.FirstOrDefaultAsync(r => r.TagName == matched)
|
||||
: await FindFallbackTagAsync(item.TagNo);
|
||||
|
||||
var category = await MatchCategoryAsync(item.TagNo);
|
||||
|
||||
dbItems.Add(new PidEquipment
|
||||
{
|
||||
TagNo = item.TagNo,
|
||||
@@ -80,20 +94,27 @@ public class PidExtractorService : IPidExtractorService
|
||||
PidDrawingNo = item.PidDrawingNo,
|
||||
Confidence = item.Confidence,
|
||||
ExperionTagId = experionTag?.Id,
|
||||
Category = category,
|
||||
ExtractedAt = DateTime.UtcNow,
|
||||
UpdatedAt = DateTime.UtcNow
|
||||
});
|
||||
}
|
||||
|
||||
if (dbItems.Count > 0)
|
||||
{
|
||||
await _dbContext.PidEquipment.AddRangeAsync(dbItems);
|
||||
await _dbContext.SaveChangesAsync();
|
||||
}
|
||||
|
||||
_logger.LogInformation("P&ID 추출 완료: {Total}건 저장 (파일: {FileName})", dbItems.Count, fileName);
|
||||
_logger.LogInformation(
|
||||
"P&ID 추출 완료: {Total}건 저장, {Skipped}건 중복 스킵 (파일: {FileName})",
|
||||
dbItems.Count, skippedCount, fileName);
|
||||
|
||||
return new PidExtractionResult(
|
||||
TotalCount: dbItems.Count,
|
||||
ConfidenceItems: dbItems.Count(i => i.Confidence >= 0.7),
|
||||
LowConfidenceItems: dbItems.Count(i => i.Confidence < 0.5));
|
||||
LowConfidenceItems: dbItems.Count(i => i.Confidence < 0.5),
|
||||
SkippedDuplicates: skippedCount);
|
||||
}
|
||||
|
||||
private string ExtractDxfText(Stream stream)
|
||||
@@ -143,7 +164,7 @@ public class PidExtractorService : IPidExtractorService
|
||||
// - 단일 글자 장비 태그 포함: P-10101, T-10100, E-10119, C-10111
|
||||
// - 다중 글자 계측 태그: FCV-101, FICQ-6113, PSV-6203
|
||||
// - 복합 태그: VG-6203-15A-F1A-n, CD-10513-40A
|
||||
if (Regex.IsMatch(trimmed, @"[A-Z]{1,6}-\d{2,6}(-[A-Z0-9]+)*"))
|
||||
if (Regex.IsMatch(trimmed, @"[A-Z]{1,6}-\d{2,6}(-[A-Z0-9]+)*", RegexOptions.IgnoreCase))
|
||||
{
|
||||
filteredLines.Add(trimmed);
|
||||
}
|
||||
@@ -265,6 +286,15 @@ public class PidExtractorService : IPidExtractorService
|
||||
await _dbContext.SaveChangesAsync();
|
||||
}
|
||||
|
||||
/// <summary>
/// Deletes the equipment row whose primary key equals <paramref name="id"/>.
/// </summary>
/// <param name="id">Key value handed to <c>FindAsync</c>.</param>
/// <returns>
/// <c>false</c> when no row with that key exists; <c>true</c> once the row has been
/// removed and the change saved.
/// </returns>
public async Task<bool> DeleteAsync(long id)
{
    var equipment = await _dbContext.PidEquipment.FindAsync(id);
    if (equipment != null)
    {
        _dbContext.PidEquipment.Remove(equipment);
        await _dbContext.SaveChangesAsync();
        return true;
    }

    return false;
}
|
||||
|
||||
public Task<int> GetTotalCountAsync() => _dbContext.PidEquipment.CountAsync();
|
||||
public Task<int> GetConfidenceItemsCountAsync() => _dbContext.PidEquipment.CountAsync(e => e.Confidence >= 0.7);
|
||||
public Task<int> GetLowConfidenceItemsCountAsync() => _dbContext.PidEquipment.CountAsync(e => e.Confidence < 0.5);
|
||||
@@ -281,13 +311,16 @@ public class PidExtractorService : IPidExtractorService
|
||||
};
|
||||
}
|
||||
|
||||
public Task<string> ExportToCsvAsync(IEnumerable<PidEquipment> items)
|
||||
public async Task<string> ExportToCsvAsync(IEnumerable<PidEquipment> items)
|
||||
{
|
||||
return await Task.Run(() =>
|
||||
{
|
||||
var sb = new StringBuilder();
|
||||
sb.AppendLine("TagNo,EquipmentName,InstrumentType,LineNumber,PidDrawingNo,Confidence,IsActive,ExtractedAt,ExperionTagId");
|
||||
sb.AppendLine("TagNo,EquipmentName,InstrumentType,LineNumber,PidDrawingNo,Confidence,IsActive,ExtractedAt,ExperionTagId,Category,Role,From,To");
|
||||
foreach (var i in items)
|
||||
sb.AppendLine($"{Csv(i.TagNo)},{Csv(i.EquipmentName)},{Csv(i.InstrumentType)},{Csv(i.LineNumber)},{Csv(i.PidDrawingNo)},{i.Confidence},{i.IsActive},{i.ExtractedAt:O},{i.ExperionTagId}");
|
||||
return Task.FromResult(sb.ToString());
|
||||
sb.AppendLine($"{Csv(i.TagNo)},{Csv(i.EquipmentName)},{Csv(i.InstrumentType)},{Csv(i.LineNumber)},{Csv(i.PidDrawingNo)},{i.Confidence},{i.IsActive},{i.ExtractedAt:O},{i.ExperionTagId},{Csv(i.Category)},{Csv(i.Role)},{Csv(i.FromTag)},{Csv(i.ToTag)}");
|
||||
return sb.ToString();
|
||||
});
|
||||
}
|
||||
|
||||
private static string Csv(string? v)
|
||||
@@ -297,24 +330,71 @@ public class PidExtractorService : IPidExtractorService
|
||||
? $"\"{v.Replace("\"", "\"\"")}\"" : v;
|
||||
}
|
||||
|
||||
public Task<byte[]> ExportToExcelAsync(IEnumerable<PidEquipment> items)
|
||||
public async Task<byte[]> ExportToExcelAsync(IEnumerable<PidEquipment> items)
|
||||
{
|
||||
return await Task.Run(async () =>
|
||||
{
|
||||
using var package = new OfficeOpenXml.ExcelPackage();
|
||||
var worksheet = package.Workbook.Worksheets.Add("P&ID Equipment");
|
||||
|
||||
// 헤더
|
||||
var rules = await GetRulesCachedAsync();
|
||||
var prefixToDesc = rules
|
||||
.ToDictionary(r => r.Prefix.ToLowerInvariant(), r => r.Description ?? r.Prefix);
|
||||
|
||||
var grouped = items
|
||||
.GroupBy(i => string.IsNullOrEmpty(i.Category) ? "__unmatched__" : i.Category!)
|
||||
.ToDictionary(g => g.Key, g => g.ToList());
|
||||
|
||||
var sheetOrder = new[]
|
||||
{
|
||||
PidEquipment.CategoryInstrument,
|
||||
PidEquipment.CategoryPowerEquipment,
|
||||
PidEquipment.CategoryStorageEquipment,
|
||||
PidEquipment.CategoryProcessEquipment,
|
||||
PidEquipment.CategoryUtilityEquipment,
|
||||
PidEquipment.CategoryPipings,
|
||||
"__unmatched__"
|
||||
};
|
||||
|
||||
var sheetNames = new Dictionary<string, string>
|
||||
{
|
||||
[PidEquipment.CategoryInstrument] = "Instrument",
|
||||
[PidEquipment.CategoryPowerEquipment] = "Power Equipment",
|
||||
[PidEquipment.CategoryStorageEquipment] = "Storage Equipment",
|
||||
[PidEquipment.CategoryProcessEquipment] = "Process Equipment",
|
||||
[PidEquipment.CategoryUtilityEquipment] = "Utility Equipment",
|
||||
[PidEquipment.CategoryPipings] = "Pipings",
|
||||
["__unmatched__"] = "Unmatched"
|
||||
};
|
||||
|
||||
foreach (var cat in sheetOrder)
|
||||
{
|
||||
if (!grouped.TryGetValue(cat, out var groupItems) || groupItems.Count == 0)
|
||||
continue;
|
||||
|
||||
var sheetName = sheetNames[cat];
|
||||
var worksheet = package.Workbook.Worksheets.Add(sheetName);
|
||||
|
||||
worksheet.Cells[1, 1].Value = "태그번호";
|
||||
worksheet.Cells[1, 2].Value = "장비명";
|
||||
worksheet.Cells[1, 3].Value = "계기유형";
|
||||
worksheet.Cells[1, 3].Value = "장비타입";
|
||||
worksheet.Cells[1, 4].Value = "라인번호";
|
||||
worksheet.Cells[1, 5].Value = "도면번호";
|
||||
worksheet.Cells[1, 6].Value = "신뢰도";
|
||||
worksheet.Cells[1, 7].Value = "상태";
|
||||
worksheet.Cells[1, 8].Value = "추출일시";
|
||||
worksheet.Cells[1, 9].Value = "Experion 태그";
|
||||
worksheet.Cells[1, 10].Value = "카테고리";
|
||||
worksheet.Cells[1, 11].Value = "Role";
|
||||
worksheet.Cells[1, 12].Value = "From";
|
||||
worksheet.Cells[1, 13].Value = "To";
|
||||
|
||||
using var headerRange = worksheet.Cells[1, 1, 1, 13];
|
||||
headerRange.Style.Font.Bold = true;
|
||||
headerRange.Style.Fill.PatternType = OfficeOpenXml.Style.ExcelFillStyle.Solid;
|
||||
headerRange.Style.Fill.BackgroundColor.SetColor(System.Drawing.Color.LightGray);
|
||||
|
||||
int row = 2;
|
||||
foreach (var item in items)
|
||||
foreach (var item in groupItems)
|
||||
{
|
||||
worksheet.Cells[row, 1].Value = item.TagNo;
|
||||
worksheet.Cells[row, 2].Value = item.EquipmentName ?? "";
|
||||
@@ -325,10 +405,141 @@ public class PidExtractorService : IPidExtractorService
|
||||
worksheet.Cells[row, 7].Value = item.IsActive ? "활성" : "비활성";
|
||||
worksheet.Cells[row, 8].Value = item.ExtractedAt;
|
||||
worksheet.Cells[row, 9].Value = item.ExperionTag?.TagName ?? "";
|
||||
worksheet.Cells[row, 10].Value = item.Category ?? "";
|
||||
worksheet.Cells[row, 11].Value = item.Role ?? "";
|
||||
worksheet.Cells[row, 12].Value = item.FromTag ?? "";
|
||||
worksheet.Cells[row, 13].Value = item.ToTag ?? "";
|
||||
row++;
|
||||
}
|
||||
|
||||
return Task.FromResult(package.GetAsByteArray());
|
||||
worksheet.Cells.AutoFitColumns();
|
||||
}
|
||||
|
||||
return package.GetAsByteArray();
|
||||
});
|
||||
}
|
||||
|
||||
// ── Prefix Rule Cache ──────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Returns the prefix rules, loading them from the database on first use and
/// keeping the loaded list in <c>_cachedRules</c> afterwards.
/// </summary>
/// <returns>
/// Rules ordered by prefix length (longest first), then by <c>SortOrder</c>.
/// </returns>
private async Task<List<PidPrefixRule>> GetRulesCachedAsync()
{
    // Fast path: an already-loaded list is returned without touching the semaphore.
    if (_cachedRules is { } cached)
    {
        return cached;
    }

    await _cacheLock.WaitAsync();
    try
    {
        // Re-read under the semaphore: another caller may have filled the cache
        // while this one was waiting.
        var current = _cachedRules;
        if (current == null)
        {
            var orderedRules = _dbContext.PidPrefixRules
                .OrderByDescending(rule => rule.Prefix.Length)
                .ThenBy(rule => rule.SortOrder);

            current = await orderedRules.ToListAsync();
            _cachedRules = current;
        }

        return current;
    }
    finally
    {
        _cacheLock.Release();
    }
}
|
||||
|
||||
// Clears the cached rule list; the next GetRulesCachedAsync call will query the database again.
private void InvalidateRulesCache() => Interlocked.Exchange(ref _cachedRules, null);
|
||||
|
||||
// Pipe line-number pattern: SERVICE-LINENUM-SIZE-... The third field is the pipe size
// (digits followed by a single letter, e.g. 25A) and must be followed by another '-'.
// IgnoreCase keeps this check consistent with the case-insensitive prefix match in
// MatchCategoryAsync, so a lowercase tag such as "p-9107-25a-f-n" is still recognised
// as piping instead of falling through to the prefix rules.
// NOTE(review): a three-field tag that ends right after the size (e.g. "CD-10513-40A")
// does not match because of the trailing '-'; confirm whether such tags should count as piping.
private static readonly Regex _pipeLineNoRe = new(
    @"^[A-Z][A-Z0-9]{0,3}-\d{3,6}-\d{1,4}[A-Za-z]-",
    RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

/// <summary>
/// Determines the category for a tag number.
/// </summary>
/// <param name="tagNo">Tag number to classify.</param>
/// <returns>
/// <c>PidEquipment.CategoryPipings</c> when the tag matches the pipe line-number pattern;
/// otherwise the category of the first cached prefix rule whose prefix the tag starts with
/// (case-insensitive), or <c>null</c> when no rule matches.
/// </returns>
private async Task<string?> MatchCategoryAsync(string tagNo)
{
    // The pipe-number check must run before the prefix rules: a line number such as
    // P-9117-20A-F-n would otherwise be claimed by a pump-style "P" prefix rule.
    if (_pipeLineNoRe.IsMatch(tagNo))
    {
        return PidEquipment.CategoryPipings;
    }

    var rules = await GetRulesCachedAsync();
    var matchedRule = rules.FirstOrDefault(r =>
        tagNo.StartsWith(r.Prefix, StringComparison.OrdinalIgnoreCase));
    return matchedRule?.Category;
}
|
||||
|
||||
// ── Prefix Rule CRUD ───────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Reads all prefix rules from the database.
/// </summary>
/// <returns>Rules ordered by <c>SortOrder</c>, then by <c>Prefix</c>.</returns>
public async Task<List<PidPrefixRule>> GetPrefixRulesAsync()
{
    var orderedRules = _dbContext.PidPrefixRules
        .OrderBy(rule => rule.SortOrder)
        .ThenBy(rule => rule.Prefix);

    var result = await orderedRules.ToListAsync();
    return result;
}
|
||||
|
||||
/// <summary>
/// Inserts a new prefix rule built from <paramref name="request"/> and clears the rule cache.
/// </summary>
/// <param name="request">Source of the prefix, category, description and sort order.</param>
/// <returns>The rule entity that was added and saved.</returns>
public async Task<PidPrefixRule> CreatePrefixRuleAsync(CreatePidPrefixRuleRequest request)
{
    // Prefix and description are stored without surrounding whitespace.
    var trimmedPrefix = request.Prefix.Trim();
    var trimmedDescription = request.Description?.Trim();

    var created = new PidPrefixRule
    {
        Prefix = trimmedPrefix,
        Category = request.Category,
        Description = trimmedDescription,
        SortOrder = request.SortOrder,
        CreatedAt = DateTime.UtcNow,
        UpdatedAt = DateTime.UtcNow
    };

    _dbContext.PidPrefixRules.Add(created);
    await _dbContext.SaveChangesAsync();

    // The cached list no longer reflects the table; force a reload on next use.
    InvalidateRulesCache();

    return created;
}
|
||||
|
||||
/// <summary>
/// Overwrites an existing prefix rule with the values in <paramref name="request"/>
/// and clears the rule cache.
/// </summary>
/// <param name="id">Key value handed to <c>FindAsync</c>.</param>
/// <param name="request">New prefix, category, description and sort order.</param>
/// <returns>The updated rule, or <c>null</c> when no rule with that key exists.</returns>
public async Task<PidPrefixRule?> UpdatePrefixRuleAsync(int id, UpdatePidPrefixRuleRequest request)
{
    var existing = await _dbContext.PidPrefixRules.FindAsync(id);
    if (existing != null)
    {
        existing.Prefix = request.Prefix.Trim();
        existing.Category = request.Category;
        existing.Description = request.Description?.Trim();
        existing.SortOrder = request.SortOrder;
        existing.UpdatedAt = DateTime.UtcNow;

        await _dbContext.SaveChangesAsync();

        // The cached list no longer reflects the table; force a reload on next use.
        InvalidateRulesCache();
        return existing;
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Deletes the prefix rule with the given key and clears the rule cache.
/// </summary>
/// <param name="id">Key value handed to <c>FindAsync</c>.</param>
/// <returns><c>false</c> when no rule with that key exists; otherwise <c>true</c>.</returns>
public async Task<bool> DeletePrefixRuleAsync(int id)
{
    var existing = await _dbContext.PidPrefixRules.FindAsync(id);
    if (existing != null)
    {
        _dbContext.PidPrefixRules.Remove(existing);
        await _dbContext.SaveChangesAsync();

        // The cached list no longer reflects the table; force a reload on next use.
        InvalidateRulesCache();
        return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Assigns a category to every stored equipment row whose <c>Category</c> is still null,
/// using <c>MatchCategoryAsync</c>, and saves the changes batch by batch.
/// </summary>
/// <returns>The number of rows that received a category.</returns>
public async Task<int> ApplyCategoriesToExistingAsync()
{
    const int batchSize = 1000;
    int total = 0;

    // Rows that were inspected but matched no category keep Category == null and therefore
    // stay in the filtered set. Without skipping them, the query would return the same rows
    // again on every pass and the loop would never end as soon as one unmatched row exists.
    int unmatched = 0;

    while (true)
    {
        // A deterministic order is required for Skip/Take paging. After each pass the
        // still-null rows seen so far sort before every row not yet inspected, so
        // skipping `unmatched` rows resumes exactly where the previous batch stopped.
        // NOTE(review): rows sharing the same TagNo have no tie-breaker here; add the
        // entity key as a secondary sort if duplicates can exist.
        var batch = await _dbContext.PidEquipment
            .Where(e => e.Category == null)
            .OrderBy(e => e.TagNo)
            .Skip(unmatched)
            .Take(batchSize)
            .ToListAsync();

        if (batch.Count == 0)
        {
            break;
        }

        foreach (var item in batch)
        {
            var category = await MatchCategoryAsync(item.TagNo);
            if (category == null)
            {
                unmatched++;
                continue;
            }

            item.Category = category;
            item.UpdatedAt = DateTime.UtcNow;
            total++;
        }

        await _dbContext.SaveChangesAsync();
    }

    return total;
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user