Files
HC900-Crawler/scripts/analysis/c6111_extract.py
windpacer b45f0e2481 fix: 누락 솔벤트 컬럼 추가 (5차·9-2·10-2) + 5차 T_C 센서 부재 처리
run_column.py 실행목록에 빠졌던 컬럼 보완 — 전체 8개 측류 솔벤트 컬럼 커버:
- 5차(51/P5) 신규 추가: 완전 누락이었음. 민감단 TI-5111C 센서 부재(A/B/D만)
  → COLUMN_EXCEPTIONS["51"]에서 T_C를 TI-5111B로 대체(사용자 확정).
- 9-2(92), 10-2(102) 추가: COLUMN_EXCEPTIONS엔 있었으나 COLUMNS 실행목록 누락.

검증: 8개 컬럼 핵심 FEATURES 전부 해석 OK, 5차·9-2 추출 스모크 정상(336519행).
(플랜트8은 단일 train=81, 8-2 없음 확인.)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-05 20:45:04 +09:00

205 lines
8.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
컬럼 데이터 추출 + 운전모드 1차 특성 분석.
field_hist DB(shinam 실데이터, WIDE 포맷)에서 ptlist/mapping/tblist로 태그를 디코드해
tidy DataFrame을 만든다. 재사용 가능한 tag_frame() 추출기 포함.
근거: docs/학습형제어-오퍼레이터모방-플랜.md §15(디코드), §16(C-6111 토폴로지).
형제 컬럼 확장: roles_for(prefix, asset)로 파라미터화.
- 5: prefix=51, asset=/ASSETS/P5
- 6-1: prefix=61, asset=/ASSETS/P6 (기본)
- 6-2: prefix=62, asset=/ASSETS/P6
- 8: prefix=81, asset=/ASSETS/P8
- 9: prefix=91, asset=/ASSETS/P9 (또는 92)
- 10: prefix=101, asset=/ASSETS/P10 (또는 102)
"""
import os
import sys

import pandas as pd
import psycopg
# Connection string for the field_hist PostgreSQL database. Set the
# FIELD_HIST_DSN environment variable to override it (so real credentials do
# not have to live in the source tree); the fallback is the original local DSN.
DSN = os.environ.get(
    "FIELD_HIST_DSN",
    "host=localhost port=5432 dbname=field_hist user=postgres password=postgres",
)
# Default asset path; resolve() matches it against ptlist.asset.
ASSET = "/ASSETS/P6"
# --- Sibling-column role generator ---
# Per-prefix overrides of the base tag-naming rule, based on the DB check of
# 2026-06-05:
#   P8  (81):  TICA has no A/B/C/D suffix; vacuum is PICA-8111A (A suffix).
#   P9  (91):  vacuum is PICA-9111A (A suffix); a second column 92xx exists.
#   P10 (101): FICQ-10114A (not 10114), PICA-10111A, LIA-10111 (not LICA);
#              a second column 102xx exists.
COLUMN_EXCEPTIONS = {
    # P5: there is no TI-5111C sensor on the sensitive tray (only A/B/D
    # exist), so T_C is substituted with TI-5111B (confirmed by the user on
    # 2026-06-05). The startup trigger uses reb-A and dT(A-D), so it is
    # not affected.
    "51": {"T_C": "TI-5111B.PV"},
    "81": {
        "steam_op": "TICA-8111.OP",
        "reb_temp": "TICA-8111.PV",
        "vacuum": "PICA-8111A.PV",
    },
    "91": {"vacuum": "PICA-9111A.PV"},
    "92": {"vacuum": "PICA-9211A.PV"},
    "101": {
        "light": "FICQ-10114A.PV",
        "vacuum": "PICA-10111A.PV",
        "reflux_drum": "LIA-10111.PV",
    },
    "102": {
        "light": "FICQ-10214.PV",
        "vacuum": "PICA-10211A.PV",
        "reflux_drum": "LIA-10211.PV",
    },
}
def roles_for(prefix, asset=ASSET):
    """Build the ``{role: shorttag}`` mapping for one column.

    Base rule (6-1 is the reference; see
    docs/작업지시서-학습형제어-다음단계.md, task 1), with ``{p}`` = prefix:
        feed=FICQ-{p}01, reflux=FICQ-{p}13, light(D)=FICQ-{p}14,
        heavy(B)=FICQ-{p}16, product(P)=FICQ-{p}18,
        steam_op=TICA-{p}11A.OP, reb_temp=TICA-{p}11A.PV,
        steam_flow=FIQ-{p}15, T_B=TI-{p}11B, T_C=TI-{p}11C, T_D=TI-{p}11D,
        vacuum=PICA-{p}11.PV, dp=PI-{p}11B.PV,
        reb_level=LI-{p}11.PV, reflux_drum=LICA-{p}13.PV,
        feed_preheat=TI-{p}03.PV
    Any entry registered for the prefix in ``COLUMN_EXCEPTIONS`` replaces the
    corresponding base tag.

    Args:
        prefix: Column prefix, e.g. "61", "62", "81", "91", "101". It is
            converted with ``str()`` so that an integer prefix also finds its
            ``COLUMN_EXCEPTIONS`` entry (the keys there are strings).
        asset: Not used by this function; kept in the signature for existing
            callers.

    Returns:
        A new dict mapping each role name to its short tag name.
    """
    # Normalize once: the f-strings accept any type, but the override lookup
    # below only matches string keys.
    p = str(prefix)
    roles = {
        "feed": f"FICQ-{p}01.PV",
        "steam_op": f"TICA-{p}11A.OP",
        "steam_flow": f"FIQ-{p}15.PV",
        "reb_temp": f"TICA-{p}11A.PV",
        "T_B": f"TI-{p}11B.PV",
        "T_C": f"TI-{p}11C.PV",
        "T_D": f"TI-{p}11D.PV",
        "feed_preheat": f"TI-{p}03.PV",
        "vacuum": f"PICA-{p}11.PV",
        "dp": f"PI-{p}11B.PV",
        "product": f"FICQ-{p}18.PV",
        "reflux": f"FICQ-{p}13.PV",
        "light": f"FICQ-{p}14.PV",
        "heavy": f"FICQ-{p}16.PV",
        "reb_level": f"LI-{p}11.PV",
        "reflux_drum": f"LICA-{p}13.PV",
    }
    # Column-specific overrides win over the base rule.
    roles.update(COLUMN_EXCEPTIONS.get(p, {}))
    return roles
# Role -> tag mapping for C-6111 (column 6-1); kept for legacy code that
# references ROLES directly.
ROLES = roles_for(prefix="61", asset=ASSET)
def resolve(conn, shorttags, asset=ASSET):
    """Look up the storage location of each short tag name.

    Args:
        conn: Open DB connection exposing ``cursor()``.
        shorttags: Iterable of ``ptlist.shortptname`` values to look up.
        asset: Value matched against ``ptlist.asset``.

    Returns:
        ``{shortptname: (tblname, oit)}`` with ``oit`` converted to ``int``.
        Tags the query does not return are simply absent from the dict.
    """
    sql = """
SELECT p.shortptname, t.tblname, m.oit
FROM ptlist p JOIN mapping m ON m.pid=p.pid JOIN tblist t ON t.tid=m.tid
WHERE p.asset=%s AND p.shortptname = ANY(%s)
"""
    # list(...) so any iterable (e.g. dict.values()) is sent as one array parameter.
    params = (asset, list(shorttags))
    with conn.cursor() as cur:
        cur.execute(sql, params)
        rows = cur.fetchall()
    return {short: (tbl, int(oit)) for short, tbl, oit in rows}
def tag_frame(conn, role_map, asset=ASSET):
    """Build a wide DataFrame with one column per role.

    Each short tag is located via ``resolve()``; one query is issued per
    source table and the partial frames are outer-merged on ``dtat``.

    Args:
        conn: Open DB connection, passed to ``resolve()`` and ``pd.read_sql``.
        role_map: ``{role: shorttag}`` mapping; the role names become the
            column names of the result.
        asset: Asset path forwarded to ``resolve()``.

    Returns:
        DataFrame sorted by ``dtat`` with a fresh 0..n-1 index; ``dtat`` is a
        regular column. Roles whose tag could not be resolved are reported on
        stderr and have no column. If no tag resolves at all, an empty frame
        containing only a ``dtat`` column is returned.
    """
    loc = resolve(conn, role_map.values(), asset)
    missing = [r for r, t in role_map.items() if t not in loc]
    if missing:
        print(f"[warn] 미해결 태그: {[(r, role_map[r]) for r in missing]}", file=sys.stderr)

    # Group the resolved roles by the table that stores them.
    by_tbl = {}
    for role, short in role_map.items():
        if short in loc:
            tbl, col = loc[short]
            by_tbl.setdefault(tbl, []).append((role, col))

    if not by_tbl:
        # Nothing resolved: previously this fell through to None.sort_values()
        # and raised AttributeError. Return an empty frame instead.
        return pd.DataFrame(columns=["dtat"])

    df = None
    for tbl, cols in by_tbl.items():
        # Identifiers cannot be bound as query parameters, so they are
        # interpolated: tbl comes from the tblist table and role from
        # role_map. NOTE(review): role_map keys must not come from untrusted
        # input.
        sel = ", ".join(f'col{c:02d} AS "{role}"' for role, c in cols)
        q = f"SELECT dtat, {sel} FROM {tbl}"
        part = pd.read_sql(q, conn)
        df = part if df is None else df.merge(part, on="dtat", how="outer")
    return df.sort_values("dtat").reset_index(drop=True)
def classify_phases(df, *, reb_min=60, vacuum_max=200, steam_min=5,
                    product_min=80, slope_window=120, slope_min_periods=10,
                    slope_threshold=0.02):
    """First-pass, threshold-based operating-mode classification (§16.3-2).

    A row counts as "running" when ``reb_temp > reb_min``,
    ``vacuum < vacuum_max`` and ``steam_op > steam_min``. Running rows are
    labelled ``"LINEOUT"`` when ``product < product_min`` and ``"PROD"``
    otherwise. All other rows are labelled from the trend of ``reb_temp``
    (centered rolling mean of its first difference): ``"STARTUP"`` above
    ``+slope_threshold``, ``"SHUTDOWN"`` below ``-slope_threshold``, else
    ``"STOPPED"``. A NaN in any comparison makes that comparison false, so
    rows without enough samples for a slope end up as ``"STOPPED"``.

    The defaults are the thresholds originally hard-coded for this function;
    they are keyword-only so other columns can supply their own values.

    Args:
        df: DataFrame with ``reb_temp``, ``vacuum``, ``steam_op`` and
            ``product`` columns.
        reb_min: Lower bound (exclusive) on ``reb_temp`` for a running row.
        vacuum_max: Upper bound (exclusive) on ``vacuum`` for a running row.
        steam_min: Lower bound (exclusive) on ``steam_op`` for a running row.
        product_min: ``product`` below this value means line-out.
        slope_window: Rolling window length in samples (the original note
            describes 120 samples as 60 minutes).
        slope_min_periods: Minimum valid samples in the window for a slope.
        slope_threshold: Magnitude of mean per-sample change that separates
            startup/shutdown from stopped.

    Returns:
        ``pd.Series`` of mode labels named ``"mode"``, indexed like ``df``.
    """
    import numpy as np

    reb, vac, steam, prod = df["reb_temp"], df["vacuum"], df["steam_op"], df["product"]

    # Column in operation: hot and under vacuum with steam applied.
    hot_vac = (reb > reb_min) & (vac < vacuum_max) & (steam > steam_min)

    # Temperature trend used to tell startup from shutdown.
    slope = reb.diff().rolling(
        slope_window, min_periods=slope_min_periods, center=True
    ).mean()

    # Running rows: little or no product draw is treated as line-out.
    running = np.where(prod < product_min, "LINEOUT", "PROD")
    # Non-running rows: classified by the sign and size of the trend.
    idle = np.where(
        slope > slope_threshold, "STARTUP",
        np.where(slope < -slope_threshold, "SHUTDOWN", "STOPPED"))

    mode = np.where(hot_vac, running, idle)
    return pd.Series(mode, index=df.index, name="mode")
def plot_timeline(df, png):
    """Render a five-panel timeline of the column signals and save it to a file.

    Panels, top to bottom: temperatures (reb_temp, T_C, T_D), feed, side-draw
    product, steam flow together with steam_op x 10, and vacuum (y-axis fixed
    to 100-130). Every panel also gets mode-colored tick markers drawn at its
    lower y-limit.

    NOTE(review): axis labels and the title are hard-coded for C-6111 (6-1),
    even if ``df`` was extracted for another column.

    Args:
        df: DataFrame with columns dtat, reb_temp, T_C, T_D, feed, product,
            steam_flow, steam_op, vacuum and mode.
        png: Output path handed to ``Figure.savefig``.
    """
    import matplotlib
    matplotlib.use("Agg")  # non-interactive backend: render to file only
    import matplotlib.pyplot as plt

    # Keep every 30th row (the original note calls this a 15-minute downsample).
    d = df.iloc[::30].copy()
    colors = {"PROD": "#2ca02c", "LINEOUT": "#ff7f0e", "STARTUP": "#1f77b4",
              "SHUTDOWN": "#d62728", "STOPPED": "#7f7f7f"}

    fig, ax = plt.subplots(5, 1, figsize=(16, 12), sharex=True)
    try:
        ax[0].plot(d.dtat, d.reb_temp, lw=.5, label="reb_temp(A)")
        ax[0].plot(d.dtat, d.T_C, lw=.5, label="T_C(민감단)")
        ax[0].plot(d.dtat, d.T_D, lw=.5, label="T_D(탑상)")
        ax[0].set_ylabel("온도")
        ax[0].legend(loc="upper right", fontsize=7)

        ax[1].plot(d.dtat, d.feed, lw=.5, color="purple")
        ax[1].set_ylabel("feed FICQ-6101")

        # "product" is read with [] because attribute access would return the
        # DataFrame.product method instead of the column.
        ax[2].plot(d.dtat, d["product"], lw=.5, color="orange")
        ax[2].set_ylabel("측류제품 6118")

        ax[3].plot(d.dtat, d.steam_flow, lw=.5, color="red")
        ax[3].plot(d.dtat, d.steam_op * 10, lw=.5, color="brown", alpha=.5, label="OP×10")
        ax[3].set_ylabel("스팀유량/OP")
        ax[3].legend(loc="upper right", fontsize=7)

        ax[4].plot(d.dtat, d.vacuum, lw=.5, color="teal")
        ax[4].set_ylabel("진공 PICA-6111")
        ax[4].set_ylim(100, 130)

        # Mode markers along the bottom of each panel.
        for a in ax:
            # Read the lower limit once per axis so all modes share one
            # baseline; re-reading it after each scatter can pick up a
            # re-autoscaled limit and stagger the markers.
            y0 = a.get_ylim()[0]
            for m, c in colors.items():
                seg = d[d["mode"] == m]
                a.scatter(seg.dtat, [y0] * len(seg), c=c, s=2, marker="|")

        fig.suptitle("C-6111 (6-1차) 전체기간 — 운전모드별 (하단 컬러바)")
        fig.tight_layout()
        fig.savefig(png, dpi=90)
    finally:
        # pyplot keeps figures alive until closed; release this one so
        # repeated calls do not accumulate figures.
        plt.close(fig)
    print(f"플롯 저장: {png}")
def main():
    """Extract the C-6111 frame, print summaries, classify modes, and save outputs.

    Reads the ROLES tags from the database at DSN, prints the row count, time
    range and selected percentiles, adds a ``mode`` column, prints the mode
    distribution, then writes a pickle and a timeline PNG to fixed paths.
    """
    out = "/home/windpacer/projects/hc900_ax/scripts/analysis/c6111_data.pkl"
    png = "/home/windpacer/projects/hc900_ax/scripts/analysis/c6111_timeline.png"

    # Only the extraction needs the connection.
    with psycopg.connect(DSN) as conn:
        df = tag_frame(conn, ROLES)
    print(f"행수={len(df)} 기간={df.dtat.min()} ~ {df.dtat.max()}")

    # Signal distribution, used for choosing the operating-mode thresholds.
    print("\n=== 핵심 신호 분포 (운전모드 임계 설정용) ===")
    show = [
        "feed", "reb_temp", "vacuum", "product", "reflux",
        "steam_op", "steam_flow", "T_C", "T_D", "dp",
    ]
    stats = df[show].describe(percentiles=[.01, .05, .25, .5, .75, .95, .99])
    desc = stats.T
    wanted = ["min", "1%", "5%", "50%", "95%", "99%", "max"]
    print(desc[wanted].round(2).to_string())

    df["mode"] = classify_phases(df)

    # Mode distribution; hours are computed from 30-second samples.
    print("\n=== 운전모드 분포 (30초 샘플 기준) ===")
    for m, n in df["mode"].value_counts().items():
        print(f" {m:9s} {n:7d} {100*n/len(df):5.1f}% ≈ {n*30/3600:7.1f} h")

    df.to_pickle(out)
    plot_timeline(df, png)
    print(f"저장: {out}")
# Run the extraction and analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()