""" ① 생산 정상상태 맵. PROD 구간에서 밸브특성 + 스팀유량 회귀. 선행: c6111_extract.py 가 만든 c6111_data.pkl (mode 컬럼 포함). 형제 컬럼 호환: --data, --prefix CLI 인자. """ import argparse import numpy as np import pandas as pd import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt from sklearn.linear_model import LinearRegression from sklearn.ensemble import GradientBoostingRegressor from sklearn.preprocessing import StandardScaler from sklearn.metrics import r2_score, mean_absolute_error BASE = "/home/windpacer/projects/hc900_ax/scripts/analysis/" TARGET = "steam_flow" FEATURES = ["feed", "product", "vacuum", "feed_preheat", "T_C", "T_D"] OP_RESAMPLE = "6h" def load(data_path=None): if data_path is None: data_path = BASE + "C-6111_data.pkl" df = pd.read_pickle(data_path) df = df[df["mode"] == "PROD"].copy() # 엔지니어링 피처: 온도 구배(분리도) df["dT_AC"] = df["reb_temp"] - df["T_C"] df["dT_CD"] = df["T_C"] - df["T_D"] # 기본 정합성: 유량/유효범위 (센서 음수노이즈·결측 제거) df = df[(df["feed"] > 50) & (df["steam_flow"] > 10) & (df["steam_op"] > 1) & df[FEATURES + [TARGET, "steam_op"]].notna().all(axis=1)] return df.sort_values("dtat").reset_index(drop=True) def valve_char(df): """OP(밸브%) ↔ 스팀유량(FIQ-6115) 특성.""" op, fl = df["steam_op"].values, df["steam_flow"].values # 선형게인 a = np.polyfit(op, fl, 1) # 상승/하강 방향별(히스테리시스 ~ stiction 신호): OP 변화방향으로 분리 dop = np.diff(df["steam_op"].values, prepend=df["steam_op"].values[0]) up, dn = dop > 0.05, dop < -0.05 # OP 빈(bin)별 유량 평균 — 같은 OP에서 상승/하강 유량차 = 히스테리시스 bins = np.arange(np.floor(op.min()), np.ceil(op.max()) + 1, 1.0) rows = [] for lo, hi in zip(bins[:-1], bins[1:]): m = (op >= lo) & (op < hi) if m.sum() < 20: continue fu = fl[m & up].mean() if (m & up).sum() > 5 else np.nan fd = fl[m & dn].mean() if (m & dn).sum() > 5 else np.nan rows.append((lo + .5, fl[m].mean(), fu, fd, m.sum())) hb = pd.DataFrame(rows, columns=["op", "flow", "flow_up", "flow_dn", "n"]) hyst = (hb["flow_dn"] - hb["flow_up"]).abs().mean() print(f"[밸브] 선형 flow ≈ {a[0]:.1f}·OP + {a[1]:.1f} " f"(OP {op.min():.0f}~{op.max():.0f}%, flow {fl.min():.0f}~{fl.max():.0f})") print(f"[밸브] 상승/하강 평균 유량차(히스테리시스≈stiction) = {hyst:.1f} " f"(유량 스팬의 {100*hyst/(fl.max()-fl.min()):.1f}%)") return hb, a def regress(df): from sklearn.model_selection import train_test_split # 운전점 집계: 정상상태 내부 변동이 거의 없어(98% steady) 점단위 학습 불가. # 6h 중앙값 = 캠페인/로드레벨 단위 운전점 → 진짜 f(부하) 신호. ops = (df.set_index("dtat").resample(OP_RESAMPLE).median(numeric_only=True) .dropna(subset=[TARGET, "feed"])) ops = ops[ops["feed"] > 50] print(f"\n[운전점] PROD {len(df)}행 → {OP_RESAMPLE} 운전점 {len(ops)}개") X, y = ops[FEATURES].values, ops[TARGET].values Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=.3, random_state=0) # 베이스라인: 피드만 (steam/feed 비율제어가 얼마나 설명?) lb = LinearRegression().fit(Xtr[:, :1], ytr) r2_feed = r2_score(yte, lb.predict(Xte[:, :1])) sc = StandardScaler().fit(Xtr) lin = LinearRegression().fit(sc.transform(Xtr), ytr) gbm = GradientBoostingRegressor(n_estimators=200, max_depth=2, learning_rate=0.05, random_state=0).fit(Xtr, ytr) span = y.max() - y.min() for name, pred in [("Linear", lin.predict(sc.transform(Xte))), ("GBM", gbm.predict(Xte))]: print(f"[모델 {name:7s}] test R²(FIT)={r2_score(yte,pred):.3f} " f"MAE={mean_absolute_error(yte,pred):.1f} (스팬의 {100*mean_absolute_error(yte,pred)/span:.1f}%)") print(f"[베이스라인 피드단독] test R²={r2_feed:.3f} " f"steam/feed비 중앙값={(ops[TARGET]/ops['feed']).median():.3f}") print("\n[피처 중요도]") coef = pd.Series(lin.coef_, index=FEATURES) # 표준화 → 상대중요도 imp = pd.Series(gbm.feature_importances_, index=FEATURES) tbl = pd.DataFrame({"lin_std계수": coef.round(1), "GBM중요도": imp.round(3)}).sort_values("GBM중요도", ascending=False) print(tbl.to_string()) return ops, gbm, Xte, yte, gbm.predict(Xte), imp def plots(hb, ops, yte, pred, imp, prefix="C-6111"): fig, ax = plt.subplots(1, 4, figsize=(22, 5)) ax[0].scatter(hb["op"], hb["flow"], s=20, c="k", label="mean") ax[0].plot(hb["op"], hb["flow_up"], "b.-", ms=4, label="OP rising") ax[0].plot(hb["op"], hb["flow_dn"], "r.-", ms=4, label="OP falling") ax[0].set_xlabel("steam OP %"); ax[0].set_ylabel("steam flow") ax[0].set_title("Valve char (hysteresis=stiction)"); ax[0].legend() ax[1].scatter(ops["feed"], ops[TARGET], s=10, alpha=.5) ax[1].set_xlabel("feed"); ax[1].set_ylabel("steam flow") ax[1].set_title("steam vs feed (operating points)") ax[2].scatter(yte, pred, s=12, alpha=.5) lim = [min(yte.min(), pred.min()), max(yte.max(), pred.max())] ax[2].plot(lim, lim, "r--"); ax[2].set_xlabel("actual steam flow") ax[2].set_ylabel("predicted (GBM)"); ax[2].set_title("Predicted vs Actual (test ops)") imp.sort_values().plot.barh(ax=ax[3]); ax[3].set_title("GBM feature importance") fig.tight_layout(); fig.savefig(BASE + f"{prefix}_prodmap.png", dpi=95) print(f"\n플롯 저장: {BASE}{prefix}_prodmap.png") def main(): parser = argparse.ArgumentParser() parser.add_argument("--data", default=BASE + "C-6111_data.pkl") parser.add_argument("--prefix", default="C-6111") args = parser.parse_args() df = load(args.data) print(f"PROD 정합데이터 {len(df)}행") hb, a = valve_char(df) ops, gbm, Xte, yte, pred, imp = regress(df) plots(hb, ops, yte, pred, imp, args.prefix) if __name__ == "__main__": main()