"""C-6111 shadow predictor: history-replay backtest (plan §7, shadow entry).

Learns ``steam_flow = f(feed, product, target T_C)`` from operating points in
the training period, then replays the held-out future period sample by sample
to produce predicted steam flow -> (inverse valve characteristic) -> predicted
OP, and compares that against the actual operator OP.  The goal is an honest
check of "had this predictor been running as a shadow, how closely would it
have matched the operator's hand?".

Prerequisite: ``c6111_data.pkl``.  The port target (a later C# live shadow)
uses the same logic.
"""
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")  # must be selected before pyplot is imported
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error

BASE = "/home/windpacer/projects/hc900_ax/scripts/analysis/"
FEATURES = ["feed", "product", "T_C"]  # clean causal/target inputs (§16.6)
SMOOTH = 40          # input smoothing window in rows (original note: ~20 min)
# A centered window reads SMOOTH/2 *future* rows at every replayed sample,
# which a live shadow can never see.  Keep the window trailing (causal) so the
# backtest does not overstate agreement with the operator.
SMOOTH_CENTERED = False
TRAIN_FRAC = 0.70    # first 70% of the period trains, last 30% is held-out shadow
OP_TOL = 2.0         # |pred OP - actual OP| tolerance (OP %) counted as agreement


class SteamPredictor:
    """Operating-point steam-flow model plus inverse valve curve (flow -> OP)."""

    def fit(self, df_train: pd.DataFrame) -> "SteamPredictor":
        """Fit the flow model and the inverse valve polynomial.

        Args:
            df_train: rows with a datetime column ``dtat``, the FEATURES
                columns, ``steam_flow`` and ``steam_op``.

        Returns:
            ``self``, so the call can be chained.

        Raises:
            ValueError: if no 6-hour operating point survives the filters.
        """
        # Collapse the raw samples into 6-hour median operating points; empty
        # bins come out as NaN and are dropped.
        ops = (
            df_train.set_index("dtat")
            .resample("6h")
            .median(numeric_only=True)
            .dropna(subset=["steam_flow", "feed"])
        )
        ops = ops[ops["feed"] > 50]
        if ops.empty:
            raise ValueError("no usable 6h operating points in the training data")

        self.model = GradientBoostingRegressor(
            n_estimators=200, max_depth=2, learning_rate=0.05, random_state=0
        )
        self.model.fit(ops[FEATURES].values, ops["steam_flow"].values)

        # Inverse valve characteristic: OP = cubic poly(flow).  Monotonicity is
        # assumed by the original design, not enforced by the fit.
        # Drop incomplete pairs first: a single NaN would poison np.polyfit.
        valve = df_train[["steam_flow", "steam_op"]].dropna()
        self.inv = np.polyfit(valve["steam_flow"], valve["steam_op"], 3)
        return self

    def predict_flow(self, X):
        """Return the predicted steam flow for a feature matrix ``X``."""
        return self.model.predict(X)

    def flow_to_op(self, flow):
        """Map steam flow to valve OP through the fitted cubic, clipped to 0..100."""
        return np.clip(np.polyval(self.inv, flow), 0, 100)


def _score_period(name, frame, predictor, lo, hi):
    """Print the OP agreement metrics for one period and return the scored frame.

    Args:
        name: label printed in the report header.
        frame: rows to replay; needs FEATURES, their ``*_s`` smoothed
            counterparts and ``steam_op``.
        predictor: a fitted ``SteamPredictor``.
        lo, hi: per-feature lower/upper envelope bounds (indexed by FEATURES).

    Returns:
        ``frame`` with ``pred_flow``, ``pred_op`` and ``ood`` columns added.
    """
    smoothed_inputs = frame[[c + "_s" for c in FEATURES]].values
    pred_flow = predictor.predict_flow(smoothed_inputs)
    pred_op = predictor.flow_to_op(pred_flow)
    actual_op = frame["steam_op"].values

    # NOTE(review): the envelope is checked on the raw features while the model
    # consumes the smoothed ones; confirm which one the live gate should use.
    env = ((frame[FEATURES] >= lo) & (frame[FEATURES] <= hi)).all(axis=1).values
    abs_err = np.abs(pred_op - actual_op)
    within = np.mean(abs_err <= OP_TOL) * 100

    print(f"\n[{name}] OOD(범위밖)={100*(~env).mean():.1f}%")
    print(f" 전체 OP MAE={mean_absolute_error(actual_op, pred_op):.2f} |Δ|≤2%={within:.1f}%")
    # Only report the in-envelope figures when enough rows back them up.
    if env.sum() > 50:
        print(f" in-envelope OP MAE={mean_absolute_error(actual_op[env], pred_op[env]):.2f} "
              f"|Δ|≤2%={np.mean(abs_err[env] <= OP_TOL)*100:.1f}% ← shadow가 신뢰구간에서 조언")

    return frame.assign(pred_flow=pred_flow, pred_op=pred_op, ood=~env)


def main() -> None:
    """Run the replay backtest: load, split, fit, report, and save the plot.

    Raises:
        ValueError: if the filtered data cannot be split into a non-empty
            training period and a non-empty held-out period.
    """
    # NOTE: read_pickle executes arbitrary code embedded in the file; only load
    # pickles produced by this project's own pipeline.
    df = pd.read_pickle(BASE + "c6111_data.pkl")
    df = df[df["mode"] == "PROD"].copy()
    valid = (
        (df["feed"] > 50)
        & (df["steam_flow"] > 10)
        & (df["steam_op"] > 1)
        & df[FEATURES + ["steam_op"]].notna().all(axis=1)
    )
    df = df[valid].sort_values("dtat")

    # Input smoothing (the live shadow smooths noise as well).  The window is
    # trailing unless SMOOTH_CENTERED is set, so no future rows leak in.
    for c in FEATURES:
        df[c + "_s"] = (
            df[c].rolling(SMOOTH, min_periods=1, center=SMOOTH_CENTERED).median()
        )

    # Chronological split: everything up to the TRAIN_FRAC time quantile trains.
    cut = df["dtat"].quantile(TRAIN_FRAC)
    tr, te = df[df["dtat"] <= cut], df[df["dtat"] > cut]
    if tr.empty or te.empty:
        raise ValueError(
            f"cannot split data into train/held-out periods "
            f"(train rows={len(tr)}, held-out rows={len(te)})"
        )
    print(f"학습 {tr.dtat.min()}~{tr.dtat.max()} ({len(tr)}) "
          f"shadow(held-out) {te.dtat.min()}~{te.dtat.max()} ({len(te)})")

    pred = SteamPredictor().fit(tr)

    # OOD gate (outside the training operating envelope): inputs outside the
    # training 1-99% range are treated as low confidence -> operator fallback.
    lo, hi = tr[FEATURES].quantile(0.01), tr[FEATURES].quantile(0.99)
    print("학습 envelope: "
          + ", ".join(f"{c}[{lo[c]:.0f},{hi[c]:.1f}]" for c in FEATURES))

    _score_period("학습기간", tr, pred, lo, hi)
    te = _score_period("★held-out shadow", te, pred, lo, hi)

    # Plot: held-out time-series overlay + OP comparison + error distribution.
    fig, ax = plt.subplots(3, 1, figsize=(16, 11))
    s = te.iloc[::20]  # thin the series so the line plots stay readable
    ax[0].plot(s.dtat, s["steam_flow"], lw=.6, label="actual steam flow")
    ax[0].plot(s.dtat, s["pred_flow"], lw=.6, c="r", label="predicted")
    ax[0].set_title("held-out shadow: steam flow actual vs predicted")
    ax[0].legend(fontsize=8)
    ax[1].plot(s.dtat, s["steam_op"], lw=.6, label="actual operator OP")
    ax[1].plot(s.dtat, s["pred_op"], lw=.6, c="r", label="predicted OP")
    ax[1].set_ylabel("OP %")
    ax[1].set_title("operator OP vs shadow-predicted OP")
    ax[1].legend(fontsize=8)
    err = te["pred_op"] - te["steam_op"]
    ax[2].hist(err, bins=80)
    ax[2].axvline(0, c="k", lw=.5)
    ax[2].set_title(f"OP error (pred-actual): median {err.median():+.2f}%, std {err.std():.2f}%")
    fig.tight_layout()
    fig.savefig(BASE + "c6111_shadow.png", dpi=95)
    plt.close(fig)  # release the figure once it is on disk
    print(f"\n플롯 저장: {BASE}c6111_shadow.png")


if __name__ == "__main__":
    main()