""" Shadow 예측기 — 히스토리 리플레이 백테스트. 선행: c6111_data.pkl. 형제 컬럼 호환: --data, --prefix CLI 인자. """ import argparse import numpy as np import pandas as pd import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt from sklearn.ensemble import GradientBoostingRegressor from sklearn.metrics import r2_score, mean_absolute_error BASE = "/home/windpacer/projects/hc900_ax/scripts/analysis/" FEATURES = ["feed", "product", "T_C"] SMOOTH = 40 TRAIN_FRAC = 0.70 class SteamPredictor: """운전점 학습 + 밸브 역특성(flow→OP).""" def fit(self, df_train): ops = (df_train.set_index("dtat").resample("6h").median(numeric_only=True) .dropna(subset=["steam_flow", "feed"])) ops = ops[ops["feed"] > 50] self.model = GradientBoostingRegressor(n_estimators=200, max_depth=2, learning_rate=0.05, random_state=0) self.model.fit(ops[FEATURES].values, ops["steam_flow"].values) # 밸브 역특성: OP = poly(flow) (단조, 3차) self.inv = np.polyfit(df_train["steam_flow"], df_train["steam_op"], 3) return self def predict_flow(self, X): return self.model.predict(X) def flow_to_op(self, flow): return np.clip(np.polyval(self.inv, flow), 0, 100) def main(): parser = argparse.ArgumentParser() parser.add_argument("--data", default=BASE + "C-6111_data.pkl") parser.add_argument("--prefix", default="C-6111") args = parser.parse_args() df = pd.read_pickle(args.data) df = df[df["mode"] == "PROD"].copy() df = df[(df["feed"] > 50) & (df["steam_flow"] > 10) & (df["steam_op"] > 1) & df[FEATURES + ["steam_op"]].notna().all(axis=1)].sort_values("dtat") # 입력 평활 (실제 shadow도 노이즈 평활 사용) for c in FEATURES: df[c + "_s"] = df[c].rolling(SMOOTH, min_periods=1, center=True).median() cut = df["dtat"].quantile(TRAIN_FRAC) tr, te = df[df["dtat"] <= cut], df[df["dtat"] > cut] print(f"학습 {tr.dtat.min()}~{tr.dtat.max()} ({len(tr)}) " f"shadow(held-out) {te.dtat.min()}~{te.dtat.max()} ({len(te)})") pred = SteamPredictor().fit(tr) # OOD(학습 운전envelope 밖) 게이트: 입력이 학습 1~99% 범위 밖이면 '저신뢰→오퍼레이터 폴백' lo, hi = tr[FEATURES].quantile(0.01), tr[FEATURES].quantile(0.99) print(f"학습 envelope: " + ", ".join(f"{c}[{lo[c]:.0f},{hi[c]:.1f}]" for c in FEATURES)) def in_env(d): return ((d[FEATURES] >= lo) & (d[FEATURES] <= hi)).all(axis=1) for name, d in [("학습기간", tr), ("★held-out shadow", te)]: Xs = d[[c + "_s" for c in FEATURES]].values pf = pred.predict_flow(Xs) po = pred.flow_to_op(pf) ao = d["steam_op"].values env = in_env(d).values within = np.mean(np.abs(po - ao) <= 2.0) * 100 print(f"\n[{name}] OOD(범위밖)={100*(~env).mean():.1f}%") print(f" 전체 OP MAE={mean_absolute_error(ao,po):.2f} |Δ|≤2%={within:.1f}%") if env.sum() > 50: print(f" in-envelope OP MAE={mean_absolute_error(ao[env],po[env]):.2f} " f"|Δ|≤2%={np.mean(np.abs(po[env]-ao[env])<=2)*100:.1f}% ← shadow가 신뢰구간에서 조언") d = d.assign(pred_flow=pf, pred_op=po, ood=~env) if name.startswith("★"): te = d # 플롯: held-out 시계열 오버레이 + OP 비교 + 오차분포 fig, ax = plt.subplots(3, 1, figsize=(16, 11)) s = te.iloc[::20] ax[0].plot(s.dtat, s["steam_flow"], lw=.6, label="actual steam flow") ax[0].plot(s.dtat, s["pred_flow"], lw=.6, c="r", label="predicted") ax[0].set_title("held-out shadow: steam flow actual vs predicted"); ax[0].legend(fontsize=8) ax[1].plot(s.dtat, s["steam_op"], lw=.6, label="actual operator OP") ax[1].plot(s.dtat, s["pred_op"], lw=.6, c="r", label="predicted OP") ax[1].set_ylabel("OP %"); ax[1].set_title("operator OP vs shadow-predicted OP"); ax[1].legend(fontsize=8) err = te["pred_op"] - te["steam_op"] ax[2].hist(err, bins=80); ax[2].axvline(0, c="k", lw=.5) ax[2].set_title(f"OP error (pred-actual): median {err.median():+.2f}%, std {err.std():.2f}%") fig.tight_layout(); fig.savefig(BASE + f"{args.prefix}_shadow.png", dpi=95) print(f"\n플롯 저장: {BASE}{args.prefix}_shadow.png") if __name__ == "__main__": main()