""" 롤링(walk-forward) 재학습 — OOD/외삽 바이어스 해소 데모. 형제 컬럼 호환: --data, --prefix CLI 인자. """ import argparse import numpy as np import pandas as pd import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt from sklearn.metrics import mean_absolute_error from c6111_shadow import SteamPredictor, FEATURES, BASE, SMOOTH HELDOUT_START = "2026-05-01" RETRAIN_EVERY = "1D" def main(): parser = argparse.ArgumentParser() parser.add_argument("--data", default=BASE + "C-6111_data.pkl") parser.add_argument("--prefix", default="C-6111") args = parser.parse_args() df = pd.read_pickle(args.data) df = df[df["mode"] == "PROD"].copy() df = df[(df["feed"] > 50) & (df["steam_flow"] > 10) & (df["steam_op"] > 1) & df[FEATURES + ["steam_op"]].notna().all(axis=1)].sort_values("dtat") # 인과(trailing) 평활 — 미래누설 없음 for c in FEATURES: df[c + "_s"] = df[c].rolling(SMOOTH, min_periods=1).median() ho = pd.Timestamp(HELDOUT_START) if df["dtat"].max() < ho: print(f"데이터 종료 {df.dtat.max()} < HELDOUT_START({ho}) — 롤링 재학습 불가. (컬럼 가동기간이 5월 이전)") return days = pd.date_range(ho, df["dtat"].max(), freq=RETRAIN_EVERY) # 정적 모델: 5월 이전 전체로 1회 학습 static = SteamPredictor().fit(df[df["dtat"] < ho]) slo, shi = (df[df["dtat"] < ho][FEATURES].quantile(0.01), df[df["dtat"] < ho][FEATURES].quantile(0.99)) rows = [] for d0, d1 in zip(days[:-1], days[1:]): day = df[(df["dtat"] >= d0) & (df["dtat"] < d1)] if len(day) < 30: continue train = df[df["dtat"] < d0] # expanding: 그 날 이전 전체 roll = SteamPredictor().fit(train) lo, hi = train[FEATURES].quantile(0.01), train[FEATURES].quantile(0.99) Xs = day[[c + "_s" for c in FEATURES]].values ao = day["steam_op"].values po_r = roll.flow_to_op(roll.predict_flow(Xs)) po_s = static.flow_to_op(static.predict_flow(Xs)) ood_r = (~((day[FEATURES] >= lo) & (day[FEATURES] <= hi)).all(axis=1)).mean() rows.append(dict(day=d0, mae_roll=mean_absolute_error(ao, po_r), mae_static=mean_absolute_error(ao, po_s), w2_roll=np.mean(np.abs(po_r - ao) <= 2) * 100, w2_static=np.mean(np.abs(po_s - ao) <= 2) * 100, ood_roll=ood_r * 100)) r = pd.DataFrame(rows) print(f"=== 5월 held-out, 일별 walk-forward 재학습 ({len(r)}일) ===") print(f"정적 모델 : OP MAE {r.mae_static.mean():.2f}% |Δ|≤2% {r.w2_static.mean():.1f}%") print(f"롤링 모델 : OP MAE {r.mae_roll.mean():.2f}% |Δ|≤2% {r.w2_roll.mean():.1f}%") print(f"롤링 OOD 비율: 첫주 {r.head(7).ood_roll.mean():.0f}% → 마지막주 {r.tail(7).ood_roll.mean():.0f}%") print("\n일별(요약):") print(r[["day", "mae_static", "mae_roll", "w2_roll", "ood_roll"]] .assign(day=r.day.dt.strftime("%m-%d")).round(1).to_string(index=False)) fig, ax = plt.subplots(2, 1, figsize=(14, 8), sharex=True) ax[0].plot(r.day, r.mae_static, "r.-", label="static (Feb-Apr model)") ax[0].plot(r.day, r.mae_roll, "g.-", label="rolling retrain") ax[0].axhline(2, color="gray", ls=":", label="2% 허용") ax[0].set_ylabel("OP MAE %"); ax[0].legend(); ax[0].set_title("Rolling vs static — adaptation over May") ax[1].plot(r.day, r.ood_roll, "b.-"); ax[1].set_ylabel("rolling OOD %") ax[1].set_title("OOD fraction (학습 envelope 밖) — 5월 데이터 흡수하며 감소") fig.tight_layout(); fig.savefig(BASE + f"{args.prefix}_rolling.png", dpi=95) print(f"\n플롯 저장: {BASE}{args.prefix}_rolling.png") if __name__ == "__main__": main()