Files
HC900-Crawler/scripts/analysis/c6111_shadow.py
windpacer 4306f76ddb feat: 형제 컬럼(6-2~10차) 분석 + SHUTDOWN + operator-assist + C# SteamAdvisor 포팅
- c6111_extract: roles_for() 동적 생성, COLUMN_EXCEPTIONS per-prefix
- c6111_prodmap/shadow/startup/rolling: --data/--prefix CLI 인자 지원
- run_column.py: 5개 컬럼 전 파이프라인 실행 래퍼
- c6111_shutdown.py: detect_cutoffs + shutdown_milestones (lookback 1200)
- c6111_operator_assist.py: OOD 게이트 + shadow 리플레이
- c6111_export_model.py: 선형근사 JSON export
- SteamAdvisor.cs: Predict+ClassifyMode+InEnvelope (NaN guard, Ood fix)
- SteamAdvisorController: GET/POST /api/steam/predict
- appsettings.json/Program.cs: DI 등록
- docs: 작업지시서 현황 갱신, 진단보고서 작성 (3 MED/8 LOW, 100% 정확도)
2026-06-05 19:46:57 +09:00

101 lines
4.4 KiB
Python

"""
Shadow predictor — history-replay backtest.
Prerequisite: c6111_data.pkl. Sibling-column compatible via the --data and --prefix CLI arguments.
"""
import argparse
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error
# Directory prefix (with trailing slash) used for the default --data pickle
# and for the output PNG written by main().
BASE = "/home/windpacer/projects/hc900_ax/scripts/analysis/"
# Column names fed to the flow model, in the order the model is trained on.
FEATURES = ["feed", "product", "T_C"]
# Rolling-median window length (in rows) used to smooth the model inputs.
SMOOTH = 40
# Quantile of the "dtat" timestamps that separates training from held-out rows.
TRAIN_FRAC = 0.70
class SteamPredictor:
    """Operating-point steam-flow model plus an inverse valve curve (flow -> OP).

    After ``fit`` the instance carries two attributes:

    * ``model`` - a ``GradientBoostingRegressor`` mapping the ``FEATURES``
      columns to ``steam_flow``, trained on 6-hour medians.
    * ``inv`` - coefficients (highest power first) of a cubic polynomial
      mapping ``steam_flow`` to ``steam_op``, fitted on the raw rows.
    """

    def fit(self, df_train):
        """Fit the flow model and the inverse valve curve.

        Args:
            df_train: DataFrame with a ``dtat`` column usable as a resampling
                index, the ``FEATURES`` columns, ``steam_flow`` and
                ``steam_op``.

        Returns:
            ``self``, so that ``SteamPredictor().fit(df)`` can be chained.
        """
        # Every column the regressor consumes must be present in a bin;
        # GradientBoostingRegressor raises on NaN inputs, so drop bins that
        # miss any feature (not only steam_flow/feed).
        required = list(dict.fromkeys(["steam_flow", "feed", *FEATURES]))
        ops = (
            df_train.set_index("dtat")
            .resample("6h")
            .median(numeric_only=True)
            .dropna(subset=required)
        )
        # Keep only 6-hour bins whose median feed exceeds 50.
        ops = ops[ops["feed"] > 50]

        self.model = GradientBoostingRegressor(
            n_estimators=200,
            max_depth=2,
            learning_rate=0.05,
            random_state=0,
        )
        self.model.fit(ops[FEATURES].values, ops["steam_flow"].values)

        # Inverse valve characteristic: OP = cubic(flow), fitted on raw rows.
        # Non-finite pairs are masked out because a single NaN would poison
        # every coefficient of the least-squares fit.
        # NOTE(review): an unconstrained cubic is not guaranteed to be
        # monotonic over the operating range - verify the fitted curve.
        flow = np.asarray(df_train["steam_flow"], dtype=float)
        op = np.asarray(df_train["steam_op"], dtype=float)
        finite = np.isfinite(flow) & np.isfinite(op)
        self.inv = np.polyfit(flow[finite], op[finite], 3)
        return self

    def predict_flow(self, X):
        """Return the model's steam-flow prediction for feature matrix ``X``.

        ``X`` must have its columns in ``FEATURES`` order.
        """
        return self.model.predict(X)

    def flow_to_op(self, flow):
        """Convert steam flow to valve output via the fitted cubic.

        The result is clipped to the range [0, 100].
        """
        return np.clip(np.polyval(self.inv, flow), 0, 100)
def main():
    """Replay history through the shadow predictor and report OP accuracy.

    Steps: load the pickled frame given by ``--data``, keep valid PROD rows,
    smooth the model inputs, split by time at the ``TRAIN_FRAC`` quantile of
    ``dtat``, fit a ``SteamPredictor`` on the earlier part, print error
    metrics for both parts (overall and inside the training envelope), and
    save a three-panel plot of the held-out part as
    ``BASE + "<prefix>_shadow.png"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", default=BASE + "c6111_data.pkl")
    parser.add_argument("--prefix", default="c6111")
    args = parser.parse_args()

    # NOTE(security): unpickling can execute arbitrary code - only point
    # --data at files produced by the trusted extraction step.
    df = pd.read_pickle(args.data)
    df = df[df["mode"] == "PROD"].copy()
    valid = (
        (df["feed"] > 50)
        & (df["steam_flow"] > 10)
        & (df["steam_op"] > 1)
        & df[FEATURES + ["steam_op"]].notna().all(axis=1)
    )
    df = df[valid].sort_values("dtat")

    # Smooth the model inputs with a rolling median.
    # NOTE(review): center=True lets each window see future rows, which a
    # live shadow cannot do - confirm this look-ahead is acceptable here.
    for c in FEATURES:
        df[c + "_s"] = df[c].rolling(SMOOTH, min_periods=1, center=True).median()

    # Time-ordered split: rows up to the cut train, later rows are held out.
    cut = df["dtat"].quantile(TRAIN_FRAC)
    tr, te = df[df["dtat"] <= cut], df[df["dtat"] > cut]
    print(f"학습 {tr.dtat.min()}~{tr.dtat.max()} ({len(tr)}) "
          f"shadow(held-out) {te.dtat.min()}~{te.dtat.max()} ({len(te)})")
    pred = SteamPredictor().fit(tr)

    # OOD gate: rows whose features fall outside the 1%-99% quantile range of
    # the training data are treated as low confidence (operator fallback).
    # NOTE(review): the gate checks the raw feature columns while the model
    # is fed the smoothed "_s" columns - confirm that is intended.
    lo, hi = tr[FEATURES].quantile(0.01), tr[FEATURES].quantile(0.99)
    print(f"학습 envelope: " + ", ".join(f"{c}[{lo[c]:.0f},{hi[c]:.1f}]" for c in FEATURES))

    def in_env(d):
        """Return a boolean Series: True where every feature is inside [lo, hi]."""
        return ((d[FEATURES] >= lo) & (d[FEATURES] <= hi)).all(axis=1)

    smoothed_cols = [c + "_s" for c in FEATURES]
    heldout_label = "★held-out shadow"
    shadow = None
    for name, d in [("학습기간", tr), (heldout_label, te)]:
        Xs = d[smoothed_cols].values
        pf = pred.predict_flow(Xs)
        po = pred.flow_to_op(pf)
        ao = d["steam_op"].values
        env = in_env(d).values
        within = np.mean(np.abs(po - ao) <= 2.0) * 100
        print(f"\n[{name}] OOD(범위밖)={100*(~env).mean():.1f}%")
        print(f" 전체 OP MAE={mean_absolute_error(ao,po):.2f} |Δ|≤2%={within:.1f}%")
        # In-envelope metrics are only reported with more than 50 such rows.
        if env.sum() > 50:
            print(f" in-envelope OP MAE={mean_absolute_error(ao[env],po[env]):.2f} "
                  f"|Δ|≤2%={np.mean(np.abs(po[env]-ao[env])<=2)*100:.1f}% ← shadow가 신뢰구간에서 조언")
        # Keep the annotated frame for the held-out part only. The previous
        # check, startswith(""), was always true and merely worked because the
        # held-out entry happened to be last in the list.
        if name == heldout_label:
            shadow = d.assign(pred_flow=pf, pred_op=po, ood=~env)

    # Plot: held-out time-series overlay, OP comparison, and error histogram.
    fig, ax = plt.subplots(3, 1, figsize=(16, 11))
    s = shadow.iloc[::20]  # every 20th row keeps the line plots light
    ax[0].plot(s.dtat, s["steam_flow"], lw=.6, label="actual steam flow")
    ax[0].plot(s.dtat, s["pred_flow"], lw=.6, c="r", label="predicted")
    ax[0].set_title("held-out shadow: steam flow actual vs predicted")
    ax[0].legend(fontsize=8)
    ax[1].plot(s.dtat, s["steam_op"], lw=.6, label="actual operator OP")
    ax[1].plot(s.dtat, s["pred_op"], lw=.6, c="r", label="predicted OP")
    ax[1].set_ylabel("OP %")
    ax[1].set_title("operator OP vs shadow-predicted OP")
    ax[1].legend(fontsize=8)
    err = shadow["pred_op"] - shadow["steam_op"]
    ax[2].hist(err, bins=80)
    ax[2].axvline(0, c="k", lw=.5)
    ax[2].set_title(f"OP error (pred-actual): median {err.median():+.2f}%, std {err.std():.2f}%")
    fig.tight_layout()
    fig.savefig(BASE + f"{args.prefix}_shadow.png", dpi=95)
    plt.close(fig)  # release the figure once it is on disk
    print(f"\n플롯 저장: {BASE}{args.prefix}_shadow.png")


# Run the backtest only when executed as a script, not on import.
if __name__ == "__main__":
    main()