Files
HC900-Crawler/scripts/analysis/c6111_prodmap.py
windpacer 4306f76ddb feat: 형제 컬럼(6-2~10차) 분석 + SHUTDOWN + operator-assist + C# SteamAdvisor 포팅
- c6111_extract: roles_for() 동적 생성, COLUMN_EXCEPTIONS per-prefix
- c6111_prodmap/shadow/startup/rolling: --data/--prefix CLI 인자 지원
- run_column.py: 5개 컬럼 전 파이프라인 실행 래퍼
- c6111_shutdown.py: detect_cutoffs + shutdown_milestones (lookback 1200)
- c6111_operator_assist.py: OOD 게이트 + shadow 리플레이
- c6111_export_model.py: 선형근사 JSON export
- SteamAdvisor.cs: Predict+ClassifyMode+InEnvelope (NaN guard, Ood fix)
- SteamAdvisorController: GET/POST /api/steam/predict
- appsettings.json/Program.cs: DI 등록
- docs: 작업지시서 현황 갱신, 진단보고서 작성 (3 MED/8 LOW, 100% 정확도)
2026-06-05 19:46:57 +09:00

136 lines
6.1 KiB
Python

"""
① 생산 정상상태 맵.
PROD 구간에서 밸브특성 + 스팀유량 회귀.
선행: c6111_extract.py 가 만든 c6111_data.pkl (mode 컬럼 포함).
형제 컬럼 호환: --data, --prefix CLI 인자.
"""
import argparse
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error
# Directory holding the default input pickle; plots() also writes its PNG here.
BASE = "/home/windpacer/projects/hc900_ax/scripts/analysis/"
# Column that regress() predicts.
TARGET = "steam_flow"
# Regressor columns, in the order used to build the design matrix. The
# feed-only baseline in regress() slices column 0, so "feed" must stay first.
FEATURES = ["feed", "product", "vacuum", "feed_preheat", "T_C", "T_D"]
# pandas resample rule used to aggregate PROD rows into operating points.
OP_RESAMPLE = "6h"
def load(data_path=None):
    """Load the extracted column data and keep only clean PROD rows.

    Args:
        data_path: Path of the pickle to read. When ``None`` the default
            ``BASE + "c6111_data.pkl"`` is used.

    Returns:
        A DataFrame restricted to ``mode == "PROD"``, with the derived
        temperature-gradient columns ``dT_AC`` and ``dT_CD`` added, filtered
        to rows with ``feed > 50``, ``steam_flow > 10``, ``steam_op > 1`` and
        no missing value in any feature/target/``steam_op`` column, sorted by
        ``dtat`` with a fresh 0..n-1 index.
    """
    path = BASE + "c6111_data.pkl" if data_path is None else data_path
    # NOTE(review): unpickling can execute arbitrary code; only point this at
    # trusted files.
    frame = pd.read_pickle(path)
    prod = frame[frame["mode"] == "PROD"].copy()

    # Engineered features: temperature gradients between column sections.
    prod["dT_AC"] = prod["reb_temp"] - prod["T_C"]
    prod["dT_CD"] = prod["T_C"] - prod["T_D"]

    # Basic sanity filter: drop out-of-range readings (negative sensor noise)
    # and rows with any missing required value.
    required = FEATURES + [TARGET, "steam_op"]
    in_range = (
        (prod["feed"] > 50)
        & (prod["steam_flow"] > 10)
        & (prod["steam_op"] > 1)
    )
    complete = prod[required].notna().all(axis=1)
    kept = prod[in_range & complete]
    return kept.sort_values("dtat").reset_index(drop=True)
def valve_char(df):
    """Characterise valve output (OP %) against steam flow.

    Fits a straight line ``flow ≈ a[0]·OP + a[1]`` over all rows, then bins
    the rows into 1 %-wide OP bins and compares the mean flow of samples
    taken while OP was rising against samples taken while OP was falling.
    The mean absolute difference between the two is printed as a hysteresis
    (stiction) indicator.

    Args:
        df: DataFrame with numeric ``steam_op`` and ``steam_flow`` columns,
            in the row order used to decide rising/falling direction.

    Returns:
        ``(hb, a)`` where ``hb`` is a DataFrame with one row per populated
        bin (columns ``op`` = bin centre, ``flow``, ``flow_up``, ``flow_dn``,
        ``n``) and ``a`` is the ``np.polyfit`` coefficient array
        ``[slope, intercept]``.
    """
    op, fl = df["steam_op"].values, df["steam_flow"].values
    # Linear gain.
    a = np.polyfit(op, fl, 1)

    # Split samples by OP movement direction; a +/-0.05 dead band ignores
    # samples where OP is effectively unchanged from the previous row.
    dop = np.diff(op, prepend=op[0])
    up, dn = dop > 0.05, dop < -0.05

    # 1 %-wide half-open bins [lo, hi). The stop is floor(max) + 2 so the
    # last edge is strictly above op.max(); with ceil(max) + 1 an
    # integer-valued maximum (e.g. a valve saturated at 100.0 %) sat exactly
    # on the last edge and was dropped from every bin.
    bins = np.arange(np.floor(op.min()), np.floor(op.max()) + 2, 1.0)
    rows = []
    for lo, hi in zip(bins[:-1], bins[1:]):
        m = (op >= lo) & (op < hi)
        if m.sum() < 20:
            # Too few samples for a meaningful bin mean.
            continue
        fu = fl[m & up].mean() if (m & up).sum() > 5 else np.nan
        fd = fl[m & dn].mean() if (m & dn).sum() > 5 else np.nan
        rows.append((lo + .5, fl[m].mean(), fu, fd, m.sum()))
    hb = pd.DataFrame(rows, columns=["op", "flow", "flow_up", "flow_dn", "n"])

    # Same-OP flow difference between rising and falling moves = hysteresis.
    if hb.empty:
        hyst = np.nan
    else:
        hyst = (hb["flow_dn"] - hb["flow_up"]).abs().mean()

    # A perfectly flat flow signal has zero span; report NaN rather than
    # dividing by zero.
    span = fl.max() - fl.min()
    hyst_pct = 100 * hyst / span if span > 0 else np.nan

    print(f"[밸브] 선형 flow ≈ {a[0]:.1f}·OP + {a[1]:.1f}  "
          f"(OP {op.min():.0f}~{op.max():.0f}%, flow {fl.min():.0f}~{fl.max():.0f})")
    print(f"[밸브] 상승/하강 평균 유량차(히스테리시스≈stiction) = {hyst:.1f} "
          f"(유량 스팬의 {hyst_pct:.1f}%)")
    return hb, a
def regress(df):
    """Fit steam-flow models on resampled operating points and report them.

    The PROD rows are aggregated into ``OP_RESAMPLE`` medians (operating
    points), split 70/30 into train/test with a fixed seed, and three models
    are fitted: a feed-only linear baseline, a linear model on standardised
    features, and a gradient-boosting regressor. Test R²/MAE, the baseline
    R², the median steam/feed ratio and a feature-importance table are
    printed.

    Args:
        df: Filtered PROD DataFrame with a ``dtat`` datetime column plus the
            ``FEATURES`` and ``TARGET`` columns.

    Returns:
        ``(ops, gbm, Xte, yte, pred, imp)``: the operating-point DataFrame,
        the fitted GBM, the test features and targets, the GBM test
        predictions, and the GBM feature importances as a Series indexed by
        feature name.
    """
    from sklearn.model_selection import train_test_split

    # Point-level rows barely vary inside a steady state, so learn on
    # per-window medians instead: each one is a load-level operating point.
    indexed = df.set_index("dtat")
    medians = indexed.resample(OP_RESAMPLE).median(numeric_only=True)
    ops = medians.dropna(subset=[TARGET, "feed"])
    ops = ops[ops["feed"] > 50]
    print(f"\n[운전점] PROD {len(df)}행 → {OP_RESAMPLE} 운전점 {len(ops)}")

    X = ops[FEATURES].values
    y = ops[TARGET].values
    Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=.3, random_state=0)

    # Baseline: feed only (column 0) -- how much does ratio control explain?
    feed_tr, feed_te = Xtr[:, :1], Xte[:, :1]
    baseline = LinearRegression().fit(feed_tr, ytr)
    r2_feed = r2_score(yte, baseline.predict(feed_te))

    scaler = StandardScaler().fit(Xtr)
    lin = LinearRegression().fit(scaler.transform(Xtr), ytr)
    gbm = GradientBoostingRegressor(
        n_estimators=200,
        max_depth=2,
        learning_rate=0.05,
        random_state=0,
    ).fit(Xtr, ytr)

    span = y.max() - y.min()
    gbm_pred = gbm.predict(Xte)
    predictions = {
        "Linear": lin.predict(scaler.transform(Xte)),
        "GBM": gbm_pred,
    }
    for name, pred in predictions.items():
        mae = mean_absolute_error(yte, pred)
        print(f"[모델 {name:7s}] test R²(FIT)={r2_score(yte,pred):.3f}  "
              f"MAE={mae:.1f} (스팬의 {100*mae/span:.1f}%)")
    ratio_median = (ops[TARGET] / ops["feed"]).median()
    print(f"[베이스라인 피드단독] test R²={r2_feed:.3f}  "
          f"steam/feed비 중앙값={ratio_median:.3f}")

    print("\n[피처 중요도]")
    # Coefficients are on standardised inputs, so they are comparable in size.
    coef = pd.Series(lin.coef_, index=FEATURES)
    imp = pd.Series(gbm.feature_importances_, index=FEATURES)
    table = pd.DataFrame({"lin_std계수": coef.round(1),
                          "GBM중요도": imp.round(3)})
    table = table.sort_values("GBM중요도", ascending=False)
    print(table.to_string())
    return ops, gbm, Xte, yte, gbm_pred, imp
def plots(hb, ops, yte, pred, imp, prefix="c6111"):
    """Render the four-panel production-map figure and save it as a PNG.

    Panels, left to right: valve characteristic (bin mean plus rising and
    falling curves), steam flow vs feed for the operating points, GBM
    predicted vs actual on the test set, and GBM feature importances.

    Args:
        hb: Bin table from ``valve_char`` (columns ``op``, ``flow``,
            ``flow_up``, ``flow_dn``).
        ops: Operating-point DataFrame with ``feed`` and ``TARGET`` columns.
        yte: Actual test-set target values.
        pred: Predicted test-set target values.
        imp: Feature importances as a pandas Series.
        prefix: File-name prefix; the figure is written to
            ``BASE + f"{prefix}_prodmap.png"``.
    """
    fig, ax = plt.subplots(1, 4, figsize=(22, 5))

    ax[0].scatter(hb["op"], hb["flow"], s=20, c="k", label="mean")
    ax[0].plot(hb["op"], hb["flow_up"], "b.-", ms=4, label="OP rising")
    ax[0].plot(hb["op"], hb["flow_dn"], "r.-", ms=4, label="OP falling")
    ax[0].set_xlabel("steam OP %")
    ax[0].set_ylabel("steam flow")
    ax[0].set_title("Valve char (hysteresis=stiction)")
    ax[0].legend()

    ax[1].scatter(ops["feed"], ops[TARGET], s=10, alpha=.5)
    ax[1].set_xlabel("feed")
    ax[1].set_ylabel("steam flow")
    ax[1].set_title("steam vs feed (operating points)")

    ax[2].scatter(yte, pred, s=12, alpha=.5)
    # Shared limits so the dashed y = x reference line spans both axes.
    lim = [min(yte.min(), pred.min()), max(yte.max(), pred.max())]
    ax[2].plot(lim, lim, "r--")
    ax[2].set_xlabel("actual steam flow")
    ax[2].set_ylabel("predicted (GBM)")
    ax[2].set_title("Predicted vs Actual (test ops)")

    imp.sort_values().plot.barh(ax=ax[3])
    ax[3].set_title("GBM feature importance")

    out_path = BASE + f"{prefix}_prodmap.png"
    fig.tight_layout()
    fig.savefig(out_path, dpi=95)
    # pyplot keeps every figure alive until it is closed; release it so
    # repeated calls in one process do not accumulate figures.
    plt.close(fig)
    print(f"\n플롯 저장: {out_path}")
def main():
    """Command-line entry point: load data, analyse, and write the figure.

    Options:
        --data: Path of the input pickle (default ``BASE + "c6111_data.pkl"``).
        --prefix: Output file-name prefix passed to ``plots`` (default
            ``"c6111"``).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--data", default=BASE + "c6111_data.pkl")
    cli.add_argument("--prefix", default="c6111")
    opts = cli.parse_args()

    prod = load(opts.data)
    print(f"PROD 정합데이터 {len(prod)}")

    # Only the bin table is needed from valve_char; only the operating
    # points, test targets, predictions and importances from regress.
    hb, _gain = valve_char(prod)
    ops, _gbm, _Xte, yte, pred, imp = regress(prod)
    plots(hb, ops, yte, pred, imp, opts.prefix)
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()