Files
HC900-Crawler/scripts/analysis/c6111_export_model.py
windpacer 4306f76ddb feat: 형제 컬럼(6-2~10차) 분석 + SHUTDOWN + operator-assist + C# SteamAdvisor 포팅
- c6111_extract: roles_for() 동적 생성, COLUMN_EXCEPTIONS per-prefix
- c6111_prodmap/shadow/startup/rolling: --data/--prefix CLI 인자 지원
- run_column.py: 5개 컬럼 전 파이프라인 실행 래퍼
- c6111_shutdown.py: detect_cutoffs + shutdown_milestones (lookback 1200)
- c6111_operator_assist.py: OOD 게이트 + shadow 리플레이
- c6111_export_model.py: 선형근사 JSON export
- SteamAdvisor.cs: Predict+ClassifyMode+InEnvelope (NaN guard, Ood fix)
- SteamAdvisorController: GET/POST /api/steam/predict
- appsettings.json/Program.cs: DI 등록
- docs: 작업지시서 현황 갱신, 진단보고서 작성 (3 MED/8 LOW, 100% 정확도)
2026-06-05 19:46:57 +09:00

81 lines
2.8 KiB
Python

"""
Export the model as JSON so that the C# SteamAdvisor can load it.

Linear approximation (option 1): export LinearRegression coefficients
instead of the GBM.

    steam = w0 + w1*feed + w2*product + w3*T_C
    valve_inv(flow) = poly3 → OP

Usage:
    python3 c6111_export_model.py --data c6111_data.pkl --prefix c6111
"""
import argparse
import json
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
# Directory prefix (with trailing slash) used to build the default --data path
# and the default output JSON path in main().
# NOTE(review): this is an absolute path into one user's home directory —
# confirm whether it should be derived from the script location instead.
BASE = "/home/windpacer/projects/hc900_ax/scripts/analysis/"
# DataFrame columns used as regressors for the steam_flow model; the same
# columns are used for the exported envelope bounds, and this order is the
# order of the exported "linear_coeffs".
FEATURES = ["feed", "product", "T_C"]
def main():
    """Fit the linear steam model and export it as a JSON file.

    Steps:
      1. Load the pickled DataFrame given by ``--data`` and keep PROD-mode
         rows with feed > 50, steam_flow > 10 and steam_op > 1 and no NaN
         in the feature / steam columns.
      2. Reduce them to 6-hour medians ("operating points").
      3. Fit ``steam_flow ~ FEATURES`` with LinearRegression on the
         operating points and report its R².
      4. Fit a 3rd-degree polynomial ``steam_flow -> steam_op`` on the
         filtered (un-resampled) PROD rows.
      5. Compute the 1 % / 99 % quantiles of each feature as the envelope.
      6. Fit a GradientBoostingRegressor on the same data purely to record
         a reference R² (best effort; failure is reported, not fatal).
      7. Write the model dict to ``--output`` (default
         ``BASE + "{prefix}_model.json"``) and echo it to stdout.

    Raises:
        SystemExit: if no operating points remain after filtering.
    """
    import sys  # local import: only needed for the diagnostics below

    parser = argparse.ArgumentParser()
    parser.add_argument("--data", default=BASE + "c6111_data.pkl")
    parser.add_argument("--prefix", default="c6111")
    parser.add_argument(
        "--output",
        help="JSON 출력 경로 (기본: scripts/analysis/{prefix}_model.json)",
    )
    args = parser.parse_args()

    # SECURITY NOTE: read_pickle executes arbitrary code embedded in the file;
    # only point --data at pickles produced by a trusted pipeline.
    df = pd.read_pickle(args.data)

    prod = df[df["mode"] == "PROD"].copy()
    prod = prod[
        (prod["feed"] > 50) & (prod["steam_flow"] > 10) & (prod["steam_op"] > 1)
    ]
    prod = prod.dropna(subset=FEATURES + ["steam_op", "steam_flow"])

    # 6-hour medians; bins without any rows come out as NaN and are dropped.
    ops = (
        prod.set_index("dtat")
        .resample("6h")
        .median(numeric_only=True)
        .dropna(subset=["steam_flow", "feed"])
    )
    ops = ops[ops["feed"] > 50]

    if ops.empty:
        # Fail with a clear message instead of an obscure sklearn error.
        raise SystemExit(
            f"No PROD operating points left after filtering {args.data!r}; "
            "nothing to export."
        )

    X = ops[FEATURES].values
    y = ops["steam_flow"].values

    # Linear model
    lr = LinearRegression()
    lr.fit(X, y)
    r2 = float(lr.score(X, y))
    print(f"선형 steam_flow R² = {r2:.4f} (GBM 대비 비교용)")

    # Inverse valve characteristic: steam_flow -> steam_op (degree 3).
    # np.polyfit returns coefficients with the highest power first.
    vp = np.polyfit(prod["steam_flow"], prod["steam_op"], 3)

    # Envelope (1 %, 99 %) of each feature over the operating points
    lo = ops[FEATURES].quantile(0.01)
    hi = ops[FEATURES].quantile(0.99)

    # GBM R² on the same data, for reference only. This stays best-effort:
    # a failure must not block the export, but the reason is reported.
    gbm_r2 = None
    try:
        from sklearn.ensemble import GradientBoostingRegressor

        gbm = GradientBoostingRegressor(
            n_estimators=200, max_depth=2, learning_rate=0.05, random_state=0
        )
        gbm.fit(X, y)
        gbm_r2 = float(gbm.score(X, y))
    except Exception as exc:  # deliberate: reference metric is optional
        print(f"GBM reference fit skipped: {exc}", file=sys.stderr)

    model = {
        "column": args.prefix,
        "features": FEATURES,
        "linear_coeffs": lr.coef_.tolist(),
        "intercept": float(lr.intercept_),
        "linear_r2": round(r2, 4),
        # `is not None`, not truthiness: an R² of exactly 0.0 is a real value.
        "gbm_r2": round(gbm_r2, 4) if gbm_r2 is not None else None,
        "valve_poly": vp.tolist(),
        "envelope_lo": {c: round(float(lo[c]), 1) for c in FEATURES},
        "envelope_hi": {c: round(float(hi[c]), 1) for c in FEATURES},
        "n_operating_points": len(ops),
        "n_prod_rows": len(prod),
    }

    out = args.output or (BASE + f"{args.prefix}_model.json")
    payload = json.dumps(model, indent=2)  # serialise once, reuse for echo
    with open(out, "w", encoding="utf-8") as f:
        f.write(payload)
    print(f"\n모델 export: {out}")
    print(payload)
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()