update for 4 hour rain forecast
This commit is contained in:
@@ -10,6 +10,7 @@ import psycopg2
|
||||
|
||||
from rain_model_common import (
|
||||
AVAILABLE_FEATURE_SETS,
|
||||
DEFAULT_HORIZON_HOURS,
|
||||
RAIN_EVENT_THRESHOLD_MM,
|
||||
build_dataset,
|
||||
feature_columns_for_set,
|
||||
@@ -17,8 +18,11 @@ from rain_model_common import (
|
||||
fetch_baro,
|
||||
fetch_forecast,
|
||||
fetch_ws90,
|
||||
normalize_horizon_hours,
|
||||
model_frame,
|
||||
parse_time,
|
||||
rain_next_flag_col,
|
||||
rain_next_mm_col,
|
||||
to_builtin,
|
||||
)
|
||||
|
||||
@@ -29,6 +33,12 @@ def parse_args() -> argparse.Namespace:
|
||||
parser.add_argument("--site", required=True, help="Site name (e.g. home).")
|
||||
parser.add_argument("--start", help="Start time (RFC3339 or YYYY-MM-DD).")
|
||||
parser.add_argument("--end", help="End time (RFC3339 or YYYY-MM-DD).")
|
||||
parser.add_argument(
|
||||
"--horizon-hours",
|
||||
type=int,
|
||||
default=DEFAULT_HORIZON_HOURS,
|
||||
help="Prediction horizon in hours for target/label auditing.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--feature-set",
|
||||
default="baseline",
|
||||
@@ -57,13 +67,13 @@ def longest_zero_run(counts: np.ndarray) -> int:
|
||||
return best
|
||||
|
||||
|
||||
def build_weekly_balance(model_df):
|
||||
def build_weekly_balance(model_df, target_col: str):
|
||||
weekly = model_df.copy()
|
||||
iso = weekly.index.to_series().dt.isocalendar()
|
||||
weekly["year_week"] = iso["year"].astype(str) + "-W" + iso["week"].astype(str).str.zfill(2)
|
||||
|
||||
grouped = (
|
||||
weekly.groupby("year_week")["rain_next_1h"]
|
||||
weekly.groupby("year_week")[target_col]
|
||||
.agg(total_rows="count", positive_rows="sum")
|
||||
.reset_index()
|
||||
.sort_values("year_week")
|
||||
@@ -79,6 +89,9 @@ def main() -> int:
|
||||
|
||||
start = parse_time(args.start) if args.start else ""
|
||||
end = parse_time(args.end) if args.end else ""
|
||||
horizon_hours = normalize_horizon_hours(args.horizon_hours)
|
||||
target_col = rain_next_flag_col(horizon_hours)
|
||||
target_mm_col = rain_next_mm_col(horizon_hours)
|
||||
feature_cols = feature_columns_for_set(args.feature_set)
|
||||
needs_forecast = feature_columns_need_forecast(feature_cols)
|
||||
|
||||
@@ -89,8 +102,14 @@ def main() -> int:
|
||||
if needs_forecast:
|
||||
forecast = fetch_forecast(conn, args.site, start, end, model=args.forecast_model)
|
||||
|
||||
df = build_dataset(ws90, baro, forecast=forecast, rain_event_threshold_mm=RAIN_EVENT_THRESHOLD_MM)
|
||||
model_df = model_frame(df, feature_cols, require_target=True)
|
||||
df = build_dataset(
|
||||
ws90,
|
||||
baro,
|
||||
forecast=forecast,
|
||||
rain_event_threshold_mm=RAIN_EVENT_THRESHOLD_MM,
|
||||
horizon_hours=horizon_hours,
|
||||
)
|
||||
model_df = model_frame(df, feature_cols, require_target=True, target_col=target_col)
|
||||
|
||||
ws90_dupes = int(ws90.duplicated(subset=["ts", "station_id"]).sum()) if not ws90.empty else 0
|
||||
baro_dupes = int(baro.duplicated(subset=["ts", "source"]).sum()) if not baro.empty else 0
|
||||
@@ -114,7 +133,7 @@ def main() -> int:
|
||||
baro_max_gap_min = longest_zero_run(np.array(baro_counts)) * 5 if len(baro_counts) else 0
|
||||
|
||||
missingness = {}
|
||||
for col in feature_cols + ["pressure_hpa", "rain_mm", "rain_inc", "rain_next_1h_mm"]:
|
||||
for col in feature_cols + ["pressure_hpa", "rain_mm", "rain_inc", target_mm_col]:
|
||||
if col in df.columns:
|
||||
missingness[col] = float(df[col].isna().mean())
|
||||
|
||||
@@ -125,9 +144,12 @@ def main() -> int:
|
||||
report = {
|
||||
"site": args.site,
|
||||
"feature_set": args.feature_set,
|
||||
"horizon_hours": horizon_hours,
|
||||
"target_column": target_col,
|
||||
"target_mm_column": target_mm_col,
|
||||
"feature_columns": feature_cols,
|
||||
"forecast_model": args.forecast_model if needs_forecast else None,
|
||||
"target_definition": f"rain_next_1h_mm >= {RAIN_EVENT_THRESHOLD_MM:.2f}",
|
||||
"target_definition": f"{target_mm_col} >= {RAIN_EVENT_THRESHOLD_MM:.2f}",
|
||||
"requested_window": {
|
||||
"start": start or None,
|
||||
"end": end or None,
|
||||
@@ -167,8 +189,8 @@ def main() -> int:
|
||||
"max_rain_increment_5m_mm": max_rain_inc,
|
||||
},
|
||||
"class_balance": {
|
||||
"overall_positive_rate": float(model_df["rain_next_1h"].mean()) if not model_df.empty else None,
|
||||
"weekly": build_weekly_balance(model_df) if not model_df.empty else [],
|
||||
"overall_positive_rate": float(model_df[target_col].mean()) if not model_df.empty else None,
|
||||
"weekly": build_weekly_balance(model_df, target_col=target_col) if not model_df.empty else [],
|
||||
},
|
||||
}
|
||||
report = to_builtin(report)
|
||||
|
||||
Reference in New Issue
Block a user