update for 4 hour rain forecast

This commit is contained in:
2026-04-06 18:32:33 +10:00
parent fb50c8ed71
commit 3a7309b2cf
20 changed files with 716 additions and 132 deletions
+69 -16
View File
@@ -86,7 +86,46 @@ FEATURE_COLUMNS = BASELINE_FEATURE_COLUMNS
# Default for build_dataset's rain_event_threshold_mm: a look-ahead rain total
# (mm) at or above this value sets the corresponding rain_next_<N>h flag.
RAIN_EVENT_THRESHOLD_MM = 0.2
# A single rain increment (mm) at or above this value is flagged as rain_spike_5m.
RAIN_SPIKE_THRESHOLD_MM_5M = 5.0
RAIN_HORIZON_BUCKETS = 12 # 12 * 5m = 1h
# Width of one observation bucket in minutes; horizon_buckets() converts hours
# to a bucket count using this value.
BUCKET_MINUTES = 5
# NOTE(review): presumably the horizon used when a caller does not specify one;
# no use of this constant is visible in this section - confirm against callers.
DEFAULT_HORIZON_HOURS = 4
# NOTE(review): matches the horizons prediction_table_for_horizon() accepts
# (1 and 4), but that function does not read this tuple - keep the two in sync.
SUPPORTED_PREDICTION_HORIZONS = (1, 4)
def normalize_horizon_hours(horizon_hours: int) -> int:
    """Coerce *horizon_hours* to a positive whole number of hours.

    Args:
        horizon_hours: Forecast horizon in hours. Anything ``int()`` accepts
            (an int, a numeric string, an integral float such as ``4.0``).

    Returns:
        The horizon as a plain ``int`` greater than zero.

    Raises:
        ValueError: If the value is a fractional float, is not positive, or is
            a string that ``int()`` cannot parse.
    """
    out = int(horizon_hours)
    # int() truncates floats toward zero, so 1.5 would silently become a 1h
    # horizon and select the wrong columns/window; reject it instead.
    if isinstance(horizon_hours, float) and out != horizon_hours:
        raise ValueError("horizon_hours must be a whole number of hours")
    if out <= 0:
        raise ValueError("horizon_hours must be > 0")
    return out
def horizon_suffix(horizon_hours: int) -> str:
    """Return the column/table suffix for a horizon, e.g. ``"4h"`` for 4."""
    hours = normalize_horizon_hours(horizon_hours)
    return f"{hours}h"
def horizon_buckets(horizon_hours: int) -> int:
    """Return how many BUCKET_MINUTES-wide buckets span *horizon_hours*.

    The horizon is validated by normalize_horizon_hours first; the division
    is floored.
    """
    total_minutes = normalize_horizon_hours(horizon_hours) * 60
    return total_minutes // BUCKET_MINUTES
def rain_last_mm_col(horizon_hours: int) -> str:
    """Return the trailing-rain-total column name, e.g. ``rain_last_1h_mm``."""
    suffix = horizon_suffix(horizon_hours)
    return f"rain_last_{suffix}_mm"
def rain_next_mm_col(horizon_hours: int) -> str:
    """Return the look-ahead rain-total column name, e.g. ``rain_next_1h_mm``."""
    suffix = horizon_suffix(horizon_hours)
    return f"rain_next_{suffix}_mm"
def rain_next_flag_col(horizon_hours: int) -> str:
    """Return the look-ahead rain flag column name, e.g. ``rain_next_1h``."""
    suffix = horizon_suffix(horizon_hours)
    return f"rain_next_{suffix}"
def prediction_table_for_horizon(horizon_hours: int) -> str:
    """Return the predictions table name for a supported horizon.

    Raises:
        ValueError: If the horizon is not positive (via
            normalize_horizon_hours) or has no prediction table (only 1 and 4
            are mapped).
    """
    tables = {
        1: "predictions_rain_1h",
        4: "predictions_rain_4h",
    }
    table = tables.get(normalize_horizon_hours(horizon_hours))
    if table is None:
        raise ValueError(f"unsupported prediction-table horizon: {horizon_hours}")
    return table
def parse_time(value: str) -> str:
@@ -232,6 +271,7 @@ def build_dataset(
baro: pd.DataFrame,
forecast: pd.DataFrame | None = None,
rain_event_threshold_mm: float = RAIN_EVENT_THRESHOLD_MM,
horizon_hours: int = 1,
) -> pd.DataFrame:
if ws90.empty:
raise RuntimeError("no ws90 observations found")
@@ -261,12 +301,20 @@ def build_dataset(
df["rain_inc"] = df["rain_inc_raw"].clip(lower=0)
df["rain_spike_5m"] = df["rain_inc"] >= RAIN_SPIKE_THRESHOLD_MM_5M
window = RAIN_HORIZON_BUCKETS
df["rain_last_1h_mm"] = df["rain_inc"].rolling(window=window, min_periods=1).sum()
df["rain_next_1h_mm"] = df["rain_inc"].rolling(window=window, min_periods=1).sum().shift(-(window - 1))
df["rain_next_1h"] = df["rain_next_1h_mm"] >= rain_event_threshold_mm
windows: dict[int, int] = {
1: horizon_buckets(1),
normalize_horizon_hours(horizon_hours): horizon_buckets(horizon_hours),
}
for hours, window in windows.items():
rain_last_col = rain_last_mm_col(hours)
rain_next_mm = rain_next_mm_col(hours)
rain_next_flag = rain_next_flag_col(hours)
df[rain_last_col] = df["rain_inc"].rolling(window=window, min_periods=1).sum()
df[rain_next_mm] = df["rain_inc"].rolling(window=window, min_periods=1).sum().shift(-(window - 1))
df[rain_next_flag] = df[rain_next_mm] >= rain_event_threshold_mm
df["pressure_trend_1h"] = df["pressure_hpa"] - df["pressure_hpa"].shift(window)
window_1h = horizon_buckets(1)
df["pressure_trend_1h"] = df["pressure_hpa"] - df["pressure_hpa"].shift(window_1h)
# Wind direction cyclical encoding.
radians = np.deg2rad(df["wind_dir_deg"] % 360.0)
@@ -279,14 +327,14 @@ def build_dataset(
df["wind_avg_lag_5m"] = df["wind_avg_m_s"].shift(1)
df["pressure_lag_5m"] = df["pressure_hpa"].shift(1)
df["temp_roll_1h_mean"] = df["temperature_c"].rolling(window=window, min_periods=3).mean()
df["temp_roll_1h_std"] = df["temperature_c"].rolling(window=window, min_periods=3).std()
df["humidity_roll_1h_mean"] = df["humidity"].rolling(window=window, min_periods=3).mean()
df["humidity_roll_1h_std"] = df["humidity"].rolling(window=window, min_periods=3).std()
df["wind_avg_roll_1h_mean"] = df["wind_avg_m_s"].rolling(window=window, min_periods=3).mean()
df["wind_gust_roll_1h_max"] = df["wind_max_m_s"].rolling(window=window, min_periods=3).max()
df["pressure_roll_1h_mean"] = df["pressure_hpa"].rolling(window=window, min_periods=3).mean()
df["pressure_roll_1h_std"] = df["pressure_hpa"].rolling(window=window, min_periods=3).std()
df["temp_roll_1h_mean"] = df["temperature_c"].rolling(window=window_1h, min_periods=3).mean()
df["temp_roll_1h_std"] = df["temperature_c"].rolling(window=window_1h, min_periods=3).std()
df["humidity_roll_1h_mean"] = df["humidity"].rolling(window=window_1h, min_periods=3).mean()
df["humidity_roll_1h_std"] = df["humidity"].rolling(window=window_1h, min_periods=3).std()
df["wind_avg_roll_1h_mean"] = df["wind_avg_m_s"].rolling(window=window_1h, min_periods=3).mean()
df["wind_gust_roll_1h_max"] = df["wind_max_m_s"].rolling(window=window_1h, min_periods=3).max()
df["pressure_roll_1h_mean"] = df["pressure_hpa"].rolling(window=window_1h, min_periods=3).mean()
df["pressure_roll_1h_std"] = df["pressure_hpa"].rolling(window=window_1h, min_periods=3).std()
# Calendar/seasonality features (UTC based).
hour_of_day = df.index.hour + (df.index.minute / 60.0)
@@ -304,11 +352,16 @@ def build_dataset(
return df
def model_frame(
    df: pd.DataFrame,
    feature_cols: list[str] | None = None,
    require_target: bool = True,
    target_col: str | None = None,
) -> pd.DataFrame:
    """Return the rows of *df* that are complete for modelling, sorted by index.

    Args:
        df: Dataset containing the feature columns (and the target column when
            ``require_target`` is true).
        feature_cols: Columns that must be non-null; falls back to
            FEATURE_COLUMNS when omitted or empty.
        require_target: When true, the target column must be non-null as well.
        target_col: Target column name; falls back to the 1-hour rain flag
            column when omitted or empty.

    Returns:
        A copy of *df* with rows missing any required column dropped.
    """
    needed = list(feature_cols or FEATURE_COLUMNS)
    if require_target:
        needed.append(target_col or rain_next_flag_col(1))
    complete = df.dropna(subset=needed).copy()
    return complete.sort_index()