update for 4 hour rain forecast
This commit is contained in:
@@ -86,7 +86,46 @@ FEATURE_COLUMNS = BASELINE_FEATURE_COLUMNS
|
||||
|
||||
# Minimum accumulated rain (mm) over a horizon for the "rain_next_*" flag to be
# set; used as the default ``rain_event_threshold_mm`` of ``build_dataset``.
RAIN_EVENT_THRESHOLD_MM = 0.2
# A single bucket's rain increment at or above this value (mm) is marked as a
# spike in the ``rain_spike_5m`` column.
RAIN_SPIKE_THRESHOLD_MM_5M = 5.0
# Width of one observation bucket, in minutes.
BUCKET_MINUTES = 5
# Buckets per hour. Derived from BUCKET_MINUTES so the two cannot drift apart.
RAIN_HORIZON_BUCKETS = 60 // BUCKET_MINUTES  # 12 * 5m = 1h
# NOTE(review): presumably the horizon used when a caller does not choose one;
# no usage is visible in this part of the file - confirm against callers.
DEFAULT_HORIZON_HOURS = 4
# NOTE(review): matches the horizons accepted by prediction_table_for_horizon;
# confirm that the two are meant to be kept in sync.
SUPPORTED_PREDICTION_HORIZONS = (1, 4)
|
||||
|
||||
|
||||
def normalize_horizon_hours(horizon_hours: int) -> int:
    """Validate a forecast horizon and return it as a positive ``int``.

    Args:
        horizon_hours: Horizon in hours. Anything ``int()`` accepts is
            allowed (for example ``4``, ``4.0`` or ``"4"``), as long as it
            denotes a whole number of hours.

    Returns:
        The horizon as a built-in ``int`` greater than zero.

    Raises:
        ValueError: If the value is not a whole number of hours, cannot be
            parsed as an integer, or is not greater than zero.
    """
    out = int(horizon_hours)
    # int() truncates silently (1.5 -> 1), which would label and window a
    # fractional horizon as a different one. Text input is exempt because
    # int() already rejects non-integer text such as "1.5".
    if not isinstance(horizon_hours, (str, bytes, bytearray)) and out != horizon_hours:
        raise ValueError(
            f"horizon_hours must be a whole number of hours, got {horizon_hours!r}"
        )
    if out <= 0:
        raise ValueError("horizon_hours must be > 0")
    return out
|
||||
|
||||
|
||||
def horizon_suffix(horizon_hours: int) -> str:
    """Return the validated horizon rendered as a suffix such as ``"4h"``."""
    hours = normalize_horizon_hours(horizon_hours)
    return str(hours) + "h"
|
||||
|
||||
|
||||
def horizon_buckets(horizon_hours: int) -> int:
    """Return how many whole ``BUCKET_MINUTES`` buckets fit in the horizon."""
    total_minutes = normalize_horizon_hours(horizon_hours) * 60
    # Floor division: a trailing partial bucket is not counted.
    return total_minutes // BUCKET_MINUTES
|
||||
|
||||
|
||||
def rain_last_mm_col(horizon_hours: int) -> str:
    """Return the trailing-window rain total column name, e.g. ``rain_last_4h_mm``."""
    suffix = horizon_suffix(horizon_hours)
    return "rain_last_" + suffix + "_mm"
|
||||
|
||||
|
||||
def rain_next_mm_col(horizon_hours: int) -> str:
    """Return the forward-window rain total column name, e.g. ``rain_next_4h_mm``."""
    suffix = horizon_suffix(horizon_hours)
    return "rain_next_" + suffix + "_mm"
|
||||
|
||||
|
||||
def rain_next_flag_col(horizon_hours: int) -> str:
    """Return the boolean rain-event label column name, e.g. ``rain_next_4h``."""
    suffix = horizon_suffix(horizon_hours)
    return "rain_next_" + suffix
|
||||
|
||||
|
||||
def prediction_table_for_horizon(horizon_hours: int) -> str:
    """Return the predictions table name for a horizon.

    Only the 1 hour and 4 hour horizons have a table.

    Raises:
        ValueError: If the horizon is invalid (see ``normalize_horizon_hours``)
            or has no prediction table.
    """
    tables_by_horizon = {
        1: "predictions_rain_1h",
        4: "predictions_rain_4h",
    }
    table = tables_by_horizon.get(normalize_horizon_hours(horizon_hours))
    if table is None:
        raise ValueError(f"unsupported prediction-table horizon: {horizon_hours}")
    return table
|
||||
|
||||
|
||||
def parse_time(value: str) -> str:
|
||||
@@ -232,6 +271,7 @@ def build_dataset(
|
||||
baro: pd.DataFrame,
|
||||
forecast: pd.DataFrame | None = None,
|
||||
rain_event_threshold_mm: float = RAIN_EVENT_THRESHOLD_MM,
|
||||
horizon_hours: int = 1,
|
||||
) -> pd.DataFrame:
|
||||
if ws90.empty:
|
||||
raise RuntimeError("no ws90 observations found")
|
||||
@@ -261,12 +301,20 @@ def build_dataset(
|
||||
df["rain_inc"] = df["rain_inc_raw"].clip(lower=0)
|
||||
df["rain_spike_5m"] = df["rain_inc"] >= RAIN_SPIKE_THRESHOLD_MM_5M
|
||||
|
||||
window = RAIN_HORIZON_BUCKETS
|
||||
df["rain_last_1h_mm"] = df["rain_inc"].rolling(window=window, min_periods=1).sum()
|
||||
df["rain_next_1h_mm"] = df["rain_inc"].rolling(window=window, min_periods=1).sum().shift(-(window - 1))
|
||||
df["rain_next_1h"] = df["rain_next_1h_mm"] >= rain_event_threshold_mm
|
||||
windows: dict[int, int] = {
|
||||
1: horizon_buckets(1),
|
||||
normalize_horizon_hours(horizon_hours): horizon_buckets(horizon_hours),
|
||||
}
|
||||
for hours, window in windows.items():
|
||||
rain_last_col = rain_last_mm_col(hours)
|
||||
rain_next_mm = rain_next_mm_col(hours)
|
||||
rain_next_flag = rain_next_flag_col(hours)
|
||||
df[rain_last_col] = df["rain_inc"].rolling(window=window, min_periods=1).sum()
|
||||
df[rain_next_mm] = df["rain_inc"].rolling(window=window, min_periods=1).sum().shift(-(window - 1))
|
||||
df[rain_next_flag] = df[rain_next_mm] >= rain_event_threshold_mm
|
||||
|
||||
df["pressure_trend_1h"] = df["pressure_hpa"] - df["pressure_hpa"].shift(window)
|
||||
window_1h = horizon_buckets(1)
|
||||
df["pressure_trend_1h"] = df["pressure_hpa"] - df["pressure_hpa"].shift(window_1h)
|
||||
|
||||
# Wind direction cyclical encoding.
|
||||
radians = np.deg2rad(df["wind_dir_deg"] % 360.0)
|
||||
@@ -279,14 +327,14 @@ def build_dataset(
|
||||
df["wind_avg_lag_5m"] = df["wind_avg_m_s"].shift(1)
|
||||
df["pressure_lag_5m"] = df["pressure_hpa"].shift(1)
|
||||
|
||||
df["temp_roll_1h_mean"] = df["temperature_c"].rolling(window=window, min_periods=3).mean()
|
||||
df["temp_roll_1h_std"] = df["temperature_c"].rolling(window=window, min_periods=3).std()
|
||||
df["humidity_roll_1h_mean"] = df["humidity"].rolling(window=window, min_periods=3).mean()
|
||||
df["humidity_roll_1h_std"] = df["humidity"].rolling(window=window, min_periods=3).std()
|
||||
df["wind_avg_roll_1h_mean"] = df["wind_avg_m_s"].rolling(window=window, min_periods=3).mean()
|
||||
df["wind_gust_roll_1h_max"] = df["wind_max_m_s"].rolling(window=window, min_periods=3).max()
|
||||
df["pressure_roll_1h_mean"] = df["pressure_hpa"].rolling(window=window, min_periods=3).mean()
|
||||
df["pressure_roll_1h_std"] = df["pressure_hpa"].rolling(window=window, min_periods=3).std()
|
||||
df["temp_roll_1h_mean"] = df["temperature_c"].rolling(window=window_1h, min_periods=3).mean()
|
||||
df["temp_roll_1h_std"] = df["temperature_c"].rolling(window=window_1h, min_periods=3).std()
|
||||
df["humidity_roll_1h_mean"] = df["humidity"].rolling(window=window_1h, min_periods=3).mean()
|
||||
df["humidity_roll_1h_std"] = df["humidity"].rolling(window=window_1h, min_periods=3).std()
|
||||
df["wind_avg_roll_1h_mean"] = df["wind_avg_m_s"].rolling(window=window_1h, min_periods=3).mean()
|
||||
df["wind_gust_roll_1h_max"] = df["wind_max_m_s"].rolling(window=window_1h, min_periods=3).max()
|
||||
df["pressure_roll_1h_mean"] = df["pressure_hpa"].rolling(window=window_1h, min_periods=3).mean()
|
||||
df["pressure_roll_1h_std"] = df["pressure_hpa"].rolling(window=window_1h, min_periods=3).std()
|
||||
|
||||
# Calendar/seasonality features (UTC based).
|
||||
hour_of_day = df.index.hour + (df.index.minute / 60.0)
|
||||
@@ -304,11 +352,16 @@ def build_dataset(
|
||||
return df
|
||||
|
||||
|
||||
def model_frame(df: pd.DataFrame, feature_cols: list[str] | None = None, require_target: bool = True) -> pd.DataFrame:
|
||||
def model_frame(
|
||||
df: pd.DataFrame,
|
||||
feature_cols: list[str] | None = None,
|
||||
require_target: bool = True,
|
||||
target_col: str | None = None,
|
||||
) -> pd.DataFrame:
|
||||
features = feature_cols or FEATURE_COLUMNS
|
||||
required = list(features)
|
||||
if require_target:
|
||||
required.append("rain_next_1h")
|
||||
required.append(target_col or rain_next_flag_col(1))
|
||||
out = df.dropna(subset=required).copy()
|
||||
return out.sort_index()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user