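Another bugfix: guard empty start/end timestamp casts with NULLIF in the fetch queries, backfill missing fc_precip_prob from fc_precip_mm, and add calendar features with a new "extended_calendar" feature set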
This commit is contained in:
@@ -36,6 +36,16 @@ FORECAST_FEATURE_COLUMNS = [
|
||||
"fc_cloud_cover",
|
||||
]
|
||||
|
||||
CALENDAR_FEATURE_COLUMNS = [
|
||||
"hour_sin",
|
||||
"hour_cos",
|
||||
"dow_sin",
|
||||
"dow_cos",
|
||||
"month_sin",
|
||||
"month_cos",
|
||||
"is_weekend",
|
||||
]
|
||||
|
||||
EXTENDED_FEATURE_COLUMNS = [
|
||||
"pressure_trend_1h",
|
||||
"temperature_c",
|
||||
@@ -60,9 +70,15 @@ EXTENDED_FEATURE_COLUMNS = [
|
||||
*FORECAST_FEATURE_COLUMNS,
|
||||
]
|
||||
|
||||
EXTENDED_CALENDAR_FEATURE_COLUMNS = [
|
||||
*EXTENDED_FEATURE_COLUMNS,
|
||||
*CALENDAR_FEATURE_COLUMNS,
|
||||
]
|
||||
|
||||
FEATURE_SETS: dict[str, list[str]] = {
|
||||
"baseline": BASELINE_FEATURE_COLUMNS,
|
||||
"extended": EXTENDED_FEATURE_COLUMNS,
|
||||
"extended_calendar": EXTENDED_CALENDAR_FEATURE_COLUMNS,
|
||||
}
|
||||
|
||||
AVAILABLE_FEATURE_SETS = tuple(sorted(FEATURE_SETS.keys()))
|
||||
@@ -116,8 +132,8 @@ def fetch_ws90(conn, site: str, start: str, end: str) -> pd.DataFrame:
|
||||
SELECT ts, station_id, received_at, temperature_c, humidity, wind_avg_m_s, wind_max_m_s, wind_dir_deg, rain_mm
|
||||
FROM observations_ws90
|
||||
WHERE site = %s
|
||||
AND (%s = '' OR ts >= %s::timestamptz)
|
||||
AND (%s = '' OR ts <= %s::timestamptz)
|
||||
AND (%s = '' OR ts >= NULLIF(%s, '')::timestamptz)
|
||||
AND (%s = '' OR ts <= NULLIF(%s, '')::timestamptz)
|
||||
ORDER BY ts ASC
|
||||
"""
|
||||
return _fetch_df(conn, sql, (site, start, start, end, end), ["ts", "received_at"])
|
||||
@@ -128,8 +144,8 @@ def fetch_baro(conn, site: str, start: str, end: str) -> pd.DataFrame:
|
||||
SELECT ts, source, received_at, pressure_hpa
|
||||
FROM observations_baro
|
||||
WHERE site = %s
|
||||
AND (%s = '' OR ts >= %s::timestamptz)
|
||||
AND (%s = '' OR ts <= %s::timestamptz)
|
||||
AND (%s = '' OR ts >= NULLIF(%s, '')::timestamptz)
|
||||
AND (%s = '' OR ts <= NULLIF(%s, '')::timestamptz)
|
||||
ORDER BY ts ASC
|
||||
"""
|
||||
return _fetch_df(conn, sql, (site, start, start, end, end), ["ts", "received_at"])
|
||||
@@ -151,8 +167,8 @@ def fetch_forecast(conn, site: str, start: str, end: str, model: str = "ecmwf")
|
||||
FROM forecast_openmeteo_hourly
|
||||
WHERE site = %s
|
||||
AND model = %s
|
||||
AND (%s = '' OR ts >= %s::timestamptz - INTERVAL '2 hours')
|
||||
AND (%s = '' OR ts <= %s::timestamptz + INTERVAL '2 hours')
|
||||
AND (%s = '' OR ts >= NULLIF(%s, '')::timestamptz - INTERVAL '2 hours')
|
||||
AND (%s = '' OR ts <= NULLIF(%s, '')::timestamptz + INTERVAL '2 hours')
|
||||
ORDER BY ts ASC, retrieved_at DESC
|
||||
"""
|
||||
return _fetch_df(conn, sql, (site, model, start, start, end, end), ["ts", "retrieved_at"])
|
||||
@@ -199,6 +215,15 @@ def _apply_forecast_features(df: pd.DataFrame, forecast: pd.DataFrame | None) ->
|
||||
out.loc[mask, "fc_precip_prob"] = out.loc[mask, "fc_precip_prob"] / 100.0
|
||||
out["fc_precip_prob"] = out["fc_precip_prob"].clip(lower=0.0, upper=1.0)
|
||||
|
||||
# Some forecast sources (or model configs) provide precip amount but no precip probability.
|
||||
# Backfill missing probability to keep feature rows usable for training/inference.
|
||||
if "fc_precip_mm" in out.columns:
|
||||
fallback_prob = (out["fc_precip_mm"].fillna(0.0) > 0.0).astype(float)
|
||||
else:
|
||||
fallback_prob = 0.0
|
||||
out["fc_precip_prob"] = out["fc_precip_prob"].fillna(fallback_prob)
|
||||
out["fc_precip_prob"] = out["fc_precip_prob"].clip(lower=0.0, upper=1.0)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
@@ -263,6 +288,18 @@ def build_dataset(
|
||||
df["pressure_roll_1h_mean"] = df["pressure_hpa"].rolling(window=window, min_periods=3).mean()
|
||||
df["pressure_roll_1h_std"] = df["pressure_hpa"].rolling(window=window, min_periods=3).std()
|
||||
|
||||
# Calendar/seasonality features (UTC based).
|
||||
hour_of_day = df.index.hour + (df.index.minute / 60.0)
|
||||
day_of_week = df.index.dayofweek
|
||||
month_of_year = df.index.month
|
||||
df["hour_sin"] = np.sin(2.0 * np.pi * hour_of_day / 24.0)
|
||||
df["hour_cos"] = np.cos(2.0 * np.pi * hour_of_day / 24.0)
|
||||
df["dow_sin"] = np.sin(2.0 * np.pi * day_of_week / 7.0)
|
||||
df["dow_cos"] = np.cos(2.0 * np.pi * day_of_week / 7.0)
|
||||
df["month_sin"] = np.sin(2.0 * np.pi * (month_of_year - 1.0) / 12.0)
|
||||
df["month_cos"] = np.cos(2.0 * np.pi * (month_of_year - 1.0) / 12.0)
|
||||
df["is_weekend"] = (day_of_week >= 5).astype(float)
|
||||
|
||||
df = _apply_forecast_features(df, forecast)
|
||||
return df
|
||||
|
||||
|
||||
Reference in New Issue
Block a user