more work on model training

This commit is contained in:
2026-03-05 20:49:19 +11:00
parent 76270e5650
commit 5b8cad905f
9 changed files with 380 additions and 48 deletions

View File

@@ -41,12 +41,15 @@ class WorkerConfig:
site: str
model_name: str
model_version_base: str
feature_set: str
forecast_model: str
train_interval_hours: float
predict_interval_minutes: float
lookback_days: int
train_ratio: float
val_ratio: float
min_precision: float
dataset_path_template: str
model_path: Path
report_path: Path
audit_path: Path
@@ -81,10 +84,13 @@ def training_window(lookback_days: int) -> tuple[str, str]:
def run_training_cycle(cfg: WorkerConfig, env: dict[str, str]) -> None:
start, end = training_window(cfg.lookback_days)
model_version = f"{cfg.model_version_base}-{now_utc().strftime('%Y%m%d%H%M')}"
dataset_out = cfg.dataset_path_template.format(model_version=model_version, feature_set=cfg.feature_set)
ensure_parent(cfg.audit_path)
ensure_parent(cfg.report_path)
ensure_parent(cfg.model_path)
if dataset_out:
ensure_parent(Path(dataset_out))
run_cmd(
[
@@ -96,6 +102,10 @@ def run_training_cycle(cfg: WorkerConfig, env: dict[str, str]) -> None:
start,
"--end",
end,
"--feature-set",
cfg.feature_set,
"--forecast-model",
cfg.forecast_model,
"--out",
str(cfg.audit_path),
],
@@ -118,12 +128,18 @@ def run_training_cycle(cfg: WorkerConfig, env: dict[str, str]) -> None:
str(cfg.val_ratio),
"--min-precision",
str(cfg.min_precision),
"--feature-set",
cfg.feature_set,
"--forecast-model",
cfg.forecast_model,
"--model-version",
model_version,
"--out",
str(cfg.model_path),
"--report-out",
str(cfg.report_path),
"--dataset-out",
dataset_out,
],
env,
)
@@ -143,6 +159,8 @@ def run_predict_once(cfg: WorkerConfig, env: dict[str, str]) -> None:
str(cfg.model_path),
"--model-name",
cfg.model_name,
"--forecast-model",
cfg.forecast_model,
],
env,
)
@@ -158,12 +176,18 @@ def load_config() -> WorkerConfig:
site=read_env("RAIN_SITE", "home"),
model_name=read_env("RAIN_MODEL_NAME", "rain_next_1h"),
model_version_base=read_env("RAIN_MODEL_VERSION_BASE", "rain-logreg-v1"),
feature_set=read_env("RAIN_FEATURE_SET", "baseline"),
forecast_model=read_env("RAIN_FORECAST_MODEL", "ecmwf"),
train_interval_hours=read_env_float("RAIN_TRAIN_INTERVAL_HOURS", 24.0),
predict_interval_minutes=read_env_float("RAIN_PREDICT_INTERVAL_MINUTES", 10.0),
lookback_days=read_env_int("RAIN_LOOKBACK_DAYS", 30),
train_ratio=read_env_float("RAIN_TRAIN_RATIO", 0.7),
val_ratio=read_env_float("RAIN_VAL_RATIO", 0.15),
min_precision=read_env_float("RAIN_MIN_PRECISION", 0.70),
dataset_path_template=read_env(
"RAIN_DATASET_PATH",
"models/datasets/rain_dataset_{model_version}_{feature_set}.csv",
),
model_path=Path(read_env("RAIN_MODEL_PATH", "models/rain_model.pkl")),
report_path=Path(read_env("RAIN_REPORT_PATH", "models/rain_model_report.json")),
audit_path=Path(read_env("RAIN_AUDIT_PATH", "models/rain_data_audit.json")),
@@ -188,6 +212,8 @@ def main() -> int:
"[rain-ml] worker start "
f"site={cfg.site} "
f"model_name={cfg.model_name} "
f"feature_set={cfg.feature_set} "
f"forecast_model={cfg.forecast_model} "
f"train_interval_hours={cfg.train_interval_hours} "
f"predict_interval_minutes={cfg.predict_interval_minutes}",
flush=True,