From 1ef300d25e773183f3319ded291f68ec2391857c Mon Sep 17 00:00:00 2001 From: Nathan Coad Date: Mon, 6 Apr 2026 18:41:16 +1000 Subject: [PATCH] update --- README.md | 1 + docs/rain_data_issues.md | 2 +- docs/rain_model_runbook.md | 20 ++++++++++---------- docs/rain_prediction.md | 20 ++++++++++---------- scripts/rainml_py.sh | 27 +++++++++++++++++++++++++++ scripts/run_p0_rain_workflow.sh | 6 +++--- 6 files changed, 52 insertions(+), 24 deletions(-) create mode 100755 scripts/rainml_py.sh diff --git a/README.md b/README.md index 95b3ad4..5d5dfd7 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,7 @@ Runbook/docs: - `docs/rain_data_issues.md` - `docs/rain_model_runbook.md` - `scripts/recommend_rain_model.py` (rank reports and recommend deploy candidate) +- `scripts/rainml_py.sh` (run ML Python scripts inside the `rainml` container; avoids host virtualenv/dependency setup) ## Publish a test WS90 payload ```sh diff --git a/docs/rain_data_issues.md b/docs/rain_data_issues.md index eb01c13..8c1d4b1 100644 --- a/docs/rain_data_issues.md +++ b/docs/rain_data_issues.md @@ -19,7 +19,7 @@ This document captures known data-quality issues observed in the rain-model pipe Run this regularly and retain JSON reports for comparison: ```sh -python scripts/audit_rain_data.py \ +scripts/rainml_py.sh scripts/audit_rain_data.py \ --site home \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ diff --git a/docs/rain_model_runbook.md b/docs/rain_model_runbook.md index 24f3798..10a65d5 100644 --- a/docs/rain_model_runbook.md +++ b/docs/rain_model_runbook.md @@ -25,7 +25,7 @@ docker compose exec -T timescaledb \ Recommended evaluation run (includes validation-only tuning, calibration comparison, naive baselines, and walk-forward folds): ```sh -python scripts/train_rain_model.py \ +scripts/rainml_py.sh scripts/train_rain_model.py \ --site "home" \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -59,7 +59,7 @@ Review in report: 2. 
Run one dry-run inference: ```sh -python scripts/predict_rain_model.py \ +scripts/rainml_py.sh scripts/predict_rain_model.py \ --site home \ --model-path "models/rain_model.pkl" \ --model-name "rain_next_4h" \ @@ -70,7 +70,7 @@ python scripts/predict_rain_model.py \ 3. Run live inference: ```sh -python scripts/predict_rain_model.py \ +scripts/rainml_py.sh scripts/predict_rain_model.py \ --site home \ --model-path "models/rain_model.pkl" \ --model-name "rain_next_4h" \ @@ -128,7 +128,7 @@ Alert heuristic: sustained Brier-score increase > 25% from trailing 30-day avera Use the health-check script in cron, systemd timer, or your alerting scheduler: ```sh -python scripts/check_rain_pipeline_health.py \ +scripts/rainml_py.sh scripts/check_rain_pipeline_health.py \ --site home \ --model-name rain_next_4h \ --horizon-hours 4 \ @@ -164,7 +164,7 @@ Recommended production defaults: To compare saved training reports and pick a deployment candidate automatically: ```sh -python scripts/recommend_rain_model.py \ +scripts/rainml_py.sh scripts/recommend_rain_model.py \ --reports-glob "models/rain_model_report*.json" \ --require-walk-forward \ --top-k 5 \ @@ -194,7 +194,7 @@ docker compose exec -T timescaledb \ 3. Run a full 4h training/evaluation cycle and save report: ```sh -python scripts/train_rain_model.py \ +scripts/rainml_py.sh scripts/train_rain_model.py \ --site "home" \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -213,21 +213,21 @@ python scripts/train_rain_model.py \ 4. Compare 4h metrics against the latest 1h benchmark report before switching dashboard defaults: ```sh -python scripts/compare_rain_reports.py \ +scripts/rainml_py.sh scripts/compare_rain_reports.py \ --baseline "models/rain_model_report_1h.json" \ --candidate "models/rain_model_report_4h.json" ``` 5. 
Run dry-run inference, then live inference with 4h model name/horizon: ```sh -python scripts/predict_rain_model.py \ +scripts/rainml_py.sh scripts/predict_rain_model.py \ --site home \ --model-path "models/rain_model_4h.pkl" \ --model-name "rain_next_4h" \ --horizon-hours 4 \ --dry-run -python scripts/predict_rain_model.py \ +scripts/rainml_py.sh scripts/predict_rain_model.py \ --site home \ --model-path "models/rain_model_4h.pkl" \ --model-name "rain_next_4h" \ @@ -237,7 +237,7 @@ python scripts/predict_rain_model.py \ 6. Validate health checks and dashboard data path for 4h: ```sh -python scripts/check_rain_pipeline_health.py \ +scripts/rainml_py.sh scripts/check_rain_pipeline_health.py \ --site home \ --model-name rain_next_4h \ --horizon-hours 4 \ diff --git a/docs/rain_prediction.md b/docs/rain_prediction.md index 6cae8a0..57752d0 100644 --- a/docs/rain_prediction.md +++ b/docs/rain_prediction.md @@ -90,7 +90,7 @@ All examples below assume a 4-hour horizon (`--horizon-hours 4`) and `model-name ```sh export DATABASE_URL="postgres://postgres:postgres@localhost:5432/micrometeo?sslmode=disable" -python scripts/audit_rain_data.py \ +scripts/rainml_py.sh scripts/audit_rain_data.py \ --site home \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -100,7 +100,7 @@ python scripts/audit_rain_data.py \ ### 3) Train baseline model ```sh -python scripts/train_rain_model.py \ +scripts/rainml_py.sh scripts/train_rain_model.py \ --site "home" \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -117,7 +117,7 @@ python scripts/train_rain_model.py \ ### 3b) Train expanded (P1) feature-set model ```sh -python scripts/train_rain_model.py \ +scripts/rainml_py.sh scripts/train_rain_model.py \ --site "home" \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -132,7 +132,7 @@ python scripts/train_rain_model.py \ ### 3b.1) Train expanded + calendar (P2) feature-set model ```sh -python scripts/train_rain_model.py \ +scripts/rainml_py.sh 
scripts/train_rain_model.py \ --site "home" \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -146,7 +146,7 @@ python scripts/train_rain_model.py \ ### 3c) Train tree-based baseline (P1) ```sh -python scripts/train_rain_model.py \ +scripts/rainml_py.sh scripts/train_rain_model.py \ --site "home" \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -161,7 +161,7 @@ python scripts/train_rain_model.py \ ### 3d) Auto-compare logistic vs tree baseline ```sh -python scripts/train_rain_model.py \ +scripts/rainml_py.sh scripts/train_rain_model.py \ --site "home" \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -175,7 +175,7 @@ python scripts/train_rain_model.py \ ### 3e) Full P1 evaluation (tuning + calibration + walk-forward) ```sh -python scripts/train_rain_model.py \ +scripts/rainml_py.sh scripts/train_rain_model.py \ --site "home" \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -194,7 +194,7 @@ python scripts/train_rain_model.py \ ### 3f) Walk-forward threshold policy (more temporally robust alert threshold) ```sh -python scripts/train_rain_model.py \ +scripts/rainml_py.sh scripts/train_rain_model.py \ --site "home" \ --start "2026-02-01T00:00:00Z" \ --end "2026-03-03T23:55:00Z" \ @@ -210,7 +210,7 @@ python scripts/train_rain_model.py \ ### 4) Run inference and store prediction ```sh -python scripts/predict_rain_model.py \ +scripts/rainml_py.sh scripts/predict_rain_model.py \ --site home \ --model-path "models/rain_model.pkl" \ --model-name "rain_next_4h" \ @@ -252,7 +252,7 @@ docker compose logs -f rainml ### 7) Recommend deploy candidate from saved reports ```sh -python scripts/recommend_rain_model.py \ +scripts/rainml_py.sh scripts/recommend_rain_model.py \ --reports-glob "models/rain_model_report*.json" \ --require-walk-forward \ --top-k 5 \ diff --git a/scripts/rainml_py.sh b/scripts/rainml_py.sh new file mode 100755 index 0000000..323438e --- /dev/null +++ b/scripts/rainml_py.sh @@ -0,0 
+1,27 @@
+#!/usr/bin/env bash
+# Run python3 inside a one-off `rainml` compose container; all arguments are forwarded.
+set -euo pipefail
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT_DIR"
+
+if [[ $# -lt 1 ]]; then
+  cat >&2 <<'EOF'
+Usage:
+  scripts/rainml_py.sh SCRIPT.py [args...]
+
+Examples:
+  scripts/rainml_py.sh scripts/train_rain_model.py --site home --horizon-hours 4 ...
+  scripts/rainml_py.sh scripts/predict_rain_model.py --site home --model-name rain_next_4h --horizon-hours 4
+
+Optional:
+  RAINML_PY_BUILD=1 scripts/rainml_py.sh ...
+  (builds the rainml image before running)
+EOF
+  exit 1
+fi
+
+if [[ "${RAINML_PY_BUILD:-0}" == "1" ]]; then
+  docker compose build rainml
+fi
+# --rm discards the container afterwards; --no-deps skips starting linked services.
+docker compose run --rm --no-deps --entrypoint python3 rainml "$@"
diff --git a/scripts/run_p0_rain_workflow.sh b/scripts/run_p0_rain_workflow.sh
index 3f06201..ff4a0a0 100644
--- a/scripts/run_p0_rain_workflow.sh
+++ b/scripts/run_p0_rain_workflow.sh
@@ -22,7 +22,7 @@ if [[ -z "${DATABASE_URL:-}" ]]; then
 fi
 
 echo "Running rain data audit..."
-python scripts/audit_rain_data.py \
+python3 scripts/audit_rain_data.py \
   --site "$SITE" \
   --start "$START" \
   --end "$END" \
@@ -32,7 +32,7 @@ python scripts/audit_rain_data.py \
   --out "$AUDIT_PATH"
 
 echo "Training baseline rain model..."
-python scripts/train_rain_model.py \
+python3 scripts/train_rain_model.py \
   --site "$SITE" \
   --start "$START" \
   --end "$END" \
@@ -51,7 +51,7 @@ python scripts/train_rain_model.py \
   --dataset-out "$DATASET_PATH"
 
 echo "Writing current prediction..."
-python3 scripts/predict_rain_model.py \
+python3 scripts/predict_rain_model.py \
   --site "$SITE" \
   --model-path "$MODEL_PATH" \
   --model-name "$MODEL_NAME" \