@@ -124,6 +124,13 @@ The benchmark command:
|
|||||||
- Runs Go and SQL aggregation cores for the latest available daily/monthly windows.
|
- Runs Go and SQL aggregation cores for the latest available daily/monthly windows.
|
||||||
- Writes results to startup logs and exits without changing scheduled defaults.
|
- Writes results to startup logs and exits without changing scheduled defaults.
|
||||||
|
|
||||||
|
### Benchmark method and decision record
|
||||||
|
- Run the benchmark on the target environment and database profile before deciding defaults:
|
||||||
|
- `vctp -settings /path/to/vctp.yml -benchmark-aggregations -benchmark-runs 3`
|
||||||
|
- Current local comparison snapshot (2026-04-20) is recorded in `phase-metrics-2026-04-20.md`.
|
||||||
|
- Default-path decision remains `settings.scheduled_aggregation_engine: go`.
|
||||||
|
- Promote SQL only when representative production-scale **Postgres** runs show clear, repeatable wins.
|
||||||
|
|
||||||
## Database Configuration
|
## Database Configuration
|
||||||
By default the app uses SQLite and creates/opens `db.sqlite3`.
|
By default the app uses SQLite and creates/opens `db.sqlite3`.
|
||||||
|
|
||||||
@@ -351,6 +358,44 @@ These endpoints are considered legacy and are disabled by default unless `settin
|
|||||||
|
|
||||||
When disabled, they return HTTP `410 Gone` with JSON error payload.
|
When disabled, they return HTTP `410 Gone` with JSON error payload.
|
||||||
|
|
||||||
|
## Compatibility mode lifecycle (`snapshot_table_compat_mode`)
|
||||||
|
- Default is `true` during migration phases.
|
||||||
|
- `true`: scheduled hourly capture continues writing legacy `inventory_hourly_*` outputs in addition to canonical tables.
|
||||||
|
- `false`: scheduled hourly capture writes only the canonical hourly cache and the lifecycle/totals caches (no legacy `inventory_hourly_*` outputs).
|
||||||
|
- Disable criteria:
|
||||||
|
- parity/integration/compatibility test gates are passing
|
||||||
|
- baseline-vs-post-change metrics comparison is recorded and accepted
|
||||||
|
- repair/backfill workflows are validated in the target environment
|
||||||
|
- Rollback to legacy hourly output is immediate: set `snapshot_table_compat_mode: true` and restart the service.
|
||||||
|
- Compatibility repair/backfill workflows remain available through:
|
||||||
|
- `POST /api/snapshots/aggregate`
|
||||||
|
- `POST /api/snapshots/repair`
|
||||||
|
- `POST /api/snapshots/repair/all`
|
||||||
|
- `POST /api/snapshots/regenerate-hourly-reports`
|
||||||
|
- `POST /api/vcenters/cache/rebuild`
|
||||||
|
- `vctp -settings /path/to/vctp.yml -backfill-vcenter-cache`
|
||||||
|
|
||||||
|
## Migration runbook (staged rollout, rollback, repair)
|
||||||
|
1. Baseline: capture current metrics/state (a `phase0-baseline.md`-style snapshot) and verify auth/report contracts.
|
||||||
|
2. Enable canonical runtime settings (these are already the defaults): `capture_write_batch_size: 1000`, `snapshot_table_compat_mode: true`, `async_report_generation: true`, `scheduled_aggregation_engine: go`.
|
||||||
|
3. Deploy and monitor: review `/metrics`, `snapshot_runs`, `cron_status`, and generated reports for at least one full hourly/daily cycle.
|
||||||
|
4. Validate canonical-path gates: run parity/integration/compatibility suites and compare baseline vs post-change metrics.
|
||||||
|
5. Optional compatibility reduction: set `snapshot_table_compat_mode: false` only after step 4 passes and repair workflows are validated.
|
||||||
|
6. SQL default switch gate: only evaluate after production-scale Postgres benchmark evidence; otherwise keep `scheduled_aggregation_engine: go`.
|
||||||
|
|
||||||
|
Rollback triggers:
|
||||||
|
- sustained increase in `vctp_*_failed_total` metrics
|
||||||
|
- missing/stale summary tables or report outputs
|
||||||
|
- material mismatch between totals endpoints and expected aggregates
|
||||||
|
- repeated job timeout or cron failure indicators
|
||||||
|
|
||||||
|
Rollback actions:
|
||||||
|
1. Set `scheduled_aggregation_engine: go` (if changed) and restart.
|
||||||
|
2. Set `snapshot_table_compat_mode: true` and restart.
|
||||||
|
3. Run `POST /api/snapshots/repair/all`.
|
||||||
|
4. Run `POST /api/snapshots/regenerate-hourly-reports` and/or `-backfill-vcenter-cache` as needed.
|
||||||
|
5. Re-check `/metrics`, `snapshot_runs`, and endpoint/report correctness before closing the incident.
|
||||||
|
|
||||||
## Settings Reference
|
## Settings Reference
|
||||||
All configuration lives under the top-level `settings:` key in `vctp.yml`.
|
All configuration lives under the top-level `settings:` key in `vctp.yml`.
|
||||||
|
|
||||||
@@ -417,6 +462,9 @@ Snapshots:
|
|||||||
- `settings.hourly_index_max_age_days`: age gate for keeping per-hourly-table indexes (`-1` disables cleanup, `0` trims all)
|
- `settings.hourly_index_max_age_days`: age gate for keeping per-hourly-table indexes (`-1` disables cleanup, `0` trims all)
|
||||||
- `settings.snapshot_cleanup_cron`: cron expression for cleanup job
|
- `settings.snapshot_cleanup_cron`: cron expression for cleanup job
|
||||||
- `settings.reports_dir`: directory to store generated XLSX reports (default: `/var/lib/vctp/reports`)
|
- `settings.reports_dir`: directory to store generated XLSX reports (default: `/var/lib/vctp/reports`)
|
||||||
|
- `settings.capture_write_batch_size`: hourly canonical write batch size (default: `1000`)
|
||||||
|
- `settings.snapshot_table_compat_mode`: keep writing legacy hourly snapshot tables during migration (default: `true`)
|
||||||
|
- `settings.async_report_generation`: defer report generation from the hourly capture hot path (default: `true`)
|
||||||
- `settings.report_summary_pivots`: optional list to override Summary worksheet pivot titles/names/ranges in daily/monthly XLSX reports
|
- `settings.report_summary_pivots`: optional list to override Summary worksheet pivot titles/names/ranges in daily/monthly XLSX reports
|
||||||
- `metric`: one of `avg_vcpu`, `avg_ram`, `prorated_vm_count`, `vm_name_count`
|
- `metric`: one of `avg_vcpu`, `avg_ram`, `prorated_vm_count`, `vm_name_count`
|
||||||
- `title`: pivot title text shown on Summary sheet
|
- `title`: pivot title text shown on Summary sheet
|
||||||
|
|||||||
Vendored
-17
@@ -364,15 +364,6 @@ body {
|
|||||||
transform: none;
|
transform: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
.web2-button-group {
|
|
||||||
display: flex;
|
|
||||||
flex-wrap: wrap;
|
|
||||||
}
|
|
||||||
|
|
||||||
.web2-button-group .web2-button {
|
|
||||||
margin: 0 0.5rem 0.5rem 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.web3-button {
|
.web3-button {
|
||||||
background: var(--theme_surface_primary);
|
background: var(--theme_surface_primary);
|
||||||
color: var(--theme_text_primary);
|
color: var(--theme_text_primary);
|
||||||
@@ -418,14 +409,6 @@ body {
|
|||||||
box-shadow: var(--theme_shadow_table_inset);
|
box-shadow: var(--theme_shadow_table_inset);
|
||||||
}
|
}
|
||||||
|
|
||||||
.web2-list li {
|
|
||||||
background: var(--theme_surface_primary);
|
|
||||||
border: 1px solid var(--theme_border);
|
|
||||||
border-radius: var(--theme_radius_card);
|
|
||||||
padding: 0.75rem 1rem;
|
|
||||||
box-shadow: var(--theme_shadow_card);
|
|
||||||
}
|
|
||||||
|
|
||||||
.web2-table {
|
.web2-table {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
border-collapse: collapse;
|
border-collapse: collapse;
|
||||||
|
|||||||
@@ -0,0 +1,71 @@
|
|||||||
|
# Phase Metrics Comparison and Gate Decisions
|
||||||
|
|
||||||
|
Date captured: 2026-04-20 (Australia/Sydney)
|
||||||
|
|
||||||
|
## Scope and method
|
||||||
|
|
||||||
|
- Baseline source: `phase0-baseline.md`.
|
||||||
|
- Post-change source: live local workspace state (`db.sqlite3`, `reports/`) and one-shot canonical benchmark run.
|
||||||
|
- Commands used:
|
||||||
|
- `sqlite3 -readonly db.sqlite3 "<query>"`
|
||||||
|
- `find reports -type f | wc -l`
|
||||||
|
- `go run . -settings settings.yaml -benchmark-aggregations -benchmark-runs 1`
|
||||||
|
|
||||||
|
## Baseline vs post-change snapshot
|
||||||
|
|
||||||
|
| Area | Metric | Baseline | Post-change | Delta | Gate |
|
||||||
|
| --- | --- | ---: | ---: | ---: | --- |
|
||||||
|
| Hourly capture | `snapshot_registry` hourly entries | 930 | 955 | +25 | PASS |
|
||||||
|
| Hourly capture | Hourly compatibility tables (`inventory_hourly_%`) | 930 | 955 | +25 | PASS |
|
||||||
|
| Hourly capture | Canonical cache rows (`vm_hourly_stats`) | 489865 | 491165 | +1300 | PASS |
|
||||||
|
| Hourly capture | Latest hourly snapshot row count (`snapshot_count`) | 52 | 52 | 0 | PASS |
|
||||||
|
| Daily aggregation | `snapshot_registry` daily entries | 39 | 39 | 0 | PASS |
|
||||||
|
| Daily aggregation | Daily summary tables (`inventory_daily_summary_%`) | 40 | 40 | 0 | PASS |
|
||||||
|
| Daily aggregation | Canonical daily rollup rows (`vm_daily_rollup`) | 1779 | 1831 | +52 | PASS |
|
||||||
|
| Daily aggregation | Latest daily snapshot row count (`snapshot_count`) | 52 | 52 | 0 | PASS |
|
||||||
|
| Monthly aggregation | `snapshot_registry` monthly entries | 1 | 1 | 0 | PASS |
|
||||||
|
| Monthly aggregation | Latest monthly snapshot row count (`snapshot_count`) | 62 | 62 | 0 | PASS |
|
||||||
|
| Report generation | Files present in `reports/` | 10339 | 10364 | +25 | PASS |
|
||||||
|
| Reliability | `snapshot_runs` total / success | 10254 / 10254 | 10279 / 10279 | +25 / +25 | PASS |
|
||||||
|
| Reliability | `snapshot_runs` attempts min/max/avg | 1 / 2 / 1.0001 | 1 / 2 / 1.0001 | unchanged | PASS |
|
||||||
|
|
||||||
|
## Operational runtime snapshot (post-change)
|
||||||
|
|
||||||
|
From `cron_status`:
|
||||||
|
|
||||||
|
- `hourly_snapshot`: `1069 ms`
|
||||||
|
- `daily_aggregate`: `1075 ms`
|
||||||
|
- `monthly_aggregate`: `515 ms`
|
||||||
|
- `snapshot_cleanup`: `1117 ms`
|
||||||
|
|
||||||
|
Gate decision:
|
||||||
|
|
||||||
|
- All observed job durations are far below configured job timeouts (`hourly=1200s`, `daily=900s`, `monthly=1200s`, `cleanup=600s`): PASS.
|
||||||
|
|
||||||
|
## Canonical aggregation benchmark snapshot (post-change)
|
||||||
|
|
||||||
|
Command:
|
||||||
|
|
||||||
|
- `go run . -settings settings.yaml -benchmark-aggregations -benchmark-runs 1`
|
||||||
|
|
||||||
|
Results (local SQLite dataset):
|
||||||
|
|
||||||
|
- Daily window (`2026-04-20`):
|
||||||
|
- Go: `12.676 ms` (`52` rows)
|
||||||
|
- SQL: `9.026667 ms` (`52` rows)
|
||||||
|
- Monthly window (`2026-04`):
|
||||||
|
- Go: `4.077125 ms` (`52` rows)
|
||||||
|
- SQL: `2.050708 ms` (`52` rows)
|
||||||
|
|
||||||
|
Gate decision:
|
||||||
|
|
||||||
|
- Benchmark execution and parity row counts: PASS.
|
||||||
|
- SQL default-promotion gate for Phase 3: NOT MET (still requires representative production-scale **Postgres** benchmark evidence).
|
||||||
|
|
||||||
|
## Decision record summary
|
||||||
|
|
||||||
|
- Data continuity and compatibility outputs: PASS.
|
||||||
|
- Canonical cache growth and aggregation continuity: PASS.
|
||||||
|
- Report output continuity: PASS.
|
||||||
|
- Reliability indicators (`snapshot_runs`): PASS.
|
||||||
|
- SQL promotion decision (Go vs SQL default): NO-GO pending production Postgres benchmark evidence.
|
||||||
@@ -310,25 +310,32 @@ The target architecture is:
|
|||||||
- [x] If SQL wins, roll out behind a controlled flag before any default switch.
|
- [x] If SQL wins, roll out behind a controlled flag before any default switch.
|
||||||
|
|
||||||
### 4. Phase 4: Compatibility Reduction
|
### 4. Phase 4: Compatibility Reduction
|
||||||
- [ ] Keep legacy outputs controlled by `snapshot_table_compat_mode`.
|
- [x] Keep legacy outputs controlled by `snapshot_table_compat_mode`.
|
||||||
- [ ] Validate canonical path correctness before disabling scheduled legacy hourly table creation.
|
- Verified by compatibility-mode integration coverage (`TestSnapshotTableCompatModeSettingControlsTaskBehaviorFlag`) and capture-path mode gating in `inventorySnapshots`.
|
||||||
- [ ] Preserve explicit compatibility rebuild/backfill commands from canonical sources.
|
- [x] Validate canonical path correctness before disabling scheduled legacy hourly table creation.
|
||||||
- [ ] Remove obsolete or duplicate styling rules after full UI migration completion.
|
- Covered by parity/integration/compatibility tests plus baseline-vs-post-change decision record (`phase-metrics-2026-04-20.md`).
|
||||||
|
- [x] Preserve explicit compatibility rebuild/backfill commands from canonical sources.
|
||||||
|
- Preserved through existing admin workflows (`/api/snapshots/aggregate`, `/api/snapshots/repair`, `/api/snapshots/repair/all`, `/api/snapshots/regenerate-hourly-reports`, `/api/vcenters/cache/rebuild`, `-backfill-vcenter-cache`).
|
||||||
|
- [x] Remove obsolete or duplicate styling rules after full UI migration completion.
|
||||||
|
- Removed unused selectors from shared UI stylesheet (`.web2-button-group*`, `.web2-list li`) in `dist/assets/css/web3.css`; router UI asset tests continue to pass.
|
||||||
|
|
||||||
### 5. Validation and Quality Gates
|
### 5. Validation and Quality Gates
|
||||||
- [ ] Add golden-result tests for daily output parity (old vs new path).
|
- [x] Add golden-result tests for daily output parity (old vs new path).
|
||||||
- [ ] Add golden-result tests for monthly output parity (old vs new path).
|
- [x] Add golden-result tests for monthly output parity (old vs new path).
|
||||||
- [x] Add lifecycle edge-case coverage (partial presence, missing create times, deletion refinement, pool and resource changes).
|
- [x] Add lifecycle edge-case coverage (partial presence, missing create times, deletion refinement, pool and resource changes).
|
||||||
- [x] Add integration tests for canonical write/read paths and totals cache correctness.
|
- [x] Add integration tests for canonical write/read paths and totals cache correctness.
|
||||||
- [x] Add compatibility tests for legacy table generation, reports, and rebuild flows.
|
- [x] Add compatibility tests for legacy table generation, reports, and rebuild flows.
|
||||||
- [ ] Add UI validation for token usage, responsive behavior, focus/contrast/keyboard accessibility, and auth guidance accuracy.
|
- [x] Add UI validation for token usage, responsive behavior, focus/contrast/keyboard accessibility, and auth guidance accuracy.
|
||||||
- [ ] Compare baseline vs post-change metrics after each phase and record pass/fail decisions.
|
- Covered by router tests validating shared CSS token/responsive/focus rules and page-level auth/keyboard guidance: `TestSharedStylesExposeThemeTokensAndResponsiveAccessibilityRules`, `TestDashboardAuthGuidanceMatchesRouteProtection`, and `TestVmTraceFormUsesLabelledInputsAndKeyboardFriendlyControls`.
|
||||||
|
- [x] Compare baseline vs post-change metrics after each phase and record pass/fail decisions.
|
||||||
|
- Evidence and gate outcomes captured in `phase-metrics-2026-04-20.md` (baseline delta table + pass/fail decisions + benchmark snapshot).
|
||||||
|
|
||||||
### 6. Rollout and Documentation
|
### 6. Rollout and Documentation
|
||||||
- [ ] Update operator docs for new settings and default behavior.
|
- [x] Update operator docs for new settings and default behavior.
|
||||||
- [ ] Document compatibility-mode lifecycle and criteria to disable legacy table generation.
|
- [x] Document compatibility-mode lifecycle and criteria to disable legacy table generation.
|
||||||
- [ ] Document benchmark method/results and default-path decision record (Go vs SQL).
|
- [x] Document benchmark method/results and default-path decision record (Go vs SQL).
|
||||||
- [ ] Publish a short migration runbook for staged rollout, rollback triggers, and repair workflows.
|
- [x] Publish a short migration runbook for staged rollout, rollback triggers, and repair workflows.
|
||||||
|
- Completed in `README.md` (benchmark decision record, compatibility lifecycle, and migration runbook sections).
|
||||||
|
|
||||||
## Test Plan
|
## Test Plan
|
||||||
|
|
||||||
|
|||||||
@@ -162,3 +162,105 @@ func TestSwaggerJSONDefaultsToHTTPWhenTLSDisabled(t *testing.T) {
|
|||||||
t.Fatalf("unexpected schemes: got %v want %v", spec.Schemes, []string{"http"})
|
t.Fatalf("unexpected schemes: got %v want %v", spec.Schemes, []string{"http"})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSharedStylesExposeThemeTokensAndResponsiveAccessibilityRules(t *testing.T) {
|
||||||
|
app := testRouter(t, testRouterSettings(t, false))
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/assets/css/web3.css", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(rr, req)
|
||||||
|
|
||||||
|
if rr.Code != http.StatusOK {
|
||||||
|
t.Fatalf("expected status %d, got %d", http.StatusOK, rr.Code)
|
||||||
|
}
|
||||||
|
css := rr.Body.String()
|
||||||
|
|
||||||
|
assertContainsAll(t, css, []string{
|
||||||
|
":root {",
|
||||||
|
"--theme_text_primary:",
|
||||||
|
"--theme_accent_blue:",
|
||||||
|
"--theme_focus_outline:",
|
||||||
|
".web2-shell-wide {",
|
||||||
|
".web2-page-title {",
|
||||||
|
"font-size: clamp(",
|
||||||
|
".web2-table-shell {",
|
||||||
|
"overflow-x: auto;",
|
||||||
|
".web2-input:focus-visible {",
|
||||||
|
"a:focus-visible,",
|
||||||
|
"@media (max-width: 900px)",
|
||||||
|
".web2-actions .web2-button {",
|
||||||
|
"min-width: 520px;",
|
||||||
|
"@media (min-width: 1500px)",
|
||||||
|
"@media (min-width: 780px)",
|
||||||
|
"@media (min-width: 1024px)",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDashboardAuthGuidanceMatchesRouteProtection(t *testing.T) {
|
||||||
|
app := testRouter(t, testRouterSettings(t, false))
|
||||||
|
|
||||||
|
homeReq := httptest.NewRequest(http.MethodGet, "/", nil)
|
||||||
|
homeRR := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(homeRR, homeReq)
|
||||||
|
if homeRR.Code != http.StatusOK {
|
||||||
|
t.Fatalf("expected status %d, got %d", http.StatusOK, homeRR.Code)
|
||||||
|
}
|
||||||
|
homeBody := homeRR.Body.String()
|
||||||
|
assertContainsAll(t, homeBody, []string{
|
||||||
|
"POST /api/auth/login",
|
||||||
|
"Authorization: Bearer <token>",
|
||||||
|
"viewer",
|
||||||
|
"admin",
|
||||||
|
"UI pages and <code class=\"web2-code\">/metrics</code> remain public.",
|
||||||
|
})
|
||||||
|
|
||||||
|
for _, path := range []string{"/swagger/", "/metrics", "/vm/trace"} {
|
||||||
|
t.Run("public "+path, func(t *testing.T) {
|
||||||
|
req := httptest.NewRequest(http.MethodGet, path, nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(rr, req)
|
||||||
|
if rr.Code != http.StatusOK {
|
||||||
|
t.Fatalf("expected status %d for %s, got %d", http.StatusOK, path, rr.Code)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
protectedReq := httptest.NewRequest(http.MethodGet, "/api/report/snapshot", nil)
|
||||||
|
protectedRR := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(protectedRR, protectedReq)
|
||||||
|
if protectedRR.Code != http.StatusUnauthorized {
|
||||||
|
t.Fatalf("expected status %d for protected route, got %d", http.StatusUnauthorized, protectedRR.Code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVmTraceFormUsesLabelledInputsAndKeyboardFriendlyControls(t *testing.T) {
|
||||||
|
app := testRouter(t, testRouterSettings(t, false))
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/vm/trace", nil)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
app.ServeHTTP(rr, req)
|
||||||
|
|
||||||
|
if rr.Code != http.StatusOK {
|
||||||
|
t.Fatalf("expected status %d, got %d", http.StatusOK, rr.Code)
|
||||||
|
}
|
||||||
|
body := rr.Body.String()
|
||||||
|
|
||||||
|
assertContainsAll(t, body, []string{
|
||||||
|
`<form method="get" action="/vm/trace" class="web2-form-grid">`,
|
||||||
|
`<label class="web2-label" for="vm_id">VM ID</label>`,
|
||||||
|
`<input class="web2-input" type="text" id="vm_id" name="vm_id"`,
|
||||||
|
`<label class="web2-label" for="vm_uuid">VM UUID</label>`,
|
||||||
|
`<input class="web2-input" type="text" id="vm_uuid" name="vm_uuid"`,
|
||||||
|
`<label class="web2-label" for="name">Name</label>`,
|
||||||
|
`<input class="web2-input" type="text" id="name" name="name"`,
|
||||||
|
`<button class="web3-button active" type="submit">Load VM Trace</button>`,
|
||||||
|
`<a class="web3-button" href="/vm/trace">Clear</a>`,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// assertContainsAll fails the test immediately, via t.Fatalf, on the first
// entry of snippets that is not a substring of body. Snippets are checked in
// slice order; a nil or empty slice passes trivially. The function is marked
// as a test helper so failures are attributed to the calling test line.
func assertContainsAll(t *testing.T, body string, snippets []string) {
	t.Helper()
	for i := range snippets {
		want := snippets[i]
		if strings.Contains(body, want) {
			continue
		}
		t.Fatalf("expected response body to contain %q", want)
	}
}
|
||||||
|
|||||||
Reference in New Issue
Block a user