improve tracking of VM deletions
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2026-01-15 14:25:51 +11:00
parent bba308ad28
commit 8dee30ea97
7 changed files with 436 additions and 42 deletions

View File

@@ -5,6 +5,7 @@ import (
"database/sql"
"fmt"
"strings"
"time"
"vctp/db/queries"
@@ -281,6 +282,15 @@ func EnsureSnapshotTable(ctx context.Context, dbConn *sqlx.DB, tableName string)
return err
}
return EnsureSnapshotIndexes(ctx, dbConn, tableName)
}
// EnsureSnapshotIndexes creates the standard indexes for a snapshot table.
func EnsureSnapshotIndexes(ctx context.Context, dbConn *sqlx.DB, tableName string) error {
if _, err := SafeTableName(tableName); err != nil {
return err
}
indexes := []string{
fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_vm_vcenter_idx ON %s ("VmId","Vcenter")`, tableName, tableName),
fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_snapshottime_idx ON %s ("SnapshotTime")`, tableName, tableName),
@@ -387,30 +397,31 @@ func BuildDailySummaryInsert(tableName string, unionQuery string) (string, error
WITH snapshots AS (
%s
), totals AS (
SELECT COUNT(DISTINCT "SnapshotTime") AS total_samples FROM snapshots
SELECT COUNT(DISTINCT "SnapshotTime") AS total_samples, MAX("SnapshotTime") AS max_snapshot FROM snapshots
), agg AS (
SELECT
"InventoryId", "Name", "Vcenter", "VmId", "EventKey", "CloudId",
MIN(NULLIF("CreationTime", 0)) AS any_creation,
MAX(NULLIF("DeletionTime", 0)) AS any_deletion,
MIN(CASE WHEN "IsPresent" = 'TRUE' THEN "SnapshotTime" END) AS first_present,
MAX(CASE WHEN "IsPresent" = 'TRUE' THEN "SnapshotTime" END) AS last_present,
MAX(CASE WHEN "IsPresent" = 'FALSE' THEN "SnapshotTime" END) AS last_absent,
"Datacenter", "Cluster", "Folder", "ProvisionedDisk", "VcpuCount",
"RamGB", "IsTemplate", "PoweredOn", "SrmPlaceholder", "VmUuid",
SUM(CASE WHEN "IsPresent" = 'TRUE' THEN 1 ELSE 0 END) AS samples_present,
SUM(CASE WHEN "IsPresent" = 'TRUE' AND "VcpuCount" IS NOT NULL THEN "VcpuCount" ELSE 0 END) AS sum_vcpu,
SUM(CASE WHEN "IsPresent" = 'TRUE' AND "RamGB" IS NOT NULL THEN "RamGB" ELSE 0 END) AS sum_ram,
SUM(CASE WHEN "IsPresent" = 'TRUE' AND "ProvisionedDisk" IS NOT NULL THEN "ProvisionedDisk" ELSE 0 END) AS sum_disk,
SUM(CASE WHEN "IsPresent" = 'TRUE' AND LOWER("ResourcePool") = 'tin' THEN 1 ELSE 0 END) AS tin_hits,
SUM(CASE WHEN "IsPresent" = 'TRUE' AND LOWER("ResourcePool") = 'bronze' THEN 1 ELSE 0 END) AS bronze_hits,
SUM(CASE WHEN "IsPresent" = 'TRUE' AND LOWER("ResourcePool") = 'silver' THEN 1 ELSE 0 END) AS silver_hits,
SUM(CASE WHEN "IsPresent" = 'TRUE' AND LOWER("ResourcePool") = 'gold' THEN 1 ELSE 0 END) AS gold_hits
FROM snapshots
s."InventoryId", s."Name", s."Vcenter", s."VmId", s."EventKey", s."CloudId",
MIN(NULLIF(s."CreationTime", 0)) AS any_creation,
MAX(NULLIF(s."DeletionTime", 0)) AS any_deletion,
MAX(COALESCE(inv."DeletionTime", 0)) AS inv_deletion,
MIN(s."SnapshotTime") AS first_present,
MAX(s."SnapshotTime") AS last_present,
COUNT(*) AS samples_present,
s."Datacenter", s."Cluster", s."Folder", s."ProvisionedDisk", s."VcpuCount",
s."RamGB", s."IsTemplate", s."PoweredOn", s."SrmPlaceholder", s."VmUuid",
SUM(CASE WHEN s."VcpuCount" IS NOT NULL THEN s."VcpuCount" ELSE 0 END) AS sum_vcpu,
SUM(CASE WHEN s."RamGB" IS NOT NULL THEN s."RamGB" ELSE 0 END) AS sum_ram,
SUM(CASE WHEN s."ProvisionedDisk" IS NOT NULL THEN s."ProvisionedDisk" ELSE 0 END) AS sum_disk,
SUM(CASE WHEN LOWER(s."ResourcePool") = 'tin' THEN 1 ELSE 0 END) AS tin_hits,
SUM(CASE WHEN LOWER(s."ResourcePool") = 'bronze' THEN 1 ELSE 0 END) AS bronze_hits,
SUM(CASE WHEN LOWER(s."ResourcePool") = 'silver' THEN 1 ELSE 0 END) AS silver_hits,
SUM(CASE WHEN LOWER(s."ResourcePool") = 'gold' THEN 1 ELSE 0 END) AS gold_hits
FROM snapshots s
LEFT JOIN inventory inv ON inv."VmId" = s."VmId" AND inv."Vcenter" = s."Vcenter"
GROUP BY
"InventoryId", "Name", "Vcenter", "VmId", "EventKey", "CloudId",
"Datacenter", "Cluster", "Folder", "ProvisionedDisk", "VcpuCount",
"RamGB", "IsTemplate", "PoweredOn", "SrmPlaceholder", "VmUuid"
s."InventoryId", s."Name", s."Vcenter", s."VmId", s."EventKey", s."CloudId",
s."Datacenter", s."Cluster", s."Folder", s."ProvisionedDisk", s."VcpuCount",
s."RamGB", s."IsTemplate", s."PoweredOn", s."SrmPlaceholder", s."VmUuid"
)
INSERT INTO %s (
"InventoryId", "Name", "Vcenter", "VmId", "EventKey", "CloudId", "CreationTime", "DeletionTime",
@@ -424,8 +435,8 @@ SELECT
agg."InventoryId", agg."Name", agg."Vcenter", agg."VmId", agg."EventKey", agg."CloudId",
COALESCE(agg.any_creation, agg.first_present, 0) AS "CreationTime",
CASE
WHEN agg.last_present IS NULL THEN NULLIF(agg.any_deletion, 0)
WHEN agg.last_absent IS NOT NULL AND agg.last_absent > agg.last_present THEN agg.last_absent
WHEN NULLIF(agg.inv_deletion, 0) IS NOT NULL THEN NULLIF(agg.inv_deletion, 0)
WHEN totals.max_snapshot IS NOT NULL AND agg.last_present < totals.max_snapshot THEN COALESCE(NULLIF(agg.any_deletion, 0), totals.max_snapshot, agg.last_present)
ELSE NULLIF(agg.any_deletion, 0)
END AS "DeletionTime",
(
@@ -482,7 +493,7 @@ GROUP BY
agg."InventoryId", agg."Name", agg."Vcenter", agg."VmId", agg."EventKey", agg."CloudId",
agg."Datacenter", agg."Cluster", agg."Folder", agg."ProvisionedDisk", agg."VcpuCount",
agg."RamGB", agg."IsTemplate", agg."PoweredOn", agg."SrmPlaceholder", agg."VmUuid",
agg.any_creation, agg.any_deletion, agg.first_present, agg.last_present, agg.last_absent,
agg.any_creation, agg.any_deletion, agg.first_present, agg.last_present,
totals.total_samples;
`, unionQuery, tableName)
return insert, nil
@@ -668,3 +679,132 @@ func EnsureSummaryTable(ctx context.Context, dbConn *sqlx.DB, tableName string)
}
return nil
}
// EnsureSnapshotRunTable creates a table to track per-vCenter hourly snapshot attempts.
func EnsureSnapshotRunTable(ctx context.Context, dbConn *sqlx.DB) error {
ddl := `
CREATE TABLE IF NOT EXISTS snapshot_runs (
"RowId" INTEGER PRIMARY KEY AUTOINCREMENT,
"Vcenter" TEXT NOT NULL,
"SnapshotTime" BIGINT NOT NULL,
"Attempts" INTEGER NOT NULL DEFAULT 0,
"Success" TEXT NOT NULL DEFAULT 'FALSE',
"LastError" TEXT,
"LastAttempt" BIGINT NOT NULL
);
`
if strings.ToLower(dbConn.DriverName()) == "pgx" || strings.ToLower(dbConn.DriverName()) == "postgres" {
ddl = `
CREATE TABLE IF NOT EXISTS snapshot_runs (
"RowId" BIGSERIAL PRIMARY KEY,
"Vcenter" TEXT NOT NULL,
"SnapshotTime" BIGINT NOT NULL,
"Attempts" INTEGER NOT NULL DEFAULT 0,
"Success" TEXT NOT NULL DEFAULT 'FALSE',
"LastError" TEXT,
"LastAttempt" BIGINT NOT NULL
);
`
}
if _, err := dbConn.ExecContext(ctx, ddl); err != nil {
return err
}
indexes := []string{
`CREATE UNIQUE INDEX IF NOT EXISTS snapshot_runs_vc_time_idx ON snapshot_runs ("Vcenter","SnapshotTime")`,
`CREATE INDEX IF NOT EXISTS snapshot_runs_success_idx ON snapshot_runs ("Success")`,
}
for _, idx := range indexes {
if _, err := dbConn.ExecContext(ctx, idx); err != nil {
return err
}
}
return nil
}
// UpsertSnapshotRun updates or inserts snapshot run status.
func UpsertSnapshotRun(ctx context.Context, dbConn *sqlx.DB, vcenter string, snapshotTime time.Time, success bool, errMsg string) error {
if err := EnsureSnapshotRunTable(ctx, dbConn); err != nil {
return err
}
successStr := "FALSE"
if success {
successStr = "TRUE"
}
now := time.Now().Unix()
driver := strings.ToLower(dbConn.DriverName())
switch driver {
case "sqlite":
_, err := dbConn.ExecContext(ctx, `
INSERT INTO snapshot_runs ("Vcenter","SnapshotTime","Attempts","Success","LastError","LastAttempt")
VALUES (?, ?, 1, ?, ?, ?)
ON CONFLICT("Vcenter","SnapshotTime") DO UPDATE SET
"Attempts" = snapshot_runs."Attempts" + 1,
"Success" = excluded."Success",
"LastError" = excluded."LastError",
"LastAttempt" = excluded."LastAttempt"
`, vcenter, snapshotTime.Unix(), successStr, errMsg, now)
return err
case "pgx", "postgres":
_, err := dbConn.ExecContext(ctx, `
INSERT INTO snapshot_runs ("Vcenter","SnapshotTime","Attempts","Success","LastError","LastAttempt")
VALUES ($1, $2, 1, $3, $4, $5)
ON CONFLICT("Vcenter","SnapshotTime") DO UPDATE SET
"Attempts" = snapshot_runs."Attempts" + 1,
"Success" = EXCLUDED."Success",
"LastError" = EXCLUDED."LastError",
"LastAttempt" = EXCLUDED."LastAttempt"
`, vcenter, snapshotTime.Unix(), successStr, errMsg, now)
return err
default:
return fmt.Errorf("unsupported driver for snapshot_runs upsert: %s", driver)
}
}
// ListFailedSnapshotRuns returns vcenter/time pairs needing retry.
func ListFailedSnapshotRuns(ctx context.Context, dbConn *sqlx.DB, maxAttempts int) ([]struct {
Vcenter string
SnapshotTime int64
Attempts int
}, error) {
if maxAttempts <= 0 {
maxAttempts = 3
}
driver := strings.ToLower(dbConn.DriverName())
query := `
SELECT "Vcenter","SnapshotTime","Attempts"
FROM snapshot_runs
WHERE "Success" = 'FALSE' AND "Attempts" < ?
ORDER BY "LastAttempt" ASC
`
args := []interface{}{maxAttempts}
if driver == "pgx" || driver == "postgres" {
query = `
SELECT "Vcenter","SnapshotTime","Attempts"
FROM snapshot_runs
WHERE "Success" = 'FALSE' AND "Attempts" < $1
ORDER BY "LastAttempt" ASC
`
}
type row struct {
Vcenter string `db:"Vcenter"`
SnapshotTime int64 `db:"SnapshotTime"`
Attempts int `db:"Attempts"`
}
rows := []row{}
if err := dbConn.SelectContext(ctx, &rows, query, args...); err != nil {
return nil, err
}
results := make([]struct {
Vcenter string
SnapshotTime int64
Attempts int
}, 0, len(rows))
for _, r := range rows {
results = append(results, struct {
Vcenter string
SnapshotTime int64
Attempts int
}{Vcenter: r.Vcenter, SnapshotTime: r.SnapshotTime, Attempts: r.Attempts})
}
return results, nil
}