From c66679a71f6061685dd07ddc14ce5197c162284e Mon Sep 17 00:00:00 2001 From: Nathan Coad Date: Sun, 8 Feb 2026 15:40:42 +1100 Subject: [PATCH] more index cleanups to optimise space --- README.md | 25 +-- db/db.go | 10 +- db/helpers.go | 228 +++++++++++++++++++++++++-- db/helpers_cache_and_index_test.go | 164 +++++++++++++++++++ internal/settings/settings.go | 7 + internal/tasks/inventorySnapshots.go | 23 +++ main.go | 17 +- server/handler/legacy_gate.go | 17 +- server/handler/legacy_gate_test.go | 26 +-- server/router/router.go | 2 +- src/postinstall.sh | 102 +++++++++++- src/vctp.default | 20 ++- src/vctp.yml | 10 ++ 13 files changed, 590 insertions(+), 61 deletions(-) create mode 100644 db/helpers_cache_and_index_test.go diff --git a/README.md b/README.md index 69dacbc..8ba9b74 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Monthly summary rows keep the same aggregate fields as daily summaries and recom The RPM installs the service and defaults under `/usr/bin`, config under `/etc/dtms`, and data under `/var/lib/vctp`: - Binary: `/usr/bin/vctp-linux-amd64` - Systemd unit: `/etc/systemd/system/vctp.service` -- Defaults/env: `/etc/dtms/vctp.yml` (override with `-settings`), `/etc/default/vctp` (environment) +- Defaults/config: `/etc/dtms/vctp.yml` (override with `-settings`), `/etc/default/vctp` (optional env flags) - TLS cert/key: `/etc/dtms/vctp.crt` and `/etc/dtms/vctp.key` (generated if absent) - Data: SQLite DB and reports default to `/var/lib/vctp` (reports under `/var/lib/vctp/reports`) - Scripts: preinstall/postinstall handle directory creation and permissions. @@ -84,7 +84,7 @@ vctp -settings /path/to/vctp.yml -db-cleanup By default the app uses SQLite and creates/opens `db.sqlite3`. PostgreSQL support is currently **experimental** and not a production target. To enable it, -set `VCTP_ENABLE_EXPERIMENTAL_POSTGRES=1` and update the settings file: +set `settings.enable_experimental_postgres: true` in the settings file: - `settings.database_driver`: `sqlite` (default) or `postgres` (experimental) - `settings.database_url`: SQLite file path/DSN or PostgreSQL DSN @@ -93,10 +93,12 @@ Examples: ```yaml settings: database_driver: sqlite + enable_experimental_postgres: false database_url: ./db.sqlite3 settings: database_driver: postgres + enable_experimental_postgres: true database_url: postgres://user:pass@localhost:5432/vctp?sslmode=disable ``` @@ -110,12 +112,10 @@ Hourly and daily snapshot table retention can be configured in the settings file - `settings.daily_snapshot_max_age_months` (default: 12) ## Runtime Environment Flags -These flags are read from the process environment (for example via `/etc/default/vctp` on systemd installs): +These optional flags are read from the process environment (for example via `/etc/default/vctp`): -- `VCTP_ENCRYPTION_KEY`: optional explicit key source for credential encryption/decryption. - Recommended for stable behavior across host migrations/rebuilds. -- `VCTP_ENABLE_EXPERIMENTAL_POSTGRES`: set to `1` to enable experimental PostgreSQL driver startup. -- `VCTP_ENABLE_LEGACY_API`: set to `1` to re-enable deprecated legacy API endpoints temporarily. +- `DAILY_AGG_GO`: set to `1` (default in `src/vctp.default`) to use the Go daily aggregation path. +- `MONTHLY_AGG_GO`: set to `1` (default in `src/vctp.default`) to use the Go monthly aggregation path. ## Credential Encryption Lifecycle At startup, vCTP resolves `settings.vcenter_password` using this order: @@ -132,7 +132,7 @@ Behavior notes: - Legacy encrypted values can still be migrated forward automatically. ## Deprecated API Endpoints -These endpoints are considered legacy and are disabled by default unless `VCTP_ENABLE_LEGACY_API=1`: +These endpoints are considered legacy and are disabled by default unless `settings.enable_legacy_api: true`: - `/api/event/vm/create` - `/api/event/vm/modify` @@ -151,7 +151,8 @@ General: - `settings.log_output`: log format, `text` or `json` Database: -- `settings.database_driver`: `sqlite` or `postgres` (experimental; requires `VCTP_ENABLE_EXPERIMENTAL_POSTGRES=1`) +- `settings.database_driver`: `sqlite` or `postgres` (experimental) +- `settings.enable_experimental_postgres`: set `true` to allow PostgreSQL startup - `settings.database_url`: SQLite file path/DSN or PostgreSQL DSN HTTP/TLS: @@ -162,9 +163,12 @@ HTTP/TLS: - `settings.tls_key_filename`: PEM private key path (TLS mode) vCenter: +- `settings.encryption_key`: optional explicit key source for credential encryption/decryption. + If unset, vCTP derives a host key from hardware/host identity. - `settings.vcenter_username`: vCenter username - `settings.vcenter_password`: vCenter password (auto-encrypted on startup if plaintext length > 2) - `settings.vcenter_insecure`: `true` to skip TLS verification +- `settings.enable_legacy_api`: set `true` to temporarily re-enable deprecated legacy endpoints - `settings.vcenter_event_polling_seconds`: deprecated and ignored - `settings.vcenter_inventory_polling_seconds`: deprecated and ignored - `settings.vcenter_inventory_snapshot_seconds`: hourly snapshot cadence (seconds) @@ -172,14 +176,13 @@ vCenter: - `settings.vcenter_addresses`: list of vCenter SDK URLs to monitor Credential encryption: -- `VCTP_ENCRYPTION_KEY`: optional environment variable used to derive the encryption key. - If unset, vCTP derives a host key from hardware/host identity. - New encrypted values are written with `enc:v1:` prefix. Snapshots: - `settings.hourly_snapshot_concurrency`: max concurrent vCenter snapshots (0 = unlimited) - `settings.hourly_snapshot_max_age_days`: retention for hourly tables - `settings.daily_snapshot_max_age_months`: retention for daily tables +- `settings.hourly_index_max_age_days`: age gate for keeping per-hourly-table indexes (`-1` disables cleanup, `0` trims all) - `settings.snapshot_cleanup_cron`: cron expression for cleanup job - `settings.reports_dir`: directory to store generated XLSX reports (default: `/var/lib/vctp/reports`) - `settings.hourly_snapshot_retry_seconds`: interval for retrying failed hourly snapshots (default: 300 seconds) diff --git a/db/db.go b/db/db.go index 2ee6d51..ea2bc2b 100644 --- a/db/db.go +++ b/db/db.go @@ -5,7 +5,6 @@ import ( "embed" "fmt" "log/slog" - "os" "reflect" "strings" @@ -24,8 +23,9 @@ type Database interface { } type Config struct { - Driver string - DSN string + Driver string + DSN string + EnableExperimentalPostgres bool } func New(logger *slog.Logger, cfg Config) (Database, error) { @@ -42,8 +42,8 @@ func New(logger *slog.Logger, cfg Config) (Database, error) { return db, nil case "postgres": // The sqlc query set is SQLite-first. Keep Postgres opt-in until full parity is validated. - if strings.TrimSpace(os.Getenv("VCTP_ENABLE_EXPERIMENTAL_POSTGRES")) != "1" { - return nil, fmt.Errorf("postgres driver is disabled by default; set VCTP_ENABLE_EXPERIMENTAL_POSTGRES=1 to enable experimental mode") + if !cfg.EnableExperimentalPostgres { + return nil, fmt.Errorf("postgres driver is disabled by default; set settings.enable_experimental_postgres=true to enable experimental mode") } db, err := newPostgresDB(logger, cfg.DSN) if err != nil { diff --git a/db/helpers.go b/db/helpers.go index 0d9c145..8835d27 100644 --- a/db/helpers.go +++ b/db/helpers.go @@ -7,6 +7,7 @@ import ( "fmt" "log/slog" "sort" + "strconv" "strings" "time" @@ -430,6 +431,16 @@ ORDER BY tablename DESC // CleanupHourlySnapshotIndexes drops low-value per-table indexes on hourly snapshot tables. func CleanupHourlySnapshotIndexes(ctx context.Context, dbConn *sqlx.DB) (int, error) { + return cleanupHourlySnapshotIndexes(ctx, dbConn, 0) +} + +// CleanupHourlySnapshotIndexesOlderThan drops per-table hourly indexes for snapshot tables older than cutoff. +// cutoff <= 0 means drop across all hourly tables. +func CleanupHourlySnapshotIndexesOlderThan(ctx context.Context, dbConn *sqlx.DB, cutoff time.Time) (int, error) { + return cleanupHourlySnapshotIndexes(ctx, dbConn, cutoff.Unix()) +} + +func cleanupHourlySnapshotIndexes(ctx context.Context, dbConn *sqlx.DB, cutoffUnix int64) (int, error) { driver := strings.ToLower(dbConn.DriverName()) if driver != "sqlite" { return 0, fmt.Errorf("hourly snapshot index cleanup is only supported for sqlite") @@ -438,25 +449,68 @@ func CleanupHourlySnapshotIndexes(ctx context.Context, dbConn *sqlx.DB) (int, er if err != nil { return 0, err } + + var existing []struct { + Name string `db:"name"` + } + if err := selectLog(ctx, dbConn, &existing, ` +SELECT name +FROM sqlite_master +WHERE type = 'index' + AND tbl_name LIKE 'inventory_hourly_%' +`); err != nil { + return 0, err + } + existingSet := make(map[string]struct{}, len(existing)) + for _, idx := range existing { + existingSet[idx.Name] = struct{}{} + } + dropped := 0 for _, tableName := range tables { if _, err := SafeTableName(tableName); err != nil { continue } - indexes := []string{ - fmt.Sprintf(`DROP INDEX IF EXISTS %s_snapshottime_idx`, tableName), - fmt.Sprintf(`DROP INDEX IF EXISTS %s_resourcepool_idx`, tableName), + snapshotUnix, ok := parseHourlySnapshotUnix(tableName) + if !ok { + continue } - for _, stmt := range indexes { - if _, err := execLog(ctx, dbConn, stmt); err != nil { + if cutoffUnix > 0 && snapshotUnix >= cutoffUnix { + continue + } + + indexNames := []string{ + fmt.Sprintf("%s_vm_vcenter_idx", tableName), + fmt.Sprintf("%s_snapshottime_idx", tableName), + fmt.Sprintf("%s_resourcepool_idx", tableName), + } + for _, indexName := range indexNames { + if _, exists := existingSet[indexName]; !exists { + continue + } + if _, err := execLog(ctx, dbConn, fmt.Sprintf(`DROP INDEX IF EXISTS %s`, indexName)); err != nil { return dropped, err } + delete(existingSet, indexName) dropped++ } } return dropped, nil } +func parseHourlySnapshotUnix(tableName string) (int64, bool) { + const prefix = "inventory_hourly_" + if !strings.HasPrefix(tableName, prefix) { + return 0, false + } + suffix := strings.TrimPrefix(tableName, prefix) + unix, err := strconv.ParseInt(suffix, 10, 64) + if err != nil || unix <= 0 { + return 0, false + } + return unix, true +} + // BackfillSerialColumn sets missing values in a serial-like column for Postgres tables. func BackfillSerialColumn(ctx context.Context, dbConn *sqlx.DB, tableName, columnName string) error { if err := ValidateTableName(tableName); err != nil { @@ -550,6 +604,7 @@ CREATE TABLE IF NOT EXISTS vm_hourly_stats ( return err } _, _ = execLog(ctx, dbConn, `CREATE INDEX IF NOT EXISTS vm_hourly_stats_vmuuid_time_idx ON vm_hourly_stats ("VmUuid","SnapshotTime")`) + _, _ = execLog(ctx, dbConn, `CREATE INDEX IF NOT EXISTS vm_hourly_stats_vmid_time_idx ON vm_hourly_stats ("VmId","SnapshotTime")`) _, _ = execLog(ctx, dbConn, `CREATE INDEX IF NOT EXISTS vm_hourly_stats_snapshottime_idx ON vm_hourly_stats ("SnapshotTime")`) return nil } @@ -1243,8 +1298,38 @@ type VmLifecycle struct { } // FetchVmTrace returns combined hourly snapshot records for a VM (by id/uuid/name) ordered by snapshot time. -// To avoid SQLite's UNION term limits, this iterates tables one by one and merges in-memory. +// It prefers the shared vm_hourly_stats history table and falls back to per-snapshot tables. func FetchVmTrace(ctx context.Context, dbConn *sqlx.DB, vmID, vmUUID, name string) ([]VmTraceRow, error) { + if TableExists(ctx, dbConn, "vm_hourly_stats") { + rows, err := fetchVmTraceFromHourlyCache(ctx, dbConn, vmID, vmUUID, name) + if err != nil { + slog.Warn("vm trace cache query failed; falling back to hourly tables", "error", err) + } else if len(rows) > 0 { + slog.Debug("vm trace loaded from hourly cache", "row_count", len(rows)) + return rows, nil + } + } + return fetchVmTraceFromSnapshotTables(ctx, dbConn, vmID, vmUUID, name) +} + +func fetchVmTraceFromHourlyCache(ctx context.Context, dbConn *sqlx.DB, vmID, vmUUID, name string) ([]VmTraceRow, error) { + query := ` +SELECT "SnapshotTime","Name","Vcenter","VmId","VmUuid","ResourcePool","VcpuCount","RamGB","ProvisionedDisk", + COALESCE("CreationTime",0) AS "CreationTime", + COALESCE("DeletionTime",0) AS "DeletionTime" +FROM vm_hourly_stats +WHERE ("VmId" = ? OR "VmUuid" = ? OR lower("Name") = lower(?)) +ORDER BY "SnapshotTime" +` + query = dbConn.Rebind(query) + var rows []VmTraceRow + if err := selectLog(ctx, dbConn, &rows, query, vmID, vmUUID, name); err != nil { + return nil, err + } + return rows, nil +} + +func fetchVmTraceFromSnapshotTables(ctx context.Context, dbConn *sqlx.DB, vmID, vmUUID, name string) ([]VmTraceRow, error) { var tables []struct { TableName string `db:"table_name"` SnapshotTime int64 `db:"snapshot_time"` @@ -1281,7 +1366,6 @@ WHERE ("VmId" = ? OR "VmUuid" = ? OR lower("Name") = lower(?)) `, t.SnapshotTime, t.TableName) args := []interface{}{vmID, vmUUID, name} if driver != "sqlite" { - // convert ? to $1 style for postgres/pgx query = strings.Replace(query, "?", "$1", 1) query = strings.Replace(query, "?", "$2", 1) query = strings.Replace(query, "?", "$3", 1) @@ -1302,8 +1386,134 @@ WHERE ("VmId" = ? OR "VmUuid" = ? OR lower("Name") = lower(?)) return rows, nil } -// FetchVmLifecycle walks hourly snapshots to determine lifecycle bounds for a VM. +// FetchVmLifecycle walks VM history data to determine lifecycle bounds for a VM. +// It prefers vm_hourly_stats + vm_lifecycle_cache and falls back to per-snapshot table probes. func FetchVmLifecycle(ctx context.Context, dbConn *sqlx.DB, vmID, vmUUID, name string) (VmLifecycle, error) { + if TableExists(ctx, dbConn, "vm_hourly_stats") { + lifecycle, found, err := fetchVmLifecycleFromHourlyCache(ctx, dbConn, vmID, vmUUID, name) + if err != nil { + slog.Warn("vm lifecycle cache query failed; falling back to hourly tables", "error", err) + } else if found { + if TableExists(ctx, dbConn, "vm_lifecycle_cache") { + cached, cachedFound, cacheErr := fetchVmLifecycleFromLifecycleCache(ctx, dbConn, vmID, vmUUID, name) + if cacheErr != nil { + slog.Warn("vm lifecycle cache lookup failed", "error", cacheErr) + } else if cachedFound { + lifecycle = mergeVmLifecycle(lifecycle, cached) + } + } + return lifecycle, nil + } + } + return fetchVmLifecycleFromSnapshotTables(ctx, dbConn, vmID, vmUUID, name) +} + +func fetchVmLifecycleFromHourlyCache(ctx context.Context, dbConn *sqlx.DB, vmID, vmUUID, name string) (VmLifecycle, bool, error) { + var row struct { + Rows int64 `db:"rows"` + Creation sql.NullInt64 `db:"creation_time"` + FirstSeen sql.NullInt64 `db:"first_seen"` + LastSeen sql.NullInt64 `db:"last_seen"` + Deletion sql.NullInt64 `db:"deletion_time"` + } + query := ` +SELECT + COUNT(1) AS rows, + MIN(NULLIF("CreationTime",0)) AS creation_time, + MIN("SnapshotTime") AS first_seen, + MAX("SnapshotTime") AS last_seen, + MIN(NULLIF("DeletionTime",0)) AS deletion_time +FROM vm_hourly_stats +WHERE ("VmId" = ? OR "VmUuid" = ? OR lower("Name") = lower(?)) +` + query = dbConn.Rebind(query) + if err := getLog(ctx, dbConn, &row, query, vmID, vmUUID, name); err != nil { + return VmLifecycle{}, false, err + } + if row.Rows == 0 { + return VmLifecycle{}, false, nil + } + lifecycle := VmLifecycle{ + FirstSeen: row.FirstSeen.Int64, + LastSeen: row.LastSeen.Int64, + } + if row.Creation.Valid && row.Creation.Int64 > 0 { + lifecycle.CreationTime = row.Creation.Int64 + lifecycle.CreationApprox = false + } else if row.FirstSeen.Valid && row.FirstSeen.Int64 > 0 { + lifecycle.CreationTime = row.FirstSeen.Int64 + lifecycle.CreationApprox = true + } + if row.Deletion.Valid && row.Deletion.Int64 > 0 { + lifecycle.DeletionTime = row.Deletion.Int64 + } + return lifecycle, true, nil +} + +func fetchVmLifecycleFromLifecycleCache(ctx context.Context, dbConn *sqlx.DB, vmID, vmUUID, name string) (VmLifecycle, bool, error) { + var row struct { + Rows int64 `db:"rows"` + FirstSeen sql.NullInt64 `db:"first_seen"` + LastSeen sql.NullInt64 `db:"last_seen"` + Deletion sql.NullInt64 `db:"deletion_time"` + } + query := ` +SELECT + COUNT(1) AS rows, + MIN(NULLIF("FirstSeen",0)) AS first_seen, + MAX(NULLIF("LastSeen",0)) AS last_seen, + MIN(NULLIF("DeletedAt",0)) AS deletion_time +FROM vm_lifecycle_cache +WHERE ("VmId" = ? OR "VmUuid" = ? OR lower("Name") = lower(?)) +` + query = dbConn.Rebind(query) + if err := getLog(ctx, dbConn, &row, query, vmID, vmUUID, name); err != nil { + return VmLifecycle{}, false, err + } + if row.Rows == 0 { + return VmLifecycle{}, false, nil + } + lifecycle := VmLifecycle{ + FirstSeen: row.FirstSeen.Int64, + LastSeen: row.LastSeen.Int64, + } + if row.FirstSeen.Valid && row.FirstSeen.Int64 > 0 { + lifecycle.CreationTime = row.FirstSeen.Int64 + lifecycle.CreationApprox = true + } + if row.Deletion.Valid && row.Deletion.Int64 > 0 { + lifecycle.DeletionTime = row.Deletion.Int64 + } + return lifecycle, true, nil +} + +func mergeVmLifecycle(base, overlay VmLifecycle) VmLifecycle { + out := base + if overlay.FirstSeen > 0 && (out.FirstSeen == 0 || overlay.FirstSeen < out.FirstSeen) { + out.FirstSeen = overlay.FirstSeen + } + if overlay.LastSeen > out.LastSeen { + out.LastSeen = overlay.LastSeen + } + if overlay.DeletionTime > 0 && (out.DeletionTime == 0 || overlay.DeletionTime < out.DeletionTime) { + out.DeletionTime = overlay.DeletionTime + } + if overlay.CreationTime > 0 { + if out.CreationTime == 0 || overlay.CreationTime < out.CreationTime { + out.CreationTime = overlay.CreationTime + out.CreationApprox = overlay.CreationApprox + } else if out.CreationTime == overlay.CreationTime && !overlay.CreationApprox { + out.CreationApprox = false + } + } + if out.CreationTime == 0 && out.FirstSeen > 0 { + out.CreationTime = out.FirstSeen + out.CreationApprox = true + } + return out +} + +func fetchVmLifecycleFromSnapshotTables(ctx context.Context, dbConn *sqlx.DB, vmID, vmUUID, name string) (VmLifecycle, error) { var lifecycle VmLifecycle var tables []struct { TableName string `db:"table_name"` @@ -1325,7 +1535,6 @@ ORDER BY snapshot_time if err := ValidateTableName(t.TableName); err != nil { continue } - // Probe this table for the VM. query := fmt.Sprintf(` SELECT MIN(NULLIF("CreationTime",0)) AS min_creation, COUNT(1) AS cnt FROM %s @@ -1362,7 +1571,6 @@ WHERE ("VmId" = ? OR "VmUuid" = ? OR lower("Name") = lower(?)) break } } else { - // reset if we haven't seen the VM yet consecutiveMissing = 0 } } diff --git a/db/helpers_cache_and_index_test.go b/db/helpers_cache_and_index_test.go new file mode 100644 index 0000000..2fd57bf --- /dev/null +++ b/db/helpers_cache_and_index_test.go @@ -0,0 +1,164 @@ +package db + +import ( + "context" + "database/sql" + "fmt" + "testing" + "time" + + "github.com/jmoiron/sqlx" + _ "modernc.org/sqlite" +) + +func newTestSQLiteDB(t *testing.T) *sqlx.DB { + t.Helper() + dbConn, err := sqlx.Open("sqlite", ":memory:") + if err != nil { + t.Fatalf("failed to open sqlite test db: %v", err) + } + t.Cleanup(func() { + _ = dbConn.Close() + }) + return dbConn +} + +func indexExists(t *testing.T, dbConn *sqlx.DB, name string) bool { + t.Helper() + var count int + if err := dbConn.Get(&count, `SELECT COUNT(1) FROM sqlite_master WHERE type='index' AND name=?`, name); err != nil { + t.Fatalf("failed to query index %s: %v", name, err) + } + return count > 0 +} + +func TestCleanupHourlySnapshotIndexesOlderThan(t *testing.T) { + ctx := context.Background() + dbConn := newTestSQLiteDB(t) + + oldTable := "inventory_hourly_1700000000" + newTable := "inventory_hourly_1800000000" + for _, table := range []string{oldTable, newTable} { + if err := EnsureSnapshotTable(ctx, dbConn, table); err != nil { + t.Fatalf("failed to create snapshot table %s: %v", table, err) + } + if _, err := dbConn.ExecContext(ctx, fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_snapshottime_idx ON %s ("SnapshotTime")`, table, table)); err != nil { + t.Fatalf("failed to create snapshottime index for %s: %v", table, err) + } + if _, err := dbConn.ExecContext(ctx, fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_resourcepool_idx ON %s ("ResourcePool")`, table, table)); err != nil { + t.Fatalf("failed to create resourcepool index for %s: %v", table, err) + } + } + + cutoff := time.Unix(1750000000, 0) + dropped, err := CleanupHourlySnapshotIndexesOlderThan(ctx, dbConn, cutoff) + if err != nil { + t.Fatalf("cleanup failed: %v", err) + } + if dropped != 3 { + t.Fatalf("expected 3 old indexes dropped, got %d", dropped) + } + + oldIndexes := []string{ + oldTable + "_vm_vcenter_idx", + oldTable + "_snapshottime_idx", + oldTable + "_resourcepool_idx", + } + for _, idx := range oldIndexes { + if indexExists(t, dbConn, idx) { + t.Fatalf("expected old index %s to be removed", idx) + } + } + + newIndexes := []string{ + newTable + "_vm_vcenter_idx", + newTable + "_snapshottime_idx", + newTable + "_resourcepool_idx", + } + for _, idx := range newIndexes { + if !indexExists(t, dbConn, idx) { + t.Fatalf("expected recent index %s to remain", idx) + } + } +} + +func TestFetchVmTraceAndLifecycleUseCacheTables(t *testing.T) { + ctx := context.Background() + dbConn := newTestSQLiteDB(t) + + if err := EnsureVmHourlyStats(ctx, dbConn); err != nil { + t.Fatalf("failed to ensure vm_hourly_stats: %v", err) + } + if err := EnsureVmLifecycleCache(ctx, dbConn); err != nil { + t.Fatalf("failed to ensure vm_lifecycle_cache: %v", err) + } + + insertSQL := ` +INSERT INTO vm_hourly_stats ( + "SnapshotTime","Vcenter","VmId","VmUuid","Name","CreationTime","DeletionTime","ResourcePool", + "Datacenter","Cluster","Folder","ProvisionedDisk","VcpuCount","RamGB","IsTemplate","PoweredOn","SrmPlaceholder" +) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) +` + rows := [][]interface{}{ + {int64(1000), "vc-a", "vm-1", "uuid-1", "demo-vm", int64(900), int64(0), "Tin", "dc", "cluster", "folder", 100.0, int64(2), int64(4), "FALSE", "TRUE", "FALSE"}, + {int64(2000), "vc-a", "vm-1", "uuid-1", "demo-vm", int64(900), int64(0), "Gold", "dc", "cluster", "folder", 150.0, int64(4), int64(8), "FALSE", "TRUE", "FALSE"}, + } + for _, args := range rows { + if _, err := dbConn.ExecContext(ctx, insertSQL, args...); err != nil { + t.Fatalf("failed to insert hourly cache row: %v", err) + } + } + + if err := UpsertVmLifecycleCache(ctx, dbConn, "vc-a", "vm-1", "uuid-1", "demo-vm", "cluster", time.Unix(1000, 0), sql.NullInt64{Int64: 900, Valid: true}); err != nil { + t.Fatalf("failed to upsert lifecycle cache: %v", err) + } + if err := MarkVmDeletedWithDetails(ctx, dbConn, "vc-a", "vm-1", "uuid-1", "demo-vm", "cluster", 2500); err != nil { + t.Fatalf("failed to mark vm deleted: %v", err) + } + + traceRows, err := FetchVmTrace(ctx, dbConn, "vm-1", "", "") + if err != nil { + t.Fatalf("FetchVmTrace failed: %v", err) + } + if len(traceRows) != 2 { + t.Fatalf("expected 2 trace rows, got %d", len(traceRows)) + } + if traceRows[0].SnapshotTime != 1000 || traceRows[1].SnapshotTime != 2000 { + t.Fatalf("trace rows are not sorted by snapshot time: %#v", traceRows) + } + + lifecycle, err := FetchVmLifecycle(ctx, dbConn, "vm-1", "", "") + if err != nil { + t.Fatalf("FetchVmLifecycle failed: %v", err) + } + if lifecycle.FirstSeen != 900 { + t.Fatalf("expected FirstSeen=900 (earliest known from lifecycle cache), got %d", lifecycle.FirstSeen) + } + if lifecycle.LastSeen != 2000 { + t.Fatalf("expected LastSeen=2000, got %d", lifecycle.LastSeen) + } + if lifecycle.CreationTime != 900 || lifecycle.CreationApprox { + t.Fatalf("expected exact CreationTime=900, got time=%d approx=%v", lifecycle.CreationTime, lifecycle.CreationApprox) + } + if lifecycle.DeletionTime != 2500 { + t.Fatalf("expected DeletionTime=2500 from lifecycle cache, got %d", lifecycle.DeletionTime) + } +} + +func TestParseHourlySnapshotUnix(t *testing.T) { + cases := []struct { + table string + ok bool + val int64 + }{ + {table: "inventory_hourly_1700000000", ok: true, val: 1700000000}, + {table: "inventory_hourly_bad", ok: false, val: 0}, + {table: "inventory_daily_summary_20260101", ok: false, val: 0}, + } + for _, tc := range cases { + got, ok := parseHourlySnapshotUnix(tc.table) + if ok != tc.ok || got != tc.val { + t.Fatalf("parseHourlySnapshotUnix(%q) = (%d,%v), expected (%d,%v)", tc.table, got, ok, tc.val, tc.ok) + } + } +} diff --git a/internal/settings/settings.go b/internal/settings/settings.go index 22ee4b0..3a15ee7 100644 --- a/internal/settings/settings.go +++ b/internal/settings/settings.go @@ -24,14 +24,17 @@ type SettingsYML struct { LogOutput string `yaml:"log_output"` DatabaseDriver string `yaml:"database_driver"` DatabaseURL string `yaml:"database_url"` + EnableExperimentalPostgres bool `yaml:"enable_experimental_postgres"` BindIP string `yaml:"bind_ip"` BindPort int `yaml:"bind_port"` BindDisableTLS bool `yaml:"bind_disable_tls"` TLSCertFilename string `yaml:"tls_cert_filename"` TLSKeyFilename string `yaml:"tls_key_filename"` + EncryptionKey string `yaml:"encryption_key"` VcenterUsername string `yaml:"vcenter_username"` VcenterPassword string `yaml:"vcenter_password"` VcenterInsecure bool `yaml:"vcenter_insecure"` + EnableLegacyAPI bool `yaml:"enable_legacy_api"` VcenterEventPollingSeconds int `yaml:"vcenter_event_polling_seconds"` VcenterInventoryPollingSeconds int `yaml:"vcenter_inventory_polling_seconds"` VcenterInventorySnapshotSeconds int `yaml:"vcenter_inventory_snapshot_seconds"` @@ -39,6 +42,7 @@ type SettingsYML struct { HourlySnapshotConcurrency int `yaml:"hourly_snapshot_concurrency"` HourlySnapshotMaxAgeDays int `yaml:"hourly_snapshot_max_age_days"` DailySnapshotMaxAgeMonths int `yaml:"daily_snapshot_max_age_months"` + HourlyIndexMaxAgeDays int `yaml:"hourly_index_max_age_days"` SnapshotCleanupCron string `yaml:"snapshot_cleanup_cron"` ReportsDir string `yaml:"reports_dir"` HourlyJobTimeoutSeconds int `yaml:"hourly_job_timeout_seconds"` @@ -95,6 +99,9 @@ func (s *Settings) ReadYMLSettings() error { // Avoid logging sensitive fields (e.g., credentials). redacted := settings redacted.Settings.VcenterPassword = "REDACTED" + if redacted.Settings.EncryptionKey != "" { + redacted.Settings.EncryptionKey = "REDACTED" + } s.Logger.Debug("Updating settings", "settings", redacted) s.Values = &settings diff --git a/internal/tasks/inventorySnapshots.go b/internal/tasks/inventorySnapshots.go index 1eb34f7..bb071fa 100644 --- a/internal/tasks/inventorySnapshots.go +++ b/internal/tasks/inventorySnapshots.go @@ -315,6 +315,10 @@ func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) now := time.Now() hourlyMaxDays := intWithDefault(c.Settings.Values.Settings.HourlySnapshotMaxAgeDays, 60) dailyMaxMonths := intWithDefault(c.Settings.Values.Settings.DailySnapshotMaxAgeMonths, 12) + hourlyIndexMaxAgeDays := 7 + if c.Settings != nil && c.Settings.Values != nil { + hourlyIndexMaxAgeDays = intWithDefault(c.Settings.Values.Settings.HourlyIndexMaxAgeDays, 7) + } hourlyCutoff := now.AddDate(0, 0, -hourlyMaxDays) dailyCutoff := now.AddDate(0, -dailyMaxMonths, 0) @@ -324,6 +328,7 @@ func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) "daily_cutoff", truncateDate(dailyCutoff), "hourly_max_age_days", hourlyMaxDays, "daily_max_age_months", dailyMaxMonths, + "hourly_index_max_age_days", hourlyIndexMaxAgeDays, ) dbConn := c.Database.DB() @@ -382,13 +387,31 @@ func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) } } + trimmedHourlyIndexes := 0 + if hourlyIndexMaxAgeDays >= 0 && db.TableExists(ctx, dbConn, "vm_hourly_stats") { + indexCutoff := truncateDate(now.AddDate(0, 0, -hourlyIndexMaxAgeDays)) + trimmed, trimErr := db.CleanupHourlySnapshotIndexesOlderThan(ctx, dbConn, indexCutoff) + if trimErr != nil { + c.Logger.Warn("failed to cleanup old hourly snapshot indexes", "error", trimErr, "index_cutoff", indexCutoff) + } else { + trimmedHourlyIndexes = trimmed + c.Logger.Info("Snapshot cleanup hourly index trim", + "index_cutoff", indexCutoff, + "trimmed_indexes", trimmedHourlyIndexes, + "hourly_index_max_age_days", hourlyIndexMaxAgeDays, + ) + } + } + c.Logger.Info("Finished snapshot cleanup", "hourly_tables_scanned", scannedHourly, "daily_tables_scanned", scannedDaily, "removed_hourly_tables", removedHourly, "removed_daily_tables", removedDaily, + "trimmed_hourly_indexes", trimmedHourlyIndexes, "hourly_max_age_days", hourlyMaxDays, "daily_max_age_months", dailyMaxMonths, + "hourly_index_max_age_days", hourlyIndexMaxAgeDays, ) return nil } diff --git a/main.go b/main.go index e5526a7..9544ddb 100644 --- a/main.go +++ b/main.go @@ -35,7 +35,6 @@ var ( const ( encryptedVcenterPasswordPrefix = "enc:v1:" - encryptionKeyEnvVar = "VCTP_ENCRYPTION_KEY" legacyFallbackEncryptionKey = "5L1l3B5KvwOCzUHMAlCgsgUTRAYMfSpa" ) @@ -80,7 +79,11 @@ func main() { dbURL = utils.GetFilePath("db.sqlite3") } - database, err := db.New(logger, db.Config{Driver: normalizedDriver, DSN: dbURL}) + database, err := db.New(logger, db.Config{ + Driver: normalizedDriver, + DSN: dbURL, + EnableExperimentalPostgres: s.Values.Settings.EnableExperimentalPostgres, + }) if err != nil { logger.Error("Failed to create database", "error", err) os.Exit(1) @@ -143,7 +146,7 @@ func main() { } // Load vcenter credentials from settings, decrypt if required. - encKey := deriveEncryptionKey(logger, *settingsPath) + encKey := deriveEncryptionKey(logger, *settingsPath, s.Values.Settings.EncryptionKey) a := secrets.New(logger, encKey) legacyDecryptKeys := deriveLegacyDecryptionKeys(*settingsPath, encKey) vcEp := strings.TrimSpace(s.Values.Settings.VcenterPassword) @@ -456,10 +459,10 @@ func deriveLegacyDecryptionKeys(settingsPath string, activeKey []byte) [][]byte return legacyKeys } -func deriveEncryptionKey(logger *slog.Logger, settingsPath string) []byte { - if provided := strings.TrimSpace(os.Getenv(encryptionKeyEnvVar)); provided != "" { +func deriveEncryptionKey(logger *slog.Logger, settingsPath string, configuredKey string) []byte { + if provided := strings.TrimSpace(configuredKey); provided != "" { sum := sha256.Sum256([]byte(provided)) - logger.Debug("derived encryption key from environment variable", "env_var", encryptionKeyEnvVar) + logger.Debug("derived encryption key from settings", "setting", "settings.encryption_key") return sum[:] } @@ -470,7 +473,7 @@ func deriveEncryptionKey(logger *slog.Logger, settingsPath string) []byte { case "machine-id": logger.Debug("derived encryption key from machine-id") default: - logger.Warn("using host-derived encryption key fallback; set environment variable for explicit key", "env_var", encryptionKeyEnvVar) + logger.Warn("using host-derived encryption key fallback; set settings.encryption_key for an explicit key") } return key } diff --git a/server/handler/legacy_gate.go b/server/handler/legacy_gate.go index 15830c9..d586f8c 100644 --- a/server/handler/legacy_gate.go +++ b/server/handler/legacy_gate.go @@ -3,21 +3,22 @@ package handler import ( "fmt" "net/http" - "os" - "strings" ) -const legacyAPIEnvVar = "VCTP_ENABLE_LEGACY_API" +const legacyAPISetting = "settings.enable_legacy_api" -func legacyAPIEnabled() bool { - return strings.TrimSpace(os.Getenv(legacyAPIEnvVar)) == "1" +func (h *Handler) legacyAPIEnabled() bool { + if h == nil || h.Settings == nil || h.Settings.Values == nil { + return false + } + return h.Settings.Values.Settings.EnableLegacyAPI } func (h *Handler) denyLegacyAPI(w http.ResponseWriter, endpoint string) bool { - if legacyAPIEnabled() { + if h.legacyAPIEnabled() { return false } - h.Logger.Warn("legacy endpoint request blocked", "endpoint", endpoint, "env_var", legacyAPIEnvVar) - writeJSONError(w, http.StatusGone, fmt.Sprintf("%s is deprecated and disabled; set %s=1 to temporarily re-enable", endpoint, legacyAPIEnvVar)) + h.Logger.Warn("legacy endpoint request blocked", "endpoint", endpoint, "setting", legacyAPISetting) + writeJSONError(w, http.StatusGone, fmt.Sprintf("%s is deprecated and disabled; set %s=true to temporarily re-enable", endpoint, legacyAPISetting)) return true } diff --git a/server/handler/legacy_gate_test.go b/server/handler/legacy_gate_test.go index cd34493..e49405f 100644 --- a/server/handler/legacy_gate_test.go +++ b/server/handler/legacy_gate_test.go @@ -5,11 +5,20 @@ import ( "net/http/httptest" "strings" "testing" + "vctp/internal/settings" ) +func newLegacyGateHandler(enabled bool) *Handler { + cfg := &settings.Settings{Values: &settings.SettingsYML{}} + cfg.Values.Settings.EnableLegacyAPI = enabled + return &Handler{ + Logger: newTestLogger(), + Settings: cfg, + } +} + func TestDenyLegacyAPIDisabledByDefault(t *testing.T) { - t.Setenv(legacyAPIEnvVar, "") - h := &Handler{Logger: newTestLogger()} + h := newLegacyGateHandler(false) rr := httptest.NewRecorder() denied := h.denyLegacyAPI(rr, "/api/event/vm/create") @@ -24,14 +33,13 @@ func TestDenyLegacyAPIDisabledByDefault(t *testing.T) { } } -func TestDenyLegacyAPIEnabledViaEnv(t *testing.T) { - t.Setenv(legacyAPIEnvVar, "1") - h := &Handler{Logger: newTestLogger()} +func TestDenyLegacyAPIEnabledViaSettings(t *testing.T) { + h := newLegacyGateHandler(true) rr := httptest.NewRecorder() denied := h.denyLegacyAPI(rr, "/api/event/vm/create") if denied { - t.Fatal("expected legacy API to be allowed when env var is set") + t.Fatal("expected legacy API to be allowed when setting is enabled") } if rr.Body.Len() != 0 { t.Fatalf("expected no response body write, got: %s", rr.Body.String()) @@ -39,10 +47,8 @@ func TestDenyLegacyAPIEnabledViaEnv(t *testing.T) { } func TestVmCreateEventHonorsLegacyGate(t *testing.T) { - h := &Handler{Logger: newTestLogger()} - t.Run("disabled", func(t *testing.T) { - t.Setenv(legacyAPIEnvVar, "") + h := newLegacyGateHandler(false) req := httptest.NewRequest(http.MethodPost, "/api/event/vm/create", strings.NewReader("{invalid")) rr := httptest.NewRecorder() h.VmCreateEvent(rr, req) @@ -52,7 +58,7 @@ func TestVmCreateEventHonorsLegacyGate(t *testing.T) { }) t.Run("enabled", func(t *testing.T) { - t.Setenv(legacyAPIEnvVar, "1") + h := newLegacyGateHandler(true) req := httptest.NewRequest(http.MethodPost, "/api/event/vm/create", strings.NewReader("{invalid")) rr := httptest.NewRecorder() h.VmCreateEvent(rr, req) diff --git a/server/router/router.go b/server/router/router.go index 67ba361..fa46728 100644 --- a/server/router/router.go +++ b/server/router/router.go @@ -55,7 +55,7 @@ func New(logger *slog.Logger, database db.Database, buildTime string, sha1ver st // add missing data to VMs //mux.HandleFunc("/api/inventory/vm/update", h.VmUpdateDetails) - // Legacy/maintenance endpoints are gated by VCTP_ENABLE_LEGACY_API. + // Legacy/maintenance endpoints are gated by settings.enable_legacy_api. mux.HandleFunc("/api/cleanup/updates", h.UpdateCleanup) //mux.HandleFunc("/api/cleanup/vcenter", h.VcCleanup) diff --git a/src/postinstall.sh b/src/postinstall.sh index 4b89ac8..776a275 100644 --- a/src/postinstall.sh +++ b/src/postinstall.sh @@ -1,8 +1,108 @@ #!/bin/bash +TARGET_CFG="/etc/dtms/vctp.yml" +SOURCE_CFG="${TARGET_CFG}.rpmnew" + +extract_setting_key_lines() { + local file="$1" + awk ' + /^settings:[[:space:]]*$/ { in_settings = 1; next } + in_settings && /^[^[:space:]]/ { in_settings = 0 } + in_settings && $0 ~ /^ [A-Za-z0-9_]+:[[:space:]]*/ { + key = $1 + sub(":", "", key) + print key "\t" $0 + } + ' "$file" +} + +merge_missing_settings_from_rpmnew() { + local target="$1" + local source="$2" + + if [ ! -f "$target" ] || [ ! -f "$source" ]; then + return 0 + fi + + local src_pairs target_pairs missing_lines merged_file + src_pairs="$(mktemp /tmp/vctp-postinstall-src-XXXXXX)" || return 0 + target_pairs="$(mktemp /tmp/vctp-postinstall-target-XXXXXX)" || { rm -f "$src_pairs"; return 0; } + missing_lines="$(mktemp /tmp/vctp-postinstall-missing-XXXXXX)" || { + rm -f "$src_pairs" "$target_pairs" + return 0 + } + merged_file="$(mktemp /tmp/vctp-postinstall-merged-XXXXXX)" || { + rm -f "$src_pairs" "$target_pairs" "$missing_lines" + return 0 + } + + extract_setting_key_lines "$source" > "$src_pairs" + extract_setting_key_lines "$target" > "$target_pairs" + + declare -A existing_keys=() + while IFS=$'\t' read -r key _; do + [ -n "$key" ] || continue + existing_keys["$key"]=1 + done < "$target_pairs" + + local added=0 + : > "$missing_lines" + while IFS=$'\t' read -r key line; do + [ -n "$key" ] || continue + if [ -z "${existing_keys[$key]+x}" ]; then + if [ "$added" -eq 0 ]; then + echo " # Added automatically by RPM postinstall from vctp.yml.rpmnew defaults." >> "$missing_lines" + fi + echo "$line" >> "$missing_lines" + existing_keys["$key"]=1 + added=$((added + 1)) + fi + done < "$src_pairs" + + if [ "$added" -gt 0 ]; then + awk -v missing_file="$missing_lines" ' + function print_missing( line) { + while ((getline line < missing_file) > 0) { + print line + } + close(missing_file) + } + BEGIN { in_settings = 0; inserted = 0 } + { + if ($0 ~ /^settings:[[:space:]]*$/) { + in_settings = 1 + print + next + } + if (in_settings && $0 ~ /^[^[:space:]]/) { + if (!inserted) { + print_missing() + inserted = 1 + } + in_settings = 0 + } + print + } + END { + if (in_settings && !inserted) { + print_missing() + } + } + ' "$target" > "$merged_file" && cat "$merged_file" > "$target" + + if [ "$?" -eq 0 ]; then + echo "vCTP postinstall: added ${added} missing settings key(s) to ${target}" + fi + fi + + rm -f "$src_pairs" "$target_pairs" "$missing_lines" "$merged_file" +} + +merge_missing_settings_from_rpmnew "$TARGET_CFG" "$SOURCE_CFG" || : + if command -v systemctl >/dev/null 2>&1; then systemctl daemon-reload || : - if [ "$1" -eq 1 ]; then + if [ "${1:-0}" -eq 1 ]; then systemctl enable --now vctp.service || : else systemctl try-restart vctp.service || : diff --git a/src/vctp.default b/src/vctp.default index 111e69e..8d0c4bd 100644 --- a/src/vctp.default +++ b/src/vctp.default @@ -1,9 +1,13 @@ CPE_OPTS='-settings /etc/dtms/vctp.yml' -MONTHLY_AGG_GO=0 -DAILY_AGG_GO=0 -# Optional explicit encryption key source (recommended for stable credential decryption across host changes): -# VCTP_ENCRYPTION_KEY='' -# PostgreSQL is experimental and disabled by default: -# VCTP_ENABLE_EXPERIMENTAL_POSTGRES=0 -# Deprecated API endpoints are disabled by default: -# VCTP_ENABLE_LEGACY_API=0 + +# Aggregation engine selection (default: Go paths enabled). +# DAILY_AGG_GO=1: +# Use the Go fan-out/reduce daily aggregation path. +# MONTHLY_AGG_GO=1: +# Use the Go monthly aggregation path for both monthly modes +# (hourly or daily source tables, controlled by settings.monthly_aggregation_granularity). +# Set either option to 0 to prefer the SQL implementation for that layer. +# If a Go aggregation run fails, vCTP automatically falls back to SQL for that run. +DAILY_AGG_GO=1 +MONTHLY_AGG_GO=1 +# Additional runtime behavior is configured in the YAML file (`/etc/dtms/vctp.yml` by default). diff --git a/src/vctp.yml b/src/vctp.yml index dad4b68..6b7f27b 100644 --- a/src/vctp.yml +++ b/src/vctp.yml @@ -2,6 +2,8 @@ settings: log_level: "info" log_output: "text" database_driver: "sqlite" + # PostgreSQL remains experimental and is disabled by default. + enable_experimental_postgres: false database_url: "/var/lib/vctp/db.sqlite3" reports_dir: /var/lib/vctp/reports bind_ip: @@ -9,9 +11,14 @@ settings: bind_disable_tls: false tls_cert_filename: "/var/lib/vctp/vctp.crt" tls_key_filename: "/var/lib/vctp/vctp.key" + # Optional explicit key source for credential encryption/decryption. + # Leave empty to use host-derived key material. + encryption_key: "" vcenter_username: "" vcenter_password: "" vcenter_insecure: false + # Legacy API endpoints are disabled by default. + enable_legacy_api: false # Deprecated (ignored): legacy event poller vcenter_event_polling_seconds: 0 # Deprecated (ignored): legacy inventory poller @@ -21,6 +28,9 @@ settings: hourly_snapshot_concurrency: 0 hourly_snapshot_max_age_days: 60 daily_snapshot_max_age_months: 12 + # Retain hourly-table indexes only for recent data. + # -1 disables index cleanup; 0 trims indexes from all hourly tables. + hourly_index_max_age_days: 7 snapshot_cleanup_cron: "30 2 * * *" hourly_snapshot_retry_seconds: 300 hourly_snapshot_max_retries: 3