diff --git a/db/helpers.go b/db/helpers.go index e76f6e4..76599f7 100644 --- a/db/helpers.go +++ b/db/helpers.go @@ -277,6 +277,12 @@ func EnsureSnapshotTable(ctx context.Context, dbConn *sqlx.DB, tableName string) } _, err := dbConn.ExecContext(ctx, ddl) + if err != nil { + return err + } + + index := fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_vm_vcenter_idx ON %s ("VmId","Vcenter")`, tableName, tableName) + _, err = dbConn.ExecContext(ctx, index) return err } @@ -303,6 +309,67 @@ func BackfillSerialColumn(ctx context.Context, dbConn *sqlx.DB, tableName, colum return nil } +// ApplySQLiteTuning applies lightweight WAL/synchronous tweaks for better concurrency in non-prod contexts. +func ApplySQLiteTuning(ctx context.Context, dbConn *sqlx.DB) { + if strings.ToLower(dbConn.DriverName()) != "sqlite" { + return + } + // Best-effort pragmas; ignore errors to stay safe in constrained environments. + pragmas := []string{ + `PRAGMA journal_mode=WAL;`, + `PRAGMA synchronous=NORMAL;`, + `PRAGMA temp_store=MEMORY;`, + } + for _, pragma := range pragmas { + _, _ = dbConn.ExecContext(ctx, pragma) + } +} + +// CheckMigrationState ensures goose migrations are present and not dirty. +func CheckMigrationState(ctx context.Context, dbConn *sqlx.DB) error { + driver := strings.ToLower(dbConn.DriverName()) + var tableExists bool + switch driver { + case "sqlite": + err := dbConn.GetContext(ctx, &tableExists, ` +SELECT COUNT(1) > 0 FROM sqlite_master WHERE type='table' AND name='goose_db_version' +`) + if err != nil { + return err + } + case "pgx", "postgres": + err := dbConn.GetContext(ctx, &tableExists, ` +SELECT EXISTS ( + SELECT 1 FROM pg_tables WHERE schemaname = 'public' AND tablename = 'goose_db_version' +) +`) + if err != nil { + return err + } + default: + return fmt.Errorf("unsupported driver for migration check: %s", driver) + } + + if !tableExists { + return fmt.Errorf("goose_db_version table not found; database migrations may not be applied") + } + + var dirty bool + err := dbConn.GetContext(ctx, &dirty, ` +SELECT NOT is_applied +FROM goose_db_version +ORDER BY id DESC +LIMIT 1 +`) + if err != nil { + return err + } + if dirty { + return fmt.Errorf("database migrations are in a dirty state; please resolve goose_db_version") + } + return nil +} + // BuildDailySummaryInsert returns the SQL to aggregate hourly snapshots into a daily summary table. func BuildDailySummaryInsert(tableName string, unionQuery string) (string, error) { if _, err := SafeTableName(tableName); err != nil { diff --git a/db/migrations/20250116102000_snapshot_registry_indexes.sql b/db/migrations/20250116102000_snapshot_registry_indexes.sql new file mode 100644 index 0000000..1f73fe0 --- /dev/null +++ b/db/migrations/20250116102000_snapshot_registry_indexes.sql @@ -0,0 +1,5 @@ +-- +goose Up +CREATE INDEX IF NOT EXISTS idx_snapshot_registry_type_time ON snapshot_registry (snapshot_type, snapshot_time); + +-- +goose Down +DROP INDEX IF EXISTS idx_snapshot_registry_type_time; diff --git a/db/migrations_postgres/20250116102000_snapshot_registry_indexes.sql b/db/migrations_postgres/20250116102000_snapshot_registry_indexes.sql new file mode 100644 index 0000000..1f73fe0 --- /dev/null +++ b/db/migrations_postgres/20250116102000_snapshot_registry_indexes.sql @@ -0,0 +1,5 @@ +-- +goose Up +CREATE INDEX IF NOT EXISTS idx_snapshot_registry_type_time ON snapshot_registry (snapshot_type, snapshot_time); + +-- +goose Down +DROP INDEX IF EXISTS idx_snapshot_registry_type_time; diff --git a/db/queries/models.go b/db/queries/models.go index d6904d2..d0f448a 100644 --- a/db/queries/models.go +++ b/db/queries/models.go @@ -69,10 +69,11 @@ type PragmaTableInfo struct { } type SnapshotRegistry struct { - ID int64 `db:"id" json:"id"` - SnapshotType string `db:"snapshot_type" json:"snapshot_type"` - TableName string `db:"table_name" json:"table_name"` - SnapshotTime int64 `db:"snapshot_time" json:"snapshot_time"` + ID int64 `db:"id" json:"id"` + SnapshotType string `db:"snapshot_type" json:"snapshot_type"` + TableName string `db:"table_name" json:"table_name"` + SnapshotTime int64 `db:"snapshot_time" json:"snapshot_time"` + SnapshotCount int64 `db:"snapshot_count" json:"snapshot_count"` } type SqliteMaster struct { diff --git a/db/schema.sql b/db/schema.sql index 8c76431..45a0c73 100644 --- a/db/schema.sql +++ b/db/schema.sql @@ -73,6 +73,7 @@ CREATE TABLE IF NOT EXISTS snapshot_registry ( "snapshot_time" INTEGER NOT NULL, "snapshot_count" BIGINT NOT NULL DEFAULT 0 ); +CREATE INDEX IF NOT EXISTS idx_snapshot_registry_type_time ON snapshot_registry (snapshot_type, snapshot_time); -- The following tables are declared for sqlc type-checking only. -- Do not apply this file as a migration. diff --git a/internal/settings/settings.go b/internal/settings/settings.go index b4ef49f..1e9ecf3 100644 --- a/internal/settings/settings.go +++ b/internal/settings/settings.go @@ -40,6 +40,11 @@ type SettingsYML struct { HourlySnapshotMaxAgeDays int `yaml:"hourly_snapshot_max_age_days"` DailySnapshotMaxAgeMonths int `yaml:"daily_snapshot_max_age_months"` SnapshotCleanupCron string `yaml:"snapshot_cleanup_cron"` + HourlyJobTimeoutSeconds int `yaml:"hourly_job_timeout_seconds"` + HourlySnapshotTimeoutSeconds int `yaml:"hourly_snapshot_timeout_seconds"` + DailyJobTimeoutSeconds int `yaml:"daily_job_timeout_seconds"` + MonthlyJobTimeoutSeconds int `yaml:"monthly_job_timeout_seconds"` + CleanupJobTimeoutSeconds int `yaml:"cleanup_job_timeout_seconds"` TenantsToFilter []string `yaml:"tenants_to_filter"` NodeChargeClusters []string `yaml:"node_charge_clusters"` SrmActiveActiveVms []string `yaml:"srm_activeactive_vms"` diff --git a/internal/tasks/cronstatus.go b/internal/tasks/cronstatus.go new file mode 100644 index 0000000..d9aa1a3 --- /dev/null +++ b/internal/tasks/cronstatus.go @@ -0,0 +1,152 @@ +package tasks + +import ( + "context" + "database/sql" + "time" + "vctp/db" + + "github.com/jmoiron/sqlx" +) + +// CronTracker manages re-entry protection and status recording for cron jobs. +type CronTracker struct { + db db.Database + bindType int +} + +func NewCronTracker(database db.Database) *CronTracker { + return &CronTracker{ + db: database, + bindType: sqlx.BindType(database.DB().DriverName()), + } +} + +func (c *CronTracker) ensureTable(ctx context.Context) error { + conn := c.db.DB() + driver := conn.DriverName() + var ddl string + switch driver { + case "pgx", "postgres": + ddl = ` +CREATE TABLE IF NOT EXISTS cron_status ( + job_name TEXT PRIMARY KEY, + started_at BIGINT NOT NULL, + ended_at BIGINT NOT NULL, + duration_ms BIGINT NOT NULL, + last_error TEXT, + in_progress BOOLEAN NOT NULL DEFAULT FALSE +);` + default: + ddl = ` +CREATE TABLE IF NOT EXISTS cron_status ( + job_name TEXT PRIMARY KEY, + started_at BIGINT NOT NULL, + ended_at BIGINT NOT NULL, + duration_ms BIGINT NOT NULL, + last_error TEXT, + in_progress BOOLEAN NOT NULL DEFAULT FALSE +);` + } + _, err := conn.ExecContext(ctx, ddl) + return err +} + +// Start marks a job as in-progress; returns a completion callback and whether to skip because it's already running. +func (c *CronTracker) Start(ctx context.Context, job string) (func(error), bool, error) { + if err := c.ensureTable(ctx); err != nil { + return nil, false, err + } + conn := c.db.DB() + now := time.Now().Unix() + + tx, err := conn.BeginTxx(ctx, nil) + if err != nil { + return nil, false, err + } + + var inProgress bool + query := sqlx.Rebind(c.bindType, `SELECT in_progress FROM cron_status WHERE job_name = ?`) + err = tx.QueryRowContext(ctx, query, job).Scan(&inProgress) + if err != nil { + // no row, insert + if err := upsertCron(tx, c.bindType, job, now, false); err != nil { + tx.Rollback() + return nil, false, err + } + } else { + if inProgress { + tx.Rollback() + return nil, true, nil + } + if err := markCronStart(tx, c.bindType, job, now); err != nil { + tx.Rollback() + return nil, false, err + } + } + + if err := tx.Commit(); err != nil { + return nil, false, err + } + + done := func(runErr error) { + _ = c.finish(context.Background(), job, now, runErr) + } + return done, false, nil +} + +func (c *CronTracker) finish(ctx context.Context, job string, startedAt int64, runErr error) error { + conn := c.db.DB() + duration := time.Since(time.Unix(startedAt, 0)).Milliseconds() + tx, err := conn.BeginTxx(ctx, nil) + if err != nil { + return err + } + var lastError sql.NullString + if runErr != nil { + lastError = sql.NullString{String: runErr.Error(), Valid: true} + } + err = upsertCronFinish(tx, c.bindType, job, startedAt, duration, lastError.String) + if err != nil { + tx.Rollback() + return err + } + return tx.Commit() +} + +func upsertCron(tx *sqlx.Tx, bindType int, job string, startedAt int64, inProgress bool) error { + query := ` +INSERT INTO cron_status (job_name, started_at, ended_at, duration_ms, last_error, in_progress) +VALUES (?, ?, 0, 0, NULL, ?) +ON CONFLICT (job_name) DO UPDATE SET started_at = excluded.started_at, in_progress = excluded.in_progress, ended_at = excluded.ended_at, duration_ms = excluded.duration_ms, last_error = excluded.last_error +` + _, err := tx.Exec(sqlx.Rebind(bindType, query), job, startedAt, inProgress) + return err +} + +func markCronStart(tx *sqlx.Tx, bindType int, job string, startedAt int64) error { + query := ` +UPDATE cron_status +SET started_at = ?, in_progress = TRUE, ended_at = 0, duration_ms = 0, last_error = NULL +WHERE job_name = ? +` + _, err := tx.Exec(sqlx.Rebind(bindType, query), startedAt, job) + return err +} + +func upsertCronFinish(tx *sqlx.Tx, bindType int, job string, startedAt int64, durationMS int64, lastErr string) error { + query := ` +UPDATE cron_status +SET ended_at = ?, duration_ms = ?, last_error = ?, in_progress = FALSE +WHERE job_name = ? +` + _, err := tx.Exec(sqlx.Rebind(bindType, query), time.Now().Unix(), durationMS, nullableString(lastErr), job) + return err +} + +func nullableString(s string) interface{} { + if s == "" { + return nil + } + return s +} diff --git a/internal/tasks/inventorySnapshots.go b/internal/tasks/inventorySnapshots.go index f81adeb..b079db2 100644 --- a/internal/tasks/inventorySnapshots.go +++ b/internal/tasks/inventorySnapshots.go @@ -13,6 +13,7 @@ import ( "vctp/db" "vctp/db/queries" "vctp/internal/report" + "vctp/internal/utils" "vctp/internal/vcenter" "github.com/jmoiron/sqlx" @@ -47,13 +48,37 @@ type inventorySnapshotRow struct { type snapshotTotals = db.SnapshotTotals // RunVcenterSnapshotHourly records hourly inventory snapshots into a daily table. -func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Logger) error { +func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Logger) (err error) { + jobCtx := ctx + jobTimeout := durationFromSeconds(c.Settings.Values.Settings.HourlyJobTimeoutSeconds, 20*time.Minute) + if jobTimeout > 0 { + var cancel context.CancelFunc + jobCtx, cancel = context.WithTimeout(ctx, jobTimeout) + defer cancel() + } startedAt := time.Now() defer func() { logger.Info("Hourly snapshot job finished", "duration", time.Since(startedAt)) }() + tracker := NewCronTracker(c.Database) + done, skip, err := tracker.Start(jobCtx, "hourly_snapshot") + if err != nil { + return err + } + if skip { + logger.Warn("Hourly snapshot skipped because a previous run is still active") + return nil + } + defer func() { done(err) }() + + ctx, cancel := context.WithCancel(jobCtx) + defer cancel() startTime := time.Now() + if err := db.CheckMigrationState(ctx, c.Database.DB()); err != nil { + return err + } + // reload settings in case vcenter list has changed c.Settings.ReadYMLSettings() @@ -83,6 +108,7 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo } dbConn := c.Database.DB() + db.ApplySQLiteTuning(ctx, dbConn) if err := ensureDailyInventoryTable(ctx, dbConn, tableName); err != nil { return err } @@ -90,6 +116,9 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo var wg sync.WaitGroup var errCount int64 concurrencyLimit := c.Settings.Values.Settings.HourlySnapshotConcurrency + if override, ok := utils.EnvInt("VCTP_HOURLY_SNAPSHOT_CONCURRENCY"); ok && override >= 0 { + concurrencyLimit = override + } var sem chan struct{} if concurrencyLimit > 0 { sem = make(chan struct{}, concurrencyLimit) @@ -99,23 +128,36 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo wg.Add(1) go func(url string) { defer wg.Done() + waitStarted := time.Now() vcStart := time.Now() if sem != nil { sem <- struct{}{} defer func() { <-sem }() } + waitDuration := time.Since(waitStarted) + + timeout := durationFromSeconds(c.Settings.Values.Settings.HourlySnapshotTimeoutSeconds, 10*time.Minute) + runCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + c.Logger.Info("Starting hourly snapshot for vcenter", "url", url) - if err := c.captureHourlySnapshotForVcenter(ctx, startTime, tableName, url); err != nil { + if err := c.captureHourlySnapshotForVcenter(runCtx, startTime, tableName, url); err != nil { atomic.AddInt64(&errCount, 1) c.Logger.Error("hourly snapshot failed", "error", err, "url", url) } else { - c.Logger.Info("Finished hourly snapshot for vcenter", "url", url, "duration", time.Since(vcStart)) + c.Logger.Info("Finished hourly snapshot for vcenter", + "url", url, + "queue_wait", waitDuration, + "duration", time.Since(vcStart), + "timeout", timeout, + ) } }(url) } wg.Wait() if errCount > 0 { - return fmt.Errorf("hourly snapshot failed for %d vcenter(s)", errCount) + err = fmt.Errorf("hourly snapshot failed for %d vcenter(s)", errCount) + return err } rowCount, err := db.TableRowCount(ctx, dbConn, tableName) @@ -131,13 +173,36 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo } // RunVcenterDailyAggregate summarizes hourly snapshots into a daily summary table. -func (c *CronTask) RunVcenterDailyAggregate(ctx context.Context, logger *slog.Logger) error { +func (c *CronTask) RunVcenterDailyAggregate(ctx context.Context, logger *slog.Logger) (err error) { + jobCtx := ctx + jobTimeout := durationFromSeconds(c.Settings.Values.Settings.DailyJobTimeoutSeconds, 15*time.Minute) + if jobTimeout > 0 { + var cancel context.CancelFunc + jobCtx, cancel = context.WithTimeout(ctx, jobTimeout) + defer cancel() + } + tracker := NewCronTracker(c.Database) + done, skip, err := tracker.Start(jobCtx, "daily_aggregate") + if err != nil { + return err + } + if skip { + logger.Warn("Daily aggregate skipped because a previous run is still active") + return nil + } + defer func() { done(err) }() + + if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil { + return err + } + startedAt := time.Now() defer func() { logger.Info("Daily summary job finished", "duration", time.Since(startedAt)) }() targetTime := time.Now().Add(-time.Minute) - return c.aggregateDailySummary(ctx, targetTime, false) + err = c.aggregateDailySummary(jobCtx, targetTime, false) + return err } func (c *CronTask) AggregateDailySummary(ctx context.Context, date time.Time, force bool) error { @@ -252,7 +317,29 @@ func (c *CronTask) aggregateDailySummary(ctx context.Context, targetTime time.Ti } // RunVcenterMonthlyAggregate summarizes the previous month's daily snapshots. -func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.Logger) error { +func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.Logger) (err error) { + jobCtx := ctx + jobTimeout := durationFromSeconds(c.Settings.Values.Settings.MonthlyJobTimeoutSeconds, 20*time.Minute) + if jobTimeout > 0 { + var cancel context.CancelFunc + jobCtx, cancel = context.WithTimeout(ctx, jobTimeout) + defer cancel() + } + tracker := NewCronTracker(c.Database) + done, skip, err := tracker.Start(jobCtx, "monthly_aggregate") + if err != nil { + return err + } + if skip { + logger.Warn("Monthly aggregate skipped because a previous run is still active") + return nil + } + defer func() { done(err) }() + + if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil { + return err + } + startedAt := time.Now() defer func() { logger.Info("Monthly summary job finished", "duration", time.Since(startedAt)) @@ -260,7 +347,8 @@ func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog. now := time.Now() firstOfThisMonth := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, now.Location()) targetMonth := firstOfThisMonth.AddDate(0, -1, 0) - return c.aggregateMonthlySummary(ctx, targetMonth, false) + err = c.aggregateMonthlySummary(jobCtx, targetMonth, false) + return err } func (c *CronTask) AggregateMonthlySummary(ctx context.Context, month time.Time, force bool) error { @@ -348,7 +436,29 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time } // RunSnapshotCleanup drops hourly and daily snapshot tables older than retention. -func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) error { +func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) (err error) { + jobCtx := ctx + jobTimeout := durationFromSeconds(c.Settings.Values.Settings.CleanupJobTimeoutSeconds, 10*time.Minute) + if jobTimeout > 0 { + var cancel context.CancelFunc + jobCtx, cancel = context.WithTimeout(ctx, jobTimeout) + defer cancel() + } + tracker := NewCronTracker(c.Database) + done, skip, err := tracker.Start(jobCtx, "snapshot_cleanup") + if err != nil { + return err + } + if skip { + logger.Warn("Snapshot cleanup skipped because a previous run is still active") + return nil + } + defer func() { done(err) }() + + if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil { + return err + } + startedAt := time.Now() defer func() { logger.Info("Snapshot cleanup job finished", "duration", time.Since(startedAt)) @@ -582,6 +692,13 @@ func intWithDefault(value int, fallback int) int { return value } +func durationFromSeconds(seconds int, fallback time.Duration) time.Duration { + if seconds > 0 { + return time.Duration(seconds) * time.Second + } + return fallback +} + func normalizeResourcePool(value string) string { trimmed := strings.TrimSpace(value) if trimmed == "" { @@ -800,6 +917,58 @@ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?); return err } +func insertHourlyBatch(ctx context.Context, dbConn *sqlx.DB, tableName string, rows []inventorySnapshotRow) error { + if len(rows) == 0 { + return nil + } + tx, err := dbConn.BeginTxx(ctx, nil) + if err != nil { + return err + } + stmt, err := tx.PreparexContext(ctx, sqlx.Rebind(sqlx.BindType(dbConn.DriverName()), fmt.Sprintf(` +INSERT INTO %s ( + "InventoryId", "Name", "Vcenter", "VmId", "EventKey", "CloudId", "CreationTime", "DeletionTime", + "ResourcePool", "Datacenter", "Cluster", "Folder", "ProvisionedDisk", "VcpuCount", + "RamGB", "IsTemplate", "PoweredOn", "SrmPlaceholder", "VmUuid", "SnapshotTime", "IsPresent" +) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) +`, tableName))) + if err != nil { + tx.Rollback() + return err + } + defer stmt.Close() + + for _, row := range rows { + if _, err := stmt.ExecContext(ctx, + row.InventoryId, + row.Name, + row.Vcenter, + row.VmId, + row.EventKey, + row.CloudId, + row.CreationTime, + row.DeletionTime, + row.ResourcePool, + row.Datacenter, + row.Cluster, + row.Folder, + row.ProvisionedDisk, + row.VcpuCount, + row.RamGB, + row.IsTemplate, + row.PoweredOn, + row.SrmPlaceholder, + row.VmUuid, + row.SnapshotTime, + row.IsPresent, + ); err != nil { + tx.Rollback() + return err + } + } + return tx.Commit() +} + func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTime time.Time, tableName string, url string) error { c.Logger.Debug("connecting to vcenter for hourly snapshot", "url", url) vc := vcenter.New(c.Logger, c.VcCreds) @@ -886,10 +1055,9 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim totals.DiskTotal += nullFloat64ToFloat(row.ProvisionedDisk) } + batch := make([]inventorySnapshotRow, 0, len(presentSnapshots)+len(inventoryRows)) for _, row := range presentSnapshots { - if err := insertDailyInventoryRow(ctx, dbConn, tableName, row); err != nil { - c.Logger.Error("failed to insert hourly snapshot", "error", err, "vm_id", row.VmId.String) - } + batch = append(batch, row) } if !canDetectMissing { @@ -927,9 +1095,11 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim c.Logger.Warn("failed to mark inventory record deleted", "error", err, "vm_id", row.VmId.String) } } - if err := insertDailyInventoryRow(ctx, dbConn, tableName, row); err != nil { - c.Logger.Error("failed to insert missing VM snapshot", "error", err, "vm_id", row.VmId.String) - } + batch = append(batch, row) + } + + if err := insertHourlyBatch(ctx, dbConn, tableName, batch); err != nil { + return err } c.Logger.Info("Hourly snapshot summary", diff --git a/internal/utils/utils.go b/internal/utils/utils.go index c93839d..900045b 100644 --- a/internal/utils/utils.go +++ b/internal/utils/utils.go @@ -7,6 +7,7 @@ import ( "net" "os" "path/filepath" + "strconv" "time" ) @@ -70,3 +71,29 @@ func SleepWithContext(ctx context.Context, d time.Duration) { case <-timer.C: } } + +// EnvInt parses an environment variable into an int; returns (value, true) when set and valid. +func EnvInt(key string) (int, bool) { + val := os.Getenv(key) + if val == "" { + return 0, false + } + parsed, err := strconv.Atoi(val) + if err != nil { + return 0, false + } + return parsed, true +} + +// DurationFromEnv parses an environment variable representing seconds into a duration, defaulting when unset/invalid. +func DurationFromEnv(key string, fallback time.Duration) time.Duration { + val := os.Getenv(key) + if val == "" { + return fallback + } + seconds, err := strconv.ParseInt(val, 10, 64) + if err != nil || seconds <= 0 { + return fallback + } + return time.Duration(seconds) * time.Second +} diff --git a/src/vctp.yml b/src/vctp.yml index 0d47021..82bbc12 100644 --- a/src/vctp.yml +++ b/src/vctp.yml @@ -19,6 +19,11 @@ settings: hourly_snapshot_max_age_days: 60 daily_snapshot_max_age_months: 12 snapshot_cleanup_cron: "30 2 * * *" + hourly_job_timeout_seconds: 1200 + hourly_snapshot_timeout_seconds: 600 + daily_job_timeout_seconds: 900 + monthly_job_timeout_seconds: 1200 + cleanup_job_timeout_seconds: 600 tenants_to_filter: node_charge_clusters: srm_activeactive_vms: