This commit is contained in:
@@ -277,6 +277,12 @@ func EnsureSnapshotTable(ctx context.Context, dbConn *sqlx.DB, tableName string)
|
||||
}
|
||||
|
||||
_, err := dbConn.ExecContext(ctx, ddl)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
index := fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_vm_vcenter_idx ON %s ("VmId","Vcenter")`, tableName, tableName)
|
||||
_, err = dbConn.ExecContext(ctx, index)
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -303,6 +309,67 @@ func BackfillSerialColumn(ctx context.Context, dbConn *sqlx.DB, tableName, colum
|
||||
return nil
|
||||
}
|
||||
|
||||
// ApplySQLiteTuning applies lightweight WAL/synchronous tweaks for better concurrency in non-prod contexts.
|
||||
func ApplySQLiteTuning(ctx context.Context, dbConn *sqlx.DB) {
|
||||
if strings.ToLower(dbConn.DriverName()) != "sqlite" {
|
||||
return
|
||||
}
|
||||
// Best-effort pragmas; ignore errors to stay safe in constrained environments.
|
||||
pragmas := []string{
|
||||
`PRAGMA journal_mode=WAL;`,
|
||||
`PRAGMA synchronous=NORMAL;`,
|
||||
`PRAGMA temp_store=MEMORY;`,
|
||||
}
|
||||
for _, pragma := range pragmas {
|
||||
_, _ = dbConn.ExecContext(ctx, pragma)
|
||||
}
|
||||
}
|
||||
|
||||
// CheckMigrationState ensures goose migrations are present and not dirty.
|
||||
func CheckMigrationState(ctx context.Context, dbConn *sqlx.DB) error {
|
||||
driver := strings.ToLower(dbConn.DriverName())
|
||||
var tableExists bool
|
||||
switch driver {
|
||||
case "sqlite":
|
||||
err := dbConn.GetContext(ctx, &tableExists, `
|
||||
SELECT COUNT(1) > 0 FROM sqlite_master WHERE type='table' AND name='goose_db_version'
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "pgx", "postgres":
|
||||
err := dbConn.GetContext(ctx, &tableExists, `
|
||||
SELECT EXISTS (
|
||||
SELECT 1 FROM pg_tables WHERE schemaname = 'public' AND tablename = 'goose_db_version'
|
||||
)
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unsupported driver for migration check: %s", driver)
|
||||
}
|
||||
|
||||
if !tableExists {
|
||||
return fmt.Errorf("goose_db_version table not found; database migrations may not be applied")
|
||||
}
|
||||
|
||||
var dirty bool
|
||||
err := dbConn.GetContext(ctx, &dirty, `
|
||||
SELECT NOT is_applied
|
||||
FROM goose_db_version
|
||||
ORDER BY id DESC
|
||||
LIMIT 1
|
||||
`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if dirty {
|
||||
return fmt.Errorf("database migrations are in a dirty state; please resolve goose_db_version")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// BuildDailySummaryInsert returns the SQL to aggregate hourly snapshots into a daily summary table.
|
||||
func BuildDailySummaryInsert(tableName string, unionQuery string) (string, error) {
|
||||
if _, err := SafeTableName(tableName); err != nil {
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
-- +goose Up
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshot_registry_type_time ON snapshot_registry (snapshot_type, snapshot_time);
|
||||
|
||||
-- +goose Down
|
||||
DROP INDEX IF EXISTS idx_snapshot_registry_type_time;
|
||||
@@ -0,0 +1,5 @@
|
||||
-- +goose Up
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshot_registry_type_time ON snapshot_registry (snapshot_type, snapshot_time);
|
||||
|
||||
-- +goose Down
|
||||
DROP INDEX IF EXISTS idx_snapshot_registry_type_time;
|
||||
@@ -73,6 +73,7 @@ type SnapshotRegistry struct {
|
||||
SnapshotType string `db:"snapshot_type" json:"snapshot_type"`
|
||||
TableName string `db:"table_name" json:"table_name"`
|
||||
SnapshotTime int64 `db:"snapshot_time" json:"snapshot_time"`
|
||||
SnapshotCount int64 `db:"snapshot_count" json:"snapshot_count"`
|
||||
}
|
||||
|
||||
type SqliteMaster struct {
|
||||
|
||||
@@ -73,6 +73,7 @@ CREATE TABLE IF NOT EXISTS snapshot_registry (
|
||||
"snapshot_time" INTEGER NOT NULL,
|
||||
"snapshot_count" BIGINT NOT NULL DEFAULT 0
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshot_registry_type_time ON snapshot_registry (snapshot_type, snapshot_time);
|
||||
|
||||
-- The following tables are declared for sqlc type-checking only.
|
||||
-- Do not apply this file as a migration.
|
||||
|
||||
@@ -40,6 +40,11 @@ type SettingsYML struct {
|
||||
HourlySnapshotMaxAgeDays int `yaml:"hourly_snapshot_max_age_days"`
|
||||
DailySnapshotMaxAgeMonths int `yaml:"daily_snapshot_max_age_months"`
|
||||
SnapshotCleanupCron string `yaml:"snapshot_cleanup_cron"`
|
||||
HourlyJobTimeoutSeconds int `yaml:"hourly_job_timeout_seconds"`
|
||||
HourlySnapshotTimeoutSeconds int `yaml:"hourly_snapshot_timeout_seconds"`
|
||||
DailyJobTimeoutSeconds int `yaml:"daily_job_timeout_seconds"`
|
||||
MonthlyJobTimeoutSeconds int `yaml:"monthly_job_timeout_seconds"`
|
||||
CleanupJobTimeoutSeconds int `yaml:"cleanup_job_timeout_seconds"`
|
||||
TenantsToFilter []string `yaml:"tenants_to_filter"`
|
||||
NodeChargeClusters []string `yaml:"node_charge_clusters"`
|
||||
SrmActiveActiveVms []string `yaml:"srm_activeactive_vms"`
|
||||
|
||||
152
internal/tasks/cronstatus.go
Normal file
152
internal/tasks/cronstatus.go
Normal file
@@ -0,0 +1,152 @@
|
||||
package tasks
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"time"
|
||||
"vctp/db"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
// CronTracker manages re-entry protection and status recording for cron jobs.
|
||||
type CronTracker struct {
|
||||
db db.Database
|
||||
bindType int
|
||||
}
|
||||
|
||||
func NewCronTracker(database db.Database) *CronTracker {
|
||||
return &CronTracker{
|
||||
db: database,
|
||||
bindType: sqlx.BindType(database.DB().DriverName()),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *CronTracker) ensureTable(ctx context.Context) error {
|
||||
conn := c.db.DB()
|
||||
driver := conn.DriverName()
|
||||
var ddl string
|
||||
switch driver {
|
||||
case "pgx", "postgres":
|
||||
ddl = `
|
||||
CREATE TABLE IF NOT EXISTS cron_status (
|
||||
job_name TEXT PRIMARY KEY,
|
||||
started_at BIGINT NOT NULL,
|
||||
ended_at BIGINT NOT NULL,
|
||||
duration_ms BIGINT NOT NULL,
|
||||
last_error TEXT,
|
||||
in_progress BOOLEAN NOT NULL DEFAULT FALSE
|
||||
);`
|
||||
default:
|
||||
ddl = `
|
||||
CREATE TABLE IF NOT EXISTS cron_status (
|
||||
job_name TEXT PRIMARY KEY,
|
||||
started_at BIGINT NOT NULL,
|
||||
ended_at BIGINT NOT NULL,
|
||||
duration_ms BIGINT NOT NULL,
|
||||
last_error TEXT,
|
||||
in_progress BOOLEAN NOT NULL DEFAULT FALSE
|
||||
);`
|
||||
}
|
||||
_, err := conn.ExecContext(ctx, ddl)
|
||||
return err
|
||||
}
|
||||
|
||||
// Start marks a job as in-progress; returns a completion callback and whether to skip because it's already running.
|
||||
func (c *CronTracker) Start(ctx context.Context, job string) (func(error), bool, error) {
|
||||
if err := c.ensureTable(ctx); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
conn := c.db.DB()
|
||||
now := time.Now().Unix()
|
||||
|
||||
tx, err := conn.BeginTxx(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
var inProgress bool
|
||||
query := sqlx.Rebind(c.bindType, `SELECT in_progress FROM cron_status WHERE job_name = ?`)
|
||||
err = tx.QueryRowContext(ctx, query, job).Scan(&inProgress)
|
||||
if err != nil {
|
||||
// no row, insert
|
||||
if err := upsertCron(tx, c.bindType, job, now, false); err != nil {
|
||||
tx.Rollback()
|
||||
return nil, false, err
|
||||
}
|
||||
} else {
|
||||
if inProgress {
|
||||
tx.Rollback()
|
||||
return nil, true, nil
|
||||
}
|
||||
if err := markCronStart(tx, c.bindType, job, now); err != nil {
|
||||
tx.Rollback()
|
||||
return nil, false, err
|
||||
}
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
done := func(runErr error) {
|
||||
_ = c.finish(context.Background(), job, now, runErr)
|
||||
}
|
||||
return done, false, nil
|
||||
}
|
||||
|
||||
func (c *CronTracker) finish(ctx context.Context, job string, startedAt int64, runErr error) error {
|
||||
conn := c.db.DB()
|
||||
duration := time.Since(time.Unix(startedAt, 0)).Milliseconds()
|
||||
tx, err := conn.BeginTxx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var lastError sql.NullString
|
||||
if runErr != nil {
|
||||
lastError = sql.NullString{String: runErr.Error(), Valid: true}
|
||||
}
|
||||
err = upsertCronFinish(tx, c.bindType, job, startedAt, duration, lastError.String)
|
||||
if err != nil {
|
||||
tx.Rollback()
|
||||
return err
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
func upsertCron(tx *sqlx.Tx, bindType int, job string, startedAt int64, inProgress bool) error {
|
||||
query := `
|
||||
INSERT INTO cron_status (job_name, started_at, ended_at, duration_ms, last_error, in_progress)
|
||||
VALUES (?, ?, 0, 0, NULL, ?)
|
||||
ON CONFLICT (job_name) DO UPDATE SET started_at = excluded.started_at, in_progress = excluded.in_progress, ended_at = excluded.ended_at, duration_ms = excluded.duration_ms, last_error = excluded.last_error
|
||||
`
|
||||
_, err := tx.Exec(sqlx.Rebind(bindType, query), job, startedAt, inProgress)
|
||||
return err
|
||||
}
|
||||
|
||||
func markCronStart(tx *sqlx.Tx, bindType int, job string, startedAt int64) error {
|
||||
query := `
|
||||
UPDATE cron_status
|
||||
SET started_at = ?, in_progress = TRUE, ended_at = 0, duration_ms = 0, last_error = NULL
|
||||
WHERE job_name = ?
|
||||
`
|
||||
_, err := tx.Exec(sqlx.Rebind(bindType, query), startedAt, job)
|
||||
return err
|
||||
}
|
||||
|
||||
func upsertCronFinish(tx *sqlx.Tx, bindType int, job string, startedAt int64, durationMS int64, lastErr string) error {
|
||||
query := `
|
||||
UPDATE cron_status
|
||||
SET ended_at = ?, duration_ms = ?, last_error = ?, in_progress = FALSE
|
||||
WHERE job_name = ?
|
||||
`
|
||||
_, err := tx.Exec(sqlx.Rebind(bindType, query), time.Now().Unix(), durationMS, nullableString(lastErr), job)
|
||||
return err
|
||||
}
|
||||
|
||||
func nullableString(s string) interface{} {
|
||||
if s == "" {
|
||||
return nil
|
||||
}
|
||||
return s
|
||||
}
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"vctp/db"
|
||||
"vctp/db/queries"
|
||||
"vctp/internal/report"
|
||||
"vctp/internal/utils"
|
||||
"vctp/internal/vcenter"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
@@ -47,13 +48,37 @@ type inventorySnapshotRow struct {
|
||||
type snapshotTotals = db.SnapshotTotals
|
||||
|
||||
// RunVcenterSnapshotHourly records hourly inventory snapshots into a daily table.
|
||||
func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Logger) error {
|
||||
func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Logger) (err error) {
|
||||
jobCtx := ctx
|
||||
jobTimeout := durationFromSeconds(c.Settings.Values.Settings.HourlyJobTimeoutSeconds, 20*time.Minute)
|
||||
if jobTimeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
|
||||
defer cancel()
|
||||
}
|
||||
startedAt := time.Now()
|
||||
defer func() {
|
||||
logger.Info("Hourly snapshot job finished", "duration", time.Since(startedAt))
|
||||
}()
|
||||
tracker := NewCronTracker(c.Database)
|
||||
done, skip, err := tracker.Start(jobCtx, "hourly_snapshot")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skip {
|
||||
logger.Warn("Hourly snapshot skipped because a previous run is still active")
|
||||
return nil
|
||||
}
|
||||
defer func() { done(err) }()
|
||||
|
||||
ctx, cancel := context.WithCancel(jobCtx)
|
||||
defer cancel()
|
||||
startTime := time.Now()
|
||||
|
||||
if err := db.CheckMigrationState(ctx, c.Database.DB()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// reload settings in case vcenter list has changed
|
||||
c.Settings.ReadYMLSettings()
|
||||
|
||||
@@ -83,6 +108,7 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
|
||||
}
|
||||
|
||||
dbConn := c.Database.DB()
|
||||
db.ApplySQLiteTuning(ctx, dbConn)
|
||||
if err := ensureDailyInventoryTable(ctx, dbConn, tableName); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -90,6 +116,9 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
|
||||
var wg sync.WaitGroup
|
||||
var errCount int64
|
||||
concurrencyLimit := c.Settings.Values.Settings.HourlySnapshotConcurrency
|
||||
if override, ok := utils.EnvInt("VCTP_HOURLY_SNAPSHOT_CONCURRENCY"); ok && override >= 0 {
|
||||
concurrencyLimit = override
|
||||
}
|
||||
var sem chan struct{}
|
||||
if concurrencyLimit > 0 {
|
||||
sem = make(chan struct{}, concurrencyLimit)
|
||||
@@ -99,23 +128,36 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
|
||||
wg.Add(1)
|
||||
go func(url string) {
|
||||
defer wg.Done()
|
||||
waitStarted := time.Now()
|
||||
vcStart := time.Now()
|
||||
if sem != nil {
|
||||
sem <- struct{}{}
|
||||
defer func() { <-sem }()
|
||||
}
|
||||
waitDuration := time.Since(waitStarted)
|
||||
|
||||
timeout := durationFromSeconds(c.Settings.Values.Settings.HourlySnapshotTimeoutSeconds, 10*time.Minute)
|
||||
runCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
c.Logger.Info("Starting hourly snapshot for vcenter", "url", url)
|
||||
if err := c.captureHourlySnapshotForVcenter(ctx, startTime, tableName, url); err != nil {
|
||||
if err := c.captureHourlySnapshotForVcenter(runCtx, startTime, tableName, url); err != nil {
|
||||
atomic.AddInt64(&errCount, 1)
|
||||
c.Logger.Error("hourly snapshot failed", "error", err, "url", url)
|
||||
} else {
|
||||
c.Logger.Info("Finished hourly snapshot for vcenter", "url", url, "duration", time.Since(vcStart))
|
||||
c.Logger.Info("Finished hourly snapshot for vcenter",
|
||||
"url", url,
|
||||
"queue_wait", waitDuration,
|
||||
"duration", time.Since(vcStart),
|
||||
"timeout", timeout,
|
||||
)
|
||||
}
|
||||
}(url)
|
||||
}
|
||||
wg.Wait()
|
||||
if errCount > 0 {
|
||||
return fmt.Errorf("hourly snapshot failed for %d vcenter(s)", errCount)
|
||||
err = fmt.Errorf("hourly snapshot failed for %d vcenter(s)", errCount)
|
||||
return err
|
||||
}
|
||||
|
||||
rowCount, err := db.TableRowCount(ctx, dbConn, tableName)
|
||||
@@ -131,13 +173,36 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
|
||||
}
|
||||
|
||||
// RunVcenterDailyAggregate summarizes hourly snapshots into a daily summary table.
|
||||
func (c *CronTask) RunVcenterDailyAggregate(ctx context.Context, logger *slog.Logger) error {
|
||||
func (c *CronTask) RunVcenterDailyAggregate(ctx context.Context, logger *slog.Logger) (err error) {
|
||||
jobCtx := ctx
|
||||
jobTimeout := durationFromSeconds(c.Settings.Values.Settings.DailyJobTimeoutSeconds, 15*time.Minute)
|
||||
if jobTimeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
|
||||
defer cancel()
|
||||
}
|
||||
tracker := NewCronTracker(c.Database)
|
||||
done, skip, err := tracker.Start(jobCtx, "daily_aggregate")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skip {
|
||||
logger.Warn("Daily aggregate skipped because a previous run is still active")
|
||||
return nil
|
||||
}
|
||||
defer func() { done(err) }()
|
||||
|
||||
if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
startedAt := time.Now()
|
||||
defer func() {
|
||||
logger.Info("Daily summary job finished", "duration", time.Since(startedAt))
|
||||
}()
|
||||
targetTime := time.Now().Add(-time.Minute)
|
||||
return c.aggregateDailySummary(ctx, targetTime, false)
|
||||
err = c.aggregateDailySummary(jobCtx, targetTime, false)
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *CronTask) AggregateDailySummary(ctx context.Context, date time.Time, force bool) error {
|
||||
@@ -252,7 +317,29 @@ func (c *CronTask) aggregateDailySummary(ctx context.Context, targetTime time.Ti
|
||||
}
|
||||
|
||||
// RunVcenterMonthlyAggregate summarizes the previous month's daily snapshots.
|
||||
func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.Logger) error {
|
||||
func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.Logger) (err error) {
|
||||
jobCtx := ctx
|
||||
jobTimeout := durationFromSeconds(c.Settings.Values.Settings.MonthlyJobTimeoutSeconds, 20*time.Minute)
|
||||
if jobTimeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
|
||||
defer cancel()
|
||||
}
|
||||
tracker := NewCronTracker(c.Database)
|
||||
done, skip, err := tracker.Start(jobCtx, "monthly_aggregate")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skip {
|
||||
logger.Warn("Monthly aggregate skipped because a previous run is still active")
|
||||
return nil
|
||||
}
|
||||
defer func() { done(err) }()
|
||||
|
||||
if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
startedAt := time.Now()
|
||||
defer func() {
|
||||
logger.Info("Monthly summary job finished", "duration", time.Since(startedAt))
|
||||
@@ -260,7 +347,8 @@ func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.
|
||||
now := time.Now()
|
||||
firstOfThisMonth := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, now.Location())
|
||||
targetMonth := firstOfThisMonth.AddDate(0, -1, 0)
|
||||
return c.aggregateMonthlySummary(ctx, targetMonth, false)
|
||||
err = c.aggregateMonthlySummary(jobCtx, targetMonth, false)
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *CronTask) AggregateMonthlySummary(ctx context.Context, month time.Time, force bool) error {
|
||||
@@ -348,7 +436,29 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time
|
||||
}
|
||||
|
||||
// RunSnapshotCleanup drops hourly and daily snapshot tables older than retention.
|
||||
func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) error {
|
||||
func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) (err error) {
|
||||
jobCtx := ctx
|
||||
jobTimeout := durationFromSeconds(c.Settings.Values.Settings.CleanupJobTimeoutSeconds, 10*time.Minute)
|
||||
if jobTimeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
|
||||
defer cancel()
|
||||
}
|
||||
tracker := NewCronTracker(c.Database)
|
||||
done, skip, err := tracker.Start(jobCtx, "snapshot_cleanup")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skip {
|
||||
logger.Warn("Snapshot cleanup skipped because a previous run is still active")
|
||||
return nil
|
||||
}
|
||||
defer func() { done(err) }()
|
||||
|
||||
if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
startedAt := time.Now()
|
||||
defer func() {
|
||||
logger.Info("Snapshot cleanup job finished", "duration", time.Since(startedAt))
|
||||
@@ -582,6 +692,13 @@ func intWithDefault(value int, fallback int) int {
|
||||
return value
|
||||
}
|
||||
|
||||
func durationFromSeconds(seconds int, fallback time.Duration) time.Duration {
|
||||
if seconds > 0 {
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
return fallback
|
||||
}
|
||||
|
||||
func normalizeResourcePool(value string) string {
|
||||
trimmed := strings.TrimSpace(value)
|
||||
if trimmed == "" {
|
||||
@@ -800,6 +917,58 @@ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
|
||||
return err
|
||||
}
|
||||
|
||||
func insertHourlyBatch(ctx context.Context, dbConn *sqlx.DB, tableName string, rows []inventorySnapshotRow) error {
|
||||
if len(rows) == 0 {
|
||||
return nil
|
||||
}
|
||||
tx, err := dbConn.BeginTxx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stmt, err := tx.PreparexContext(ctx, sqlx.Rebind(sqlx.BindType(dbConn.DriverName()), fmt.Sprintf(`
|
||||
INSERT INTO %s (
|
||||
"InventoryId", "Name", "Vcenter", "VmId", "EventKey", "CloudId", "CreationTime", "DeletionTime",
|
||||
"ResourcePool", "Datacenter", "Cluster", "Folder", "ProvisionedDisk", "VcpuCount",
|
||||
"RamGB", "IsTemplate", "PoweredOn", "SrmPlaceholder", "VmUuid", "SnapshotTime", "IsPresent"
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`, tableName)))
|
||||
if err != nil {
|
||||
tx.Rollback()
|
||||
return err
|
||||
}
|
||||
defer stmt.Close()
|
||||
|
||||
for _, row := range rows {
|
||||
if _, err := stmt.ExecContext(ctx,
|
||||
row.InventoryId,
|
||||
row.Name,
|
||||
row.Vcenter,
|
||||
row.VmId,
|
||||
row.EventKey,
|
||||
row.CloudId,
|
||||
row.CreationTime,
|
||||
row.DeletionTime,
|
||||
row.ResourcePool,
|
||||
row.Datacenter,
|
||||
row.Cluster,
|
||||
row.Folder,
|
||||
row.ProvisionedDisk,
|
||||
row.VcpuCount,
|
||||
row.RamGB,
|
||||
row.IsTemplate,
|
||||
row.PoweredOn,
|
||||
row.SrmPlaceholder,
|
||||
row.VmUuid,
|
||||
row.SnapshotTime,
|
||||
row.IsPresent,
|
||||
); err != nil {
|
||||
tx.Rollback()
|
||||
return err
|
||||
}
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTime time.Time, tableName string, url string) error {
|
||||
c.Logger.Debug("connecting to vcenter for hourly snapshot", "url", url)
|
||||
vc := vcenter.New(c.Logger, c.VcCreds)
|
||||
@@ -886,10 +1055,9 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
|
||||
totals.DiskTotal += nullFloat64ToFloat(row.ProvisionedDisk)
|
||||
}
|
||||
|
||||
batch := make([]inventorySnapshotRow, 0, len(presentSnapshots)+len(inventoryRows))
|
||||
for _, row := range presentSnapshots {
|
||||
if err := insertDailyInventoryRow(ctx, dbConn, tableName, row); err != nil {
|
||||
c.Logger.Error("failed to insert hourly snapshot", "error", err, "vm_id", row.VmId.String)
|
||||
}
|
||||
batch = append(batch, row)
|
||||
}
|
||||
|
||||
if !canDetectMissing {
|
||||
@@ -927,9 +1095,11 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
|
||||
c.Logger.Warn("failed to mark inventory record deleted", "error", err, "vm_id", row.VmId.String)
|
||||
}
|
||||
}
|
||||
if err := insertDailyInventoryRow(ctx, dbConn, tableName, row); err != nil {
|
||||
c.Logger.Error("failed to insert missing VM snapshot", "error", err, "vm_id", row.VmId.String)
|
||||
batch = append(batch, row)
|
||||
}
|
||||
|
||||
if err := insertHourlyBatch(ctx, dbConn, tableName, batch); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.Logger.Info("Hourly snapshot summary",
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -70,3 +71,29 @@ func SleepWithContext(ctx context.Context, d time.Duration) {
|
||||
case <-timer.C:
|
||||
}
|
||||
}
|
||||
|
||||
// EnvInt parses an environment variable into an int; returns (value, true) when set and valid.
|
||||
func EnvInt(key string) (int, bool) {
|
||||
val := os.Getenv(key)
|
||||
if val == "" {
|
||||
return 0, false
|
||||
}
|
||||
parsed, err := strconv.Atoi(val)
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
return parsed, true
|
||||
}
|
||||
|
||||
// DurationFromEnv parses an environment variable representing seconds into a duration, defaulting when unset/invalid.
|
||||
func DurationFromEnv(key string, fallback time.Duration) time.Duration {
|
||||
val := os.Getenv(key)
|
||||
if val == "" {
|
||||
return fallback
|
||||
}
|
||||
seconds, err := strconv.ParseInt(val, 10, 64)
|
||||
if err != nil || seconds <= 0 {
|
||||
return fallback
|
||||
}
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
|
||||
@@ -19,6 +19,11 @@ settings:
|
||||
hourly_snapshot_max_age_days: 60
|
||||
daily_snapshot_max_age_months: 12
|
||||
snapshot_cleanup_cron: "30 2 * * *"
|
||||
hourly_job_timeout_seconds: 1200
|
||||
hourly_snapshot_timeout_seconds: 600
|
||||
daily_job_timeout_seconds: 900
|
||||
monthly_job_timeout_seconds: 1200
|
||||
cleanup_job_timeout_seconds: 600
|
||||
tenants_to_filter:
|
||||
node_charge_clusters:
|
||||
srm_activeactive_vms:
|
||||
|
||||
Reference in New Issue
Block a user