@@ -277,6 +277,12 @@ func EnsureSnapshotTable(ctx context.Context, dbConn *sqlx.DB, tableName string)
 	}
 
 	_, err := dbConn.ExecContext(ctx, ddl)
+	if err != nil {
+		return err
+	}
+
+	index := fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_vm_vcenter_idx ON %s ("VmId","Vcenter")`, tableName, tableName)
+	_, err = dbConn.ExecContext(ctx, index)
 	return err
 }
 
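Note: the Sprintf above derives the index name from the snapshot table itself. A minimal sketch of the expansion, using a hypothetical table name (real names are built elsewhere in the codebase):

	// Hypothetical table name, for illustration only.
	tableName := "hourly_snapshot_2024_06_01"
	index := fmt.Sprintf(`CREATE INDEX IF NOT EXISTS %s_vm_vcenter_idx ON %s ("VmId","Vcenter")`, tableName, tableName)
	// index now holds:
	//   CREATE INDEX IF NOT EXISTS hourly_snapshot_2024_06_01_vm_vcenter_idx ON hourly_snapshot_2024_06_01 ("VmId","Vcenter")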
@@ -303,6 +309,67 @@ func BackfillSerialColumn(ctx context.Context, dbConn *sqlx.DB, tableName, colum
 	return nil
 }
 
+// ApplySQLiteTuning applies lightweight WAL/synchronous tweaks for better concurrency in non-prod contexts.
+func ApplySQLiteTuning(ctx context.Context, dbConn *sqlx.DB) {
+	if strings.ToLower(dbConn.DriverName()) != "sqlite" {
+		return
+	}
+	// Best-effort pragmas; ignore errors to stay safe in constrained environments.
+	pragmas := []string{
+		`PRAGMA journal_mode=WAL;`,
+		`PRAGMA synchronous=NORMAL;`,
+		`PRAGMA temp_store=MEMORY;`,
+	}
+	for _, pragma := range pragmas {
+		_, _ = dbConn.ExecContext(ctx, pragma)
+	}
+}
+
+// CheckMigrationState ensures goose migrations are present and not dirty.
+func CheckMigrationState(ctx context.Context, dbConn *sqlx.DB) error {
+	driver := strings.ToLower(dbConn.DriverName())
+	var tableExists bool
+	switch driver {
+	case "sqlite":
+		err := dbConn.GetContext(ctx, &tableExists, `
+			SELECT COUNT(1) > 0 FROM sqlite_master WHERE type='table' AND name='goose_db_version'
+		`)
+		if err != nil {
+			return err
+		}
+	case "pgx", "postgres":
+		err := dbConn.GetContext(ctx, &tableExists, `
+			SELECT EXISTS (
+				SELECT 1 FROM pg_tables WHERE schemaname = 'public' AND tablename = 'goose_db_version'
+			)
+		`)
+		if err != nil {
+			return err
+		}
+	default:
+		return fmt.Errorf("unsupported driver for migration check: %s", driver)
+	}
+
+	if !tableExists {
+		return fmt.Errorf("goose_db_version table not found; database migrations may not be applied")
+	}
+
+	var dirty bool
+	err := dbConn.GetContext(ctx, &dirty, `
+		SELECT NOT is_applied
+		FROM goose_db_version
+		ORDER BY id DESC
+		LIMIT 1
+	`)
+	if err != nil {
+		return err
+	}
+	if dirty {
+		return fmt.Errorf("database migrations are in a dirty state; please resolve goose_db_version")
+	}
+	return nil
+}
+
 // BuildDailySummaryInsert returns the SQL to aggregate hourly snapshots into a daily summary table.
 func BuildDailySummaryInsert(tableName string, unionQuery string) (string, error) {
 	if _, err := SafeTableName(tableName); err != nil {
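Note: a minimal usage sketch for the two helpers added above. The wrapper function here is hypothetical; the actual call sites in this commit are inside the cron jobs further down:

	// Hypothetical wiring, shown for context only.
	func prepareDatabase(ctx context.Context, conn *sqlx.DB) error {
		db.ApplySQLiteTuning(ctx, conn) // best-effort pragmas; no-op for non-sqlite drivers
		return db.CheckMigrationState(ctx, conn)
	}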
@@ -0,0 +1,5 @@
+-- +goose Up
+CREATE INDEX IF NOT EXISTS idx_snapshot_registry_type_time ON snapshot_registry (snapshot_type, snapshot_time);
+
+-- +goose Down
+DROP INDEX IF EXISTS idx_snapshot_registry_type_time;
@@ -0,0 +1,5 @@
+-- +goose Up
+CREATE INDEX IF NOT EXISTS idx_snapshot_registry_type_time ON snapshot_registry (snapshot_type, snapshot_time);
+
+-- +goose Down
+DROP INDEX IF EXISTS idx_snapshot_registry_type_time;
@@ -73,6 +73,7 @@ type SnapshotRegistry struct {
 	SnapshotType  string `db:"snapshot_type" json:"snapshot_type"`
 	TableName     string `db:"table_name" json:"table_name"`
 	SnapshotTime  int64  `db:"snapshot_time" json:"snapshot_time"`
+	SnapshotCount int64  `db:"snapshot_count" json:"snapshot_count"`
 }
 
 type SqliteMaster struct {
@@ -73,6 +73,7 @@ CREATE TABLE IF NOT EXISTS snapshot_registry (
 	"snapshot_time" INTEGER NOT NULL,
 	"snapshot_count" BIGINT NOT NULL DEFAULT 0
 );
+CREATE INDEX IF NOT EXISTS idx_snapshot_registry_type_time ON snapshot_registry (snapshot_type, snapshot_time);
 
 -- The following tables are declared for sqlc type-checking only.
 -- Do not apply this file as a migration.
@@ -40,6 +40,11 @@ type SettingsYML struct {
 	HourlySnapshotMaxAgeDays  int    `yaml:"hourly_snapshot_max_age_days"`
 	DailySnapshotMaxAgeMonths int    `yaml:"daily_snapshot_max_age_months"`
 	SnapshotCleanupCron       string `yaml:"snapshot_cleanup_cron"`
+	HourlyJobTimeoutSeconds      int `yaml:"hourly_job_timeout_seconds"`
+	HourlySnapshotTimeoutSeconds int `yaml:"hourly_snapshot_timeout_seconds"`
+	DailyJobTimeoutSeconds       int `yaml:"daily_job_timeout_seconds"`
+	MonthlyJobTimeoutSeconds     int `yaml:"monthly_job_timeout_seconds"`
+	CleanupJobTimeoutSeconds     int `yaml:"cleanup_job_timeout_seconds"`
 	TenantsToFilter    []string `yaml:"tenants_to_filter"`
 	NodeChargeClusters []string `yaml:"node_charge_clusters"`
 	SrmActiveActiveVms []string `yaml:"srm_activeactive_vms"`
internal/tasks/cronstatus.go (new file, 152 lines)
@@ -0,0 +1,152 @@
+package tasks
+
+import (
+	"context"
+	"database/sql"
+	"time"
+	"vctp/db"
+
+	"github.com/jmoiron/sqlx"
+)
+
+// CronTracker manages re-entry protection and status recording for cron jobs.
+type CronTracker struct {
+	db       db.Database
+	bindType int
+}
+
+func NewCronTracker(database db.Database) *CronTracker {
+	return &CronTracker{
+		db:       database,
+		bindType: sqlx.BindType(database.DB().DriverName()),
+	}
+}
+
+func (c *CronTracker) ensureTable(ctx context.Context) error {
+	conn := c.db.DB()
+	driver := conn.DriverName()
+	var ddl string
+	switch driver {
+	case "pgx", "postgres":
+		ddl = `
+		CREATE TABLE IF NOT EXISTS cron_status (
+			job_name TEXT PRIMARY KEY,
+			started_at BIGINT NOT NULL,
+			ended_at BIGINT NOT NULL,
+			duration_ms BIGINT NOT NULL,
+			last_error TEXT,
+			in_progress BOOLEAN NOT NULL DEFAULT FALSE
+		);`
+	default:
+		ddl = `
+		CREATE TABLE IF NOT EXISTS cron_status (
+			job_name TEXT PRIMARY KEY,
+			started_at BIGINT NOT NULL,
+			ended_at BIGINT NOT NULL,
+			duration_ms BIGINT NOT NULL,
+			last_error TEXT,
+			in_progress BOOLEAN NOT NULL DEFAULT FALSE
+		);`
+	}
+	_, err := conn.ExecContext(ctx, ddl)
+	return err
+}
+
+// Start marks a job as in-progress; returns a completion callback and whether to skip because it's already running.
+func (c *CronTracker) Start(ctx context.Context, job string) (func(error), bool, error) {
+	if err := c.ensureTable(ctx); err != nil {
+		return nil, false, err
+	}
+	conn := c.db.DB()
+	now := time.Now().Unix()
+
+	tx, err := conn.BeginTxx(ctx, nil)
+	if err != nil {
+		return nil, false, err
+	}
+
+	var inProgress bool
+	query := sqlx.Rebind(c.bindType, `SELECT in_progress FROM cron_status WHERE job_name = ?`)
+	err = tx.QueryRowContext(ctx, query, job).Scan(&inProgress)
+	if err != nil {
+		// no row yet: insert one and mark the job as running so concurrent starts are skipped
+		if err := upsertCron(tx, c.bindType, job, now, true); err != nil {
+			tx.Rollback()
+			return nil, false, err
+		}
+	} else {
+		if inProgress {
+			tx.Rollback()
+			return nil, true, nil
+		}
+		if err := markCronStart(tx, c.bindType, job, now); err != nil {
+			tx.Rollback()
+			return nil, false, err
+		}
+	}
+
+	if err := tx.Commit(); err != nil {
+		return nil, false, err
+	}
+
+	done := func(runErr error) {
+		_ = c.finish(context.Background(), job, now, runErr)
+	}
+	return done, false, nil
+}
+
+func (c *CronTracker) finish(ctx context.Context, job string, startedAt int64, runErr error) error {
+	conn := c.db.DB()
+	duration := time.Since(time.Unix(startedAt, 0)).Milliseconds()
+	tx, err := conn.BeginTxx(ctx, nil)
+	if err != nil {
+		return err
+	}
+	var lastError sql.NullString
+	if runErr != nil {
+		lastError = sql.NullString{String: runErr.Error(), Valid: true}
+	}
+	err = upsertCronFinish(tx, c.bindType, job, startedAt, duration, lastError.String)
+	if err != nil {
+		tx.Rollback()
+		return err
+	}
+	return tx.Commit()
+}
+
+func upsertCron(tx *sqlx.Tx, bindType int, job string, startedAt int64, inProgress bool) error {
+	query := `
+		INSERT INTO cron_status (job_name, started_at, ended_at, duration_ms, last_error, in_progress)
+		VALUES (?, ?, 0, 0, NULL, ?)
+		ON CONFLICT (job_name) DO UPDATE SET started_at = excluded.started_at, in_progress = excluded.in_progress, ended_at = excluded.ended_at, duration_ms = excluded.duration_ms, last_error = excluded.last_error
+	`
+	_, err := tx.Exec(sqlx.Rebind(bindType, query), job, startedAt, inProgress)
+	return err
+}
+
+func markCronStart(tx *sqlx.Tx, bindType int, job string, startedAt int64) error {
+	query := `
+		UPDATE cron_status
+		SET started_at = ?, in_progress = TRUE, ended_at = 0, duration_ms = 0, last_error = NULL
+		WHERE job_name = ?
+	`
+	_, err := tx.Exec(sqlx.Rebind(bindType, query), startedAt, job)
+	return err
+}
+
+func upsertCronFinish(tx *sqlx.Tx, bindType int, job string, startedAt int64, durationMS int64, lastErr string) error {
+	query := `
+		UPDATE cron_status
+		SET ended_at = ?, duration_ms = ?, last_error = ?, in_progress = FALSE
+		WHERE job_name = ?
+	`
+	_, err := tx.Exec(sqlx.Rebind(bindType, query), time.Now().Unix(), durationMS, nullableString(lastErr), job)
+	return err
+}
+
+func nullableString(s string) interface{} {
+	if s == "" {
+		return nil
+	}
+	return s
+}
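Note: the calling pattern CronTracker is designed for, sketched as a generic wrapper. It mirrors what RunVcenterSnapshotHourly and the other jobs below do inline; the wrapper itself is illustrative and not part of this commit:

	func runGuarded(ctx context.Context, tracker *CronTracker, job string, run func(context.Context) error) (err error) {
		done, skip, err := tracker.Start(ctx, job)
		if err != nil {
			return err
		}
		if skip {
			return nil // a previous run of this job is still marked in progress
		}
		defer func() { done(err) }() // records end time, duration and last error
		err = run(ctx)
		return err
	}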
@@ -13,6 +13,7 @@ import (
 	"vctp/db"
 	"vctp/db/queries"
 	"vctp/internal/report"
+	"vctp/internal/utils"
 	"vctp/internal/vcenter"
 
 	"github.com/jmoiron/sqlx"
@@ -47,13 +48,37 @@ type inventorySnapshotRow struct {
 type snapshotTotals = db.SnapshotTotals
 
 // RunVcenterSnapshotHourly records hourly inventory snapshots into a daily table.
-func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Logger) error {
+func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Logger) (err error) {
+	jobCtx := ctx
+	jobTimeout := durationFromSeconds(c.Settings.Values.Settings.HourlyJobTimeoutSeconds, 20*time.Minute)
+	if jobTimeout > 0 {
+		var cancel context.CancelFunc
+		jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
+		defer cancel()
+	}
 	startedAt := time.Now()
 	defer func() {
 		logger.Info("Hourly snapshot job finished", "duration", time.Since(startedAt))
 	}()
+	tracker := NewCronTracker(c.Database)
+	done, skip, err := tracker.Start(jobCtx, "hourly_snapshot")
+	if err != nil {
+		return err
+	}
+	if skip {
+		logger.Warn("Hourly snapshot skipped because a previous run is still active")
+		return nil
+	}
+	defer func() { done(err) }()
+
+	ctx, cancel := context.WithCancel(jobCtx)
+	defer cancel()
 	startTime := time.Now()
 
+	if err := db.CheckMigrationState(ctx, c.Database.DB()); err != nil {
+		return err
+	}
+
 	// reload settings in case vcenter list has changed
 	c.Settings.ReadYMLSettings()
 
@@ -83,6 +108,7 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
 	}
 
 	dbConn := c.Database.DB()
+	db.ApplySQLiteTuning(ctx, dbConn)
 	if err := ensureDailyInventoryTable(ctx, dbConn, tableName); err != nil {
 		return err
 	}
@@ -90,6 +116,9 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
 	var wg sync.WaitGroup
 	var errCount int64
 	concurrencyLimit := c.Settings.Values.Settings.HourlySnapshotConcurrency
+	if override, ok := utils.EnvInt("VCTP_HOURLY_SNAPSHOT_CONCURRENCY"); ok && override >= 0 {
+		concurrencyLimit = override
+	}
 	var sem chan struct{}
 	if concurrencyLimit > 0 {
 		sem = make(chan struct{}, concurrencyLimit)
@@ -99,23 +128,36 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
 		wg.Add(1)
 		go func(url string) {
 			defer wg.Done()
+			waitStarted := time.Now()
 			vcStart := time.Now()
 			if sem != nil {
 				sem <- struct{}{}
 				defer func() { <-sem }()
 			}
+			waitDuration := time.Since(waitStarted)
+
+			timeout := durationFromSeconds(c.Settings.Values.Settings.HourlySnapshotTimeoutSeconds, 10*time.Minute)
+			runCtx, cancel := context.WithTimeout(ctx, timeout)
+			defer cancel()
+
 			c.Logger.Info("Starting hourly snapshot for vcenter", "url", url)
-			if err := c.captureHourlySnapshotForVcenter(ctx, startTime, tableName, url); err != nil {
+			if err := c.captureHourlySnapshotForVcenter(runCtx, startTime, tableName, url); err != nil {
 				atomic.AddInt64(&errCount, 1)
 				c.Logger.Error("hourly snapshot failed", "error", err, "url", url)
 			} else {
-				c.Logger.Info("Finished hourly snapshot for vcenter", "url", url, "duration", time.Since(vcStart))
+				c.Logger.Info("Finished hourly snapshot for vcenter",
+					"url", url,
+					"queue_wait", waitDuration,
+					"duration", time.Since(vcStart),
+					"timeout", timeout,
+				)
 			}
 		}(url)
 	}
 	wg.Wait()
 	if errCount > 0 {
-		return fmt.Errorf("hourly snapshot failed for %d vcenter(s)", errCount)
+		err = fmt.Errorf("hourly snapshot failed for %d vcenter(s)", errCount)
+		return err
 	}
 
 	rowCount, err := db.TableRowCount(ctx, dbConn, tableName)
@@ -131,13 +173,36 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
 }
 
 // RunVcenterDailyAggregate summarizes hourly snapshots into a daily summary table.
-func (c *CronTask) RunVcenterDailyAggregate(ctx context.Context, logger *slog.Logger) error {
+func (c *CronTask) RunVcenterDailyAggregate(ctx context.Context, logger *slog.Logger) (err error) {
+	jobCtx := ctx
+	jobTimeout := durationFromSeconds(c.Settings.Values.Settings.DailyJobTimeoutSeconds, 15*time.Minute)
+	if jobTimeout > 0 {
+		var cancel context.CancelFunc
+		jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
+		defer cancel()
+	}
+	tracker := NewCronTracker(c.Database)
+	done, skip, err := tracker.Start(jobCtx, "daily_aggregate")
+	if err != nil {
+		return err
+	}
+	if skip {
+		logger.Warn("Daily aggregate skipped because a previous run is still active")
+		return nil
+	}
+	defer func() { done(err) }()
+
+	if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil {
+		return err
+	}
+
 	startedAt := time.Now()
 	defer func() {
 		logger.Info("Daily summary job finished", "duration", time.Since(startedAt))
 	}()
 	targetTime := time.Now().Add(-time.Minute)
-	return c.aggregateDailySummary(ctx, targetTime, false)
+	err = c.aggregateDailySummary(jobCtx, targetTime, false)
+	return err
 }
 
 func (c *CronTask) AggregateDailySummary(ctx context.Context, date time.Time, force bool) error {
@@ -252,7 +317,29 @@ func (c *CronTask) aggregateDailySummary(ctx context.Context, targetTime time.Ti
 }
 
 // RunVcenterMonthlyAggregate summarizes the previous month's daily snapshots.
-func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.Logger) error {
+func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.Logger) (err error) {
+	jobCtx := ctx
+	jobTimeout := durationFromSeconds(c.Settings.Values.Settings.MonthlyJobTimeoutSeconds, 20*time.Minute)
+	if jobTimeout > 0 {
+		var cancel context.CancelFunc
+		jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
+		defer cancel()
+	}
+	tracker := NewCronTracker(c.Database)
+	done, skip, err := tracker.Start(jobCtx, "monthly_aggregate")
+	if err != nil {
+		return err
+	}
+	if skip {
+		logger.Warn("Monthly aggregate skipped because a previous run is still active")
+		return nil
+	}
+	defer func() { done(err) }()
+
+	if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil {
+		return err
+	}
+
 	startedAt := time.Now()
 	defer func() {
 		logger.Info("Monthly summary job finished", "duration", time.Since(startedAt))
@@ -260,7 +347,8 @@ func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.
 	now := time.Now()
 	firstOfThisMonth := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, now.Location())
 	targetMonth := firstOfThisMonth.AddDate(0, -1, 0)
-	return c.aggregateMonthlySummary(ctx, targetMonth, false)
+	err = c.aggregateMonthlySummary(jobCtx, targetMonth, false)
+	return err
 }
 
 func (c *CronTask) AggregateMonthlySummary(ctx context.Context, month time.Time, force bool) error {
@@ -348,7 +436,29 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time
 }
 
 // RunSnapshotCleanup drops hourly and daily snapshot tables older than retention.
-func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) error {
+func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) (err error) {
+	jobCtx := ctx
+	jobTimeout := durationFromSeconds(c.Settings.Values.Settings.CleanupJobTimeoutSeconds, 10*time.Minute)
+	if jobTimeout > 0 {
+		var cancel context.CancelFunc
+		jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
+		defer cancel()
+	}
+	tracker := NewCronTracker(c.Database)
+	done, skip, err := tracker.Start(jobCtx, "snapshot_cleanup")
+	if err != nil {
+		return err
+	}
+	if skip {
+		logger.Warn("Snapshot cleanup skipped because a previous run is still active")
+		return nil
+	}
+	defer func() { done(err) }()
+
+	if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil {
+		return err
+	}
+
 	startedAt := time.Now()
 	defer func() {
 		logger.Info("Snapshot cleanup job finished", "duration", time.Since(startedAt))
@@ -582,6 +692,13 @@ func intWithDefault(value int, fallback int) int {
 	return value
 }
 
+func durationFromSeconds(seconds int, fallback time.Duration) time.Duration {
+	if seconds > 0 {
+		return time.Duration(seconds) * time.Second
+	}
+	return fallback
+}
+
 func normalizeResourcePool(value string) string {
 	trimmed := strings.TrimSpace(value)
 	if trimmed == "" {
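Note: durationFromSeconds only honours positive values, so an unset or zero *_timeout_seconds setting falls back to the hard-coded default. For example (values picked for illustration):

	_ = durationFromSeconds(0, 10*time.Minute)  // 10m0s: zero means "use the fallback"
	_ = durationFromSeconds(90, 10*time.Minute) // 1m30s: a configured value wins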
@@ -800,6 +917,58 @@ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
 	return err
 }
 
+func insertHourlyBatch(ctx context.Context, dbConn *sqlx.DB, tableName string, rows []inventorySnapshotRow) error {
+	if len(rows) == 0 {
+		return nil
+	}
+	tx, err := dbConn.BeginTxx(ctx, nil)
+	if err != nil {
+		return err
+	}
+	stmt, err := tx.PreparexContext(ctx, sqlx.Rebind(sqlx.BindType(dbConn.DriverName()), fmt.Sprintf(`
+		INSERT INTO %s (
+			"InventoryId", "Name", "Vcenter", "VmId", "EventKey", "CloudId", "CreationTime", "DeletionTime",
+			"ResourcePool", "Datacenter", "Cluster", "Folder", "ProvisionedDisk", "VcpuCount",
+			"RamGB", "IsTemplate", "PoweredOn", "SrmPlaceholder", "VmUuid", "SnapshotTime", "IsPresent"
+		) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+	`, tableName)))
+	if err != nil {
+		tx.Rollback()
+		return err
+	}
+	defer stmt.Close()
+
+	for _, row := range rows {
+		if _, err := stmt.ExecContext(ctx,
+			row.InventoryId,
+			row.Name,
+			row.Vcenter,
+			row.VmId,
+			row.EventKey,
+			row.CloudId,
+			row.CreationTime,
+			row.DeletionTime,
+			row.ResourcePool,
+			row.Datacenter,
+			row.Cluster,
+			row.Folder,
+			row.ProvisionedDisk,
+			row.VcpuCount,
+			row.RamGB,
+			row.IsTemplate,
+			row.PoweredOn,
+			row.SrmPlaceholder,
+			row.VmUuid,
+			row.SnapshotTime,
+			row.IsPresent,
+		); err != nil {
+			tx.Rollback()
+			return err
+		}
+	}
+	return tx.Commit()
+}
+
 func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTime time.Time, tableName string, url string) error {
 	c.Logger.Debug("connecting to vcenter for hourly snapshot", "url", url)
 	vc := vcenter.New(c.Logger, c.VcCreds)
@@ -886,10 +1055,9 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
 		totals.DiskTotal += nullFloat64ToFloat(row.ProvisionedDisk)
 	}
 
+	batch := make([]inventorySnapshotRow, 0, len(presentSnapshots)+len(inventoryRows))
 	for _, row := range presentSnapshots {
-		if err := insertDailyInventoryRow(ctx, dbConn, tableName, row); err != nil {
-			c.Logger.Error("failed to insert hourly snapshot", "error", err, "vm_id", row.VmId.String)
-		}
+		batch = append(batch, row)
 	}
 
 	if !canDetectMissing {
@@ -927,9 +1095,11 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
 				c.Logger.Warn("failed to mark inventory record deleted", "error", err, "vm_id", row.VmId.String)
 			}
 		}
-		if err := insertDailyInventoryRow(ctx, dbConn, tableName, row); err != nil {
-			c.Logger.Error("failed to insert missing VM snapshot", "error", err, "vm_id", row.VmId.String)
-		}
+		batch = append(batch, row)
 	}
+
+	if err := insertHourlyBatch(ctx, dbConn, tableName, batch); err != nil {
+		return err
+	}
 
 	c.Logger.Info("Hourly snapshot summary",
@@ -7,6 +7,7 @@ import (
 	"net"
 	"os"
 	"path/filepath"
+	"strconv"
 	"time"
 )
 
@@ -70,3 +71,29 @@ func SleepWithContext(ctx context.Context, d time.Duration) {
 	case <-timer.C:
 	}
 }
+
+// EnvInt parses an environment variable into an int; returns (value, true) when set and valid.
+func EnvInt(key string) (int, bool) {
+	val := os.Getenv(key)
+	if val == "" {
+		return 0, false
+	}
+	parsed, err := strconv.Atoi(val)
+	if err != nil {
+		return 0, false
+	}
+	return parsed, true
+}
+
+// DurationFromEnv parses an environment variable representing seconds into a duration, defaulting when unset/invalid.
+func DurationFromEnv(key string, fallback time.Duration) time.Duration {
+	val := os.Getenv(key)
+	if val == "" {
+		return fallback
+	}
+	seconds, err := strconv.ParseInt(val, 10, 64)
+	if err != nil || seconds <= 0 {
+		return fallback
+	}
+	return time.Duration(seconds) * time.Second
+}
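Note: EnvInt backs the VCTP_HOURLY_SNAPSHOT_CONCURRENCY override used in the hourly job above; DurationFromEnv has no caller in this diff yet. A hedged sketch of both (the second environment-variable name is invented for illustration):

	if override, ok := utils.EnvInt("VCTP_HOURLY_SNAPSHOT_CONCURRENCY"); ok && override >= 0 {
		concurrencyLimit = override // same pattern as RunVcenterSnapshotHourly
	}
	// Hypothetical variable name, not part of this commit:
	jobTimeout := utils.DurationFromEnv("VCTP_HOURLY_JOB_TIMEOUT_SECONDS", 20*time.Minute)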
@@ -19,6 +19,11 @@ settings:
   hourly_snapshot_max_age_days: 60
   daily_snapshot_max_age_months: 12
   snapshot_cleanup_cron: "30 2 * * *"
+  hourly_job_timeout_seconds: 1200
+  hourly_snapshot_timeout_seconds: 600
+  daily_job_timeout_seconds: 900
+  monthly_job_timeout_seconds: 1200
+  cleanup_job_timeout_seconds: 600
   tenants_to_filter:
   node_charge_clusters:
   srm_activeactive_vms: