This commit is contained in:
@@ -13,6 +13,7 @@ import (
|
||||
"vctp/db"
|
||||
"vctp/db/queries"
|
||||
"vctp/internal/report"
|
||||
"vctp/internal/utils"
|
||||
"vctp/internal/vcenter"
|
||||
|
||||
"github.com/jmoiron/sqlx"
|
||||
@@ -47,13 +48,37 @@ type inventorySnapshotRow struct {
|
||||
type snapshotTotals = db.SnapshotTotals
|
||||
|
||||
// RunVcenterSnapshotHourly records hourly inventory snapshots into a daily table.
|
||||
func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Logger) error {
|
||||
func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Logger) (err error) {
|
||||
jobCtx := ctx
|
||||
jobTimeout := durationFromSeconds(c.Settings.Values.Settings.HourlyJobTimeoutSeconds, 20*time.Minute)
|
||||
if jobTimeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
|
||||
defer cancel()
|
||||
}
|
||||
startedAt := time.Now()
|
||||
defer func() {
|
||||
logger.Info("Hourly snapshot job finished", "duration", time.Since(startedAt))
|
||||
}()
|
||||
tracker := NewCronTracker(c.Database)
|
||||
done, skip, err := tracker.Start(jobCtx, "hourly_snapshot")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skip {
|
||||
logger.Warn("Hourly snapshot skipped because a previous run is still active")
|
||||
return nil
|
||||
}
|
||||
defer func() { done(err) }()
|
||||
|
||||
ctx, cancel := context.WithCancel(jobCtx)
|
||||
defer cancel()
|
||||
startTime := time.Now()
|
||||
|
||||
if err := db.CheckMigrationState(ctx, c.Database.DB()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// reload settings in case vcenter list has changed
|
||||
c.Settings.ReadYMLSettings()
|
||||
|
||||
@@ -83,6 +108,7 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
|
||||
}
|
||||
|
||||
dbConn := c.Database.DB()
|
||||
db.ApplySQLiteTuning(ctx, dbConn)
|
||||
if err := ensureDailyInventoryTable(ctx, dbConn, tableName); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -90,6 +116,9 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
|
||||
var wg sync.WaitGroup
|
||||
var errCount int64
|
||||
concurrencyLimit := c.Settings.Values.Settings.HourlySnapshotConcurrency
|
||||
if override, ok := utils.EnvInt("VCTP_HOURLY_SNAPSHOT_CONCURRENCY"); ok && override >= 0 {
|
||||
concurrencyLimit = override
|
||||
}
|
||||
var sem chan struct{}
|
||||
if concurrencyLimit > 0 {
|
||||
sem = make(chan struct{}, concurrencyLimit)
|
||||
@@ -99,23 +128,36 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
|
||||
wg.Add(1)
|
||||
go func(url string) {
|
||||
defer wg.Done()
|
||||
waitStarted := time.Now()
|
||||
vcStart := time.Now()
|
||||
if sem != nil {
|
||||
sem <- struct{}{}
|
||||
defer func() { <-sem }()
|
||||
}
|
||||
waitDuration := time.Since(waitStarted)
|
||||
|
||||
timeout := durationFromSeconds(c.Settings.Values.Settings.HourlySnapshotTimeoutSeconds, 10*time.Minute)
|
||||
runCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
c.Logger.Info("Starting hourly snapshot for vcenter", "url", url)
|
||||
if err := c.captureHourlySnapshotForVcenter(ctx, startTime, tableName, url); err != nil {
|
||||
if err := c.captureHourlySnapshotForVcenter(runCtx, startTime, tableName, url); err != nil {
|
||||
atomic.AddInt64(&errCount, 1)
|
||||
c.Logger.Error("hourly snapshot failed", "error", err, "url", url)
|
||||
} else {
|
||||
c.Logger.Info("Finished hourly snapshot for vcenter", "url", url, "duration", time.Since(vcStart))
|
||||
c.Logger.Info("Finished hourly snapshot for vcenter",
|
||||
"url", url,
|
||||
"queue_wait", waitDuration,
|
||||
"duration", time.Since(vcStart),
|
||||
"timeout", timeout,
|
||||
)
|
||||
}
|
||||
}(url)
|
||||
}
|
||||
wg.Wait()
|
||||
if errCount > 0 {
|
||||
return fmt.Errorf("hourly snapshot failed for %d vcenter(s)", errCount)
|
||||
err = fmt.Errorf("hourly snapshot failed for %d vcenter(s)", errCount)
|
||||
return err
|
||||
}
|
||||
|
||||
rowCount, err := db.TableRowCount(ctx, dbConn, tableName)
|
||||
@@ -131,13 +173,36 @@ func (c *CronTask) RunVcenterSnapshotHourly(ctx context.Context, logger *slog.Lo
|
||||
}
|
||||
|
||||
// RunVcenterDailyAggregate summarizes hourly snapshots into a daily summary table.
|
||||
func (c *CronTask) RunVcenterDailyAggregate(ctx context.Context, logger *slog.Logger) error {
|
||||
func (c *CronTask) RunVcenterDailyAggregate(ctx context.Context, logger *slog.Logger) (err error) {
|
||||
jobCtx := ctx
|
||||
jobTimeout := durationFromSeconds(c.Settings.Values.Settings.DailyJobTimeoutSeconds, 15*time.Minute)
|
||||
if jobTimeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
|
||||
defer cancel()
|
||||
}
|
||||
tracker := NewCronTracker(c.Database)
|
||||
done, skip, err := tracker.Start(jobCtx, "daily_aggregate")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skip {
|
||||
logger.Warn("Daily aggregate skipped because a previous run is still active")
|
||||
return nil
|
||||
}
|
||||
defer func() { done(err) }()
|
||||
|
||||
if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
startedAt := time.Now()
|
||||
defer func() {
|
||||
logger.Info("Daily summary job finished", "duration", time.Since(startedAt))
|
||||
}()
|
||||
targetTime := time.Now().Add(-time.Minute)
|
||||
return c.aggregateDailySummary(ctx, targetTime, false)
|
||||
err = c.aggregateDailySummary(jobCtx, targetTime, false)
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *CronTask) AggregateDailySummary(ctx context.Context, date time.Time, force bool) error {
|
||||
@@ -252,7 +317,29 @@ func (c *CronTask) aggregateDailySummary(ctx context.Context, targetTime time.Ti
|
||||
}
|
||||
|
||||
// RunVcenterMonthlyAggregate summarizes the previous month's daily snapshots.
|
||||
func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.Logger) error {
|
||||
func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.Logger) (err error) {
|
||||
jobCtx := ctx
|
||||
jobTimeout := durationFromSeconds(c.Settings.Values.Settings.MonthlyJobTimeoutSeconds, 20*time.Minute)
|
||||
if jobTimeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
|
||||
defer cancel()
|
||||
}
|
||||
tracker := NewCronTracker(c.Database)
|
||||
done, skip, err := tracker.Start(jobCtx, "monthly_aggregate")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skip {
|
||||
logger.Warn("Monthly aggregate skipped because a previous run is still active")
|
||||
return nil
|
||||
}
|
||||
defer func() { done(err) }()
|
||||
|
||||
if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
startedAt := time.Now()
|
||||
defer func() {
|
||||
logger.Info("Monthly summary job finished", "duration", time.Since(startedAt))
|
||||
@@ -260,7 +347,8 @@ func (c *CronTask) RunVcenterMonthlyAggregate(ctx context.Context, logger *slog.
|
||||
now := time.Now()
|
||||
firstOfThisMonth := time.Date(now.Year(), now.Month(), 1, 0, 0, 0, 0, now.Location())
|
||||
targetMonth := firstOfThisMonth.AddDate(0, -1, 0)
|
||||
return c.aggregateMonthlySummary(ctx, targetMonth, false)
|
||||
err = c.aggregateMonthlySummary(jobCtx, targetMonth, false)
|
||||
return err
|
||||
}
|
||||
|
||||
func (c *CronTask) AggregateMonthlySummary(ctx context.Context, month time.Time, force bool) error {
|
||||
@@ -348,7 +436,29 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time
|
||||
}
|
||||
|
||||
// RunSnapshotCleanup drops hourly and daily snapshot tables older than retention.
|
||||
func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) error {
|
||||
func (c *CronTask) RunSnapshotCleanup(ctx context.Context, logger *slog.Logger) (err error) {
|
||||
jobCtx := ctx
|
||||
jobTimeout := durationFromSeconds(c.Settings.Values.Settings.CleanupJobTimeoutSeconds, 10*time.Minute)
|
||||
if jobTimeout > 0 {
|
||||
var cancel context.CancelFunc
|
||||
jobCtx, cancel = context.WithTimeout(ctx, jobTimeout)
|
||||
defer cancel()
|
||||
}
|
||||
tracker := NewCronTracker(c.Database)
|
||||
done, skip, err := tracker.Start(jobCtx, "snapshot_cleanup")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if skip {
|
||||
logger.Warn("Snapshot cleanup skipped because a previous run is still active")
|
||||
return nil
|
||||
}
|
||||
defer func() { done(err) }()
|
||||
|
||||
if err := db.CheckMigrationState(jobCtx, c.Database.DB()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
startedAt := time.Now()
|
||||
defer func() {
|
||||
logger.Info("Snapshot cleanup job finished", "duration", time.Since(startedAt))
|
||||
@@ -582,6 +692,13 @@ func intWithDefault(value int, fallback int) int {
|
||||
return value
|
||||
}
|
||||
|
||||
// durationFromSeconds converts a whole number of seconds into a time.Duration.
//
// A non-positive seconds value yields fallback. A value too large to be
// expressed as a Duration saturates at the maximum Duration instead of
// overflowing int64, which could otherwise wrap to a negative duration.
func durationFromSeconds(seconds int, fallback time.Duration) time.Duration {
	const maxDuration = time.Duration(1<<63 - 1)

	if seconds <= 0 {
		return fallback
	}
	d := time.Duration(seconds)
	// Guard the multiplication below: anything above this bound would overflow.
	if d > maxDuration/time.Second {
		return maxDuration
	}
	return d * time.Second
}
|
||||
|
||||
func normalizeResourcePool(value string) string {
|
||||
trimmed := strings.TrimSpace(value)
|
||||
if trimmed == "" {
|
||||
@@ -800,6 +917,58 @@ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
|
||||
return err
|
||||
}
|
||||
|
||||
func insertHourlyBatch(ctx context.Context, dbConn *sqlx.DB, tableName string, rows []inventorySnapshotRow) error {
|
||||
if len(rows) == 0 {
|
||||
return nil
|
||||
}
|
||||
tx, err := dbConn.BeginTxx(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stmt, err := tx.PreparexContext(ctx, sqlx.Rebind(sqlx.BindType(dbConn.DriverName()), fmt.Sprintf(`
|
||||
INSERT INTO %s (
|
||||
"InventoryId", "Name", "Vcenter", "VmId", "EventKey", "CloudId", "CreationTime", "DeletionTime",
|
||||
"ResourcePool", "Datacenter", "Cluster", "Folder", "ProvisionedDisk", "VcpuCount",
|
||||
"RamGB", "IsTemplate", "PoweredOn", "SrmPlaceholder", "VmUuid", "SnapshotTime", "IsPresent"
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`, tableName)))
|
||||
if err != nil {
|
||||
tx.Rollback()
|
||||
return err
|
||||
}
|
||||
defer stmt.Close()
|
||||
|
||||
for _, row := range rows {
|
||||
if _, err := stmt.ExecContext(ctx,
|
||||
row.InventoryId,
|
||||
row.Name,
|
||||
row.Vcenter,
|
||||
row.VmId,
|
||||
row.EventKey,
|
||||
row.CloudId,
|
||||
row.CreationTime,
|
||||
row.DeletionTime,
|
||||
row.ResourcePool,
|
||||
row.Datacenter,
|
||||
row.Cluster,
|
||||
row.Folder,
|
||||
row.ProvisionedDisk,
|
||||
row.VcpuCount,
|
||||
row.RamGB,
|
||||
row.IsTemplate,
|
||||
row.PoweredOn,
|
||||
row.SrmPlaceholder,
|
||||
row.VmUuid,
|
||||
row.SnapshotTime,
|
||||
row.IsPresent,
|
||||
); err != nil {
|
||||
tx.Rollback()
|
||||
return err
|
||||
}
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTime time.Time, tableName string, url string) error {
|
||||
c.Logger.Debug("connecting to vcenter for hourly snapshot", "url", url)
|
||||
vc := vcenter.New(c.Logger, c.VcCreds)
|
||||
@@ -886,10 +1055,9 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
|
||||
totals.DiskTotal += nullFloat64ToFloat(row.ProvisionedDisk)
|
||||
}
|
||||
|
||||
batch := make([]inventorySnapshotRow, 0, len(presentSnapshots)+len(inventoryRows))
|
||||
for _, row := range presentSnapshots {
|
||||
if err := insertDailyInventoryRow(ctx, dbConn, tableName, row); err != nil {
|
||||
c.Logger.Error("failed to insert hourly snapshot", "error", err, "vm_id", row.VmId.String)
|
||||
}
|
||||
batch = append(batch, row)
|
||||
}
|
||||
|
||||
if !canDetectMissing {
|
||||
@@ -927,9 +1095,11 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
|
||||
c.Logger.Warn("failed to mark inventory record deleted", "error", err, "vm_id", row.VmId.String)
|
||||
}
|
||||
}
|
||||
if err := insertDailyInventoryRow(ctx, dbConn, tableName, row); err != nil {
|
||||
c.Logger.Error("failed to insert missing VM snapshot", "error", err, "vm_id", row.VmId.String)
|
||||
}
|
||||
batch = append(batch, row)
|
||||
}
|
||||
|
||||
if err := insertHourlyBatch(ctx, dbConn, tableName, batch); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.Logger.Info("Hourly snapshot summary",
|
||||
|
||||
Reference in New Issue
Block a user