more accurate resource pool data in aggregation reports

2026-01-23 11:59:52 +11:00
parent 871d7c2024
commit 25564efa54
4 changed files with 252 additions and 83 deletions

@@ -1057,6 +1057,7 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
missingCount, deletionsMarked, candidates := prepareDeletionCandidates(ctx, log, dbConn, url, inventoryRows, presentSnapshots, presentByUuid, presentByName, startTime)
newCount := 0
prevTableName := ""
reportTables := make(map[string]struct{})
// If deletions detected, refine deletion time using vCenter events in a small window.
if missingCount > 0 {
@@ -1084,6 +1085,23 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
if err := db.MarkVmDeletedWithDetails(ctx, dbConn, url, cand.vmID, cand.vmUUID, cand.name, cand.cluster, t.Unix()); err != nil {
log.Warn("failed to refine lifecycle cache deletion time", "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "vcenter", url, "error", err)
}
if snapRow, snapTable, found := findVMInHourlySnapshots(ctx, dbConn, url, cand.vmID); found {
vmUUID := cand.vmUUID
if vmUUID == "" && snapRow.VmUuid.Valid {
vmUUID = snapRow.VmUuid.String
}
name := cand.name
if name == "" {
name = snapRow.Name
}
if rowsAffected, err := updateDeletionTimeInSnapshot(ctx, dbConn, snapTable, url, cand.vmID, vmUUID, name, delTs.Int64); err != nil {
log.Warn("failed to update hourly snapshot deletion time from event", "table", snapTable, "vm_id", cand.vmID, "vm_uuid", vmUUID, "vcenter", url, "error", err)
} else if rowsAffected > 0 {
reportTables[snapTable] = struct{}{}
deletionsMarked = true
log.Debug("updated hourly snapshot deletion time from event", "table", snapTable, "vm_id", cand.vmID, "vm_uuid", vmUUID, "vcenter", url, "event_time", t)
}
}
log.Info("refined deletion time from vcenter event", "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "vcenter", url, "event_time", t)
}
}
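
The block above relies on updateDeletionTimeInSnapshot, which lives in one of the other changed files and is not shown in this diff. Below is a minimal sketch of a helper with that call shape, assuming a database/sql connection, question-mark placeholders, and a deleted_at column on the hourly snapshot tables; the schema and connection type are assumptions, not the repository's actual definitions.

    import (
        "context"
        "database/sql"
        "fmt"
    )

    // updateDeletionTimeInSnapshot stamps a deletion timestamp onto the matching
    // row of one hourly snapshot table and reports how many rows were changed.
    // Sketch only: deleted_at / vcenter / vm_id / vm_uuid / name columns and the
    // *sql.DB connection type are assumed.
    func updateDeletionTimeInSnapshot(ctx context.Context, dbConn *sql.DB, table, vcenterURL, vmID, vmUUID, name string, deletedAt int64) (int64, error) {
        // Table names cannot be bound as query parameters, so the internally
        // generated snapshot table name is interpolated into the statement.
        query := fmt.Sprintf(`UPDATE %s
               SET deleted_at = ?
             WHERE vcenter = ?
               AND (vm_id = ? OR (vm_uuid <> '' AND vm_uuid = ?) OR name = ?)
               AND (deleted_at IS NULL OR deleted_at = 0)`, table)
        res, err := dbConn.ExecContext(ctx, query, deletedAt, vcenterURL, vmID, vmUUID, name)
        if err != nil {
            return 0, err
        }
        return res.RowsAffected()
    }

Returning the affected-row count is what lets the caller decide whether the table belongs in reportTables.
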
@@ -1124,17 +1142,26 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
}
// Discover previous snapshots once per run (serial) to avoid concurrent probes across vCenters.
prevTableName, newCount, missingCount = c.compareWithPreviousSnapshot(ctx, dbConn, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName, missingCount)
var prevTableTouched bool
prevTableName, newCount, missingCount, prevTableTouched = c.compareWithPreviousSnapshot(ctx, dbConn, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName, missingCount)
if prevTableTouched && prevTableName != "" {
reportTables[prevTableName] = struct{}{}
deletionsMarked = true
}
// If VM count dropped versus totals and we still haven't marked missing, try another comparison + wider event window.
if missingCount == 0 && prevVmCount.Valid && prevVmCount.Int64 > int64(totals.VmCount) {
// Fallback: locate a previous table only if we didn't already find one.
if prevTableName == "" {
if prevTable, err := latestHourlySnapshotBefore(ctx, dbConn, startTime, loggerFromCtx(ctx, c.Logger)); err == nil && prevTable != "" {
moreMissing := c.markMissingFromPrevious(ctx, dbConn, prevTable, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName)
moreMissing, tableUpdated := c.markMissingFromPrevious(ctx, dbConn, prevTable, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName)
if moreMissing > 0 {
missingCount += moreMissing
}
if tableUpdated {
reportTables[prevTable] = struct{}{}
deletionsMarked = true
}
// Reuse this table name for later snapshot lookups when correlating deletion events.
prevTableName = prevTable
}
@@ -1157,9 +1184,10 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
}
inv, ok := inventoryByVmID[vmID]
var snapRow InventorySnapshotRow
var snapTable string
if !ok {
var found bool
snapRow, found = findVMInHourlySnapshots(ctx, dbConn, url, vmID, prevTableName)
snapRow, snapTable, found = findVMInHourlySnapshots(ctx, dbConn, url, vmID, prevTableName)
if !found {
c.Logger.Debug("count-drop: deletion event has no snapshot match", "vm_id", vmID, "vcenter", url, "event_time", t)
continue
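
findVMInHourlySnapshots now also returns the table the row was found in. The call in this hunk passes a preferred table (prevTableName) while the earlier one does not, which hints at a variadic trailing parameter. A sketch of that shape follows; the variadic parameter, the trimmed row type, and the listRecentHourlySnapshotTables stub are illustrative assumptions rather than the repository's code.

    import (
        "context"
        "database/sql"
        "fmt"
    )

    // Trimmed-down row type with just the fields referenced in the diff; the real
    // InventorySnapshotRow presumably carries more columns.
    type InventorySnapshotRow struct {
        VmUuid     sql.NullString
        Name       string
        Datacenter string
    }

    // listRecentHourlySnapshotTables stands in for whatever table discovery the
    // real code performs; stubbed here so the sketch stays self-contained.
    func listRecentHourlySnapshotTables(ctx context.Context, dbConn *sql.DB) []string { return nil }

    // findVMInHourlySnapshots looks a VM up in hourly snapshot tables, preferring
    // any tables the caller passes, and returns the row plus the table it came from.
    func findVMInHourlySnapshots(ctx context.Context, dbConn *sql.DB, vcenterURL, vmID string, preferredTables ...string) (InventorySnapshotRow, string, bool) {
        tables := append([]string{}, preferredTables...)
        tables = append(tables, listRecentHourlySnapshotTables(ctx, dbConn)...)
        for _, table := range tables {
            if table == "" {
                continue
            }
            query := fmt.Sprintf(`SELECT vm_uuid, name, datacenter FROM %s WHERE vcenter = ? AND vm_id = ? LIMIT 1`, table)
            var row InventorySnapshotRow
            // Any error (no row, missing table) simply moves on to the next candidate.
            err := dbConn.QueryRowContext(ctx, query, vcenterURL, vmID).Scan(&row.VmUuid, &row.Name, &row.Datacenter)
            if err == nil {
                return row, table, true
            }
        }
        return InventorySnapshotRow{}, "", false
    }
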
@@ -1170,7 +1198,7 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
Name: snapRow.Name,
Datacenter: snapRow.Datacenter,
}
c.Logger.Info("count-drop: correlated deletion via snapshot lookup", "vm_id", vmID, "vm_uuid", inv.VmUuid.String, "name", inv.Name, "vcenter", url, "event_time", t, "snapshot_table", prevTableName)
c.Logger.Info("count-drop: correlated deletion via snapshot lookup", "vm_id", vmID, "vm_uuid", inv.VmUuid.String, "name", inv.Name, "vcenter", url, "event_time", t, "snapshot_table", snapTable)
}
// Prefer UUID from snapshot if inventory entry lacks it.
if !inv.VmUuid.Valid && snapRow.VmUuid.Valid {
@@ -1193,6 +1221,19 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
if err := db.MarkVmDeletedWithDetails(ctx, dbConn, url, vmID, inv.VmUuid.String, inv.Name, clusterName, t.Unix()); err != nil {
c.Logger.Warn("count-drop: failed to refine lifecycle cache deletion time", "vm_id", vmID, "vm_uuid", inv.VmUuid, "vcenter", url, "error", err)
}
tableToUpdate := snapTable
if tableToUpdate == "" {
tableToUpdate = prevTableName
}
if tableToUpdate != "" {
if rowsAffected, err := updateDeletionTimeInSnapshot(ctx, dbConn, tableToUpdate, url, vmID, inv.VmUuid.String, inv.Name, delTs.Int64); err != nil {
c.Logger.Warn("count-drop: failed to update hourly snapshot deletion time from event", "table", tableToUpdate, "vm_id", vmID, "vcenter", url, "error", err)
} else if rowsAffected > 0 {
reportTables[tableToUpdate] = struct{}{}
deletionsMarked = true
c.Logger.Debug("count-drop: updated hourly snapshot deletion time from event", "table", tableToUpdate, "vm_id", vmID, "vm_uuid", inv.VmUuid.String, "vcenter", url, "event_time", t)
}
}
missingCount++
deletionsMarked = true
}
@@ -1200,8 +1241,13 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
}
// Backfill lifecycle deletions for VMs missing from inventory and without DeletedAt.
if err := backfillLifecycleDeletionsToday(ctx, log, dbConn, url, startTime, presentSnapshots); err != nil {
if backfillTables, err := backfillLifecycleDeletionsToday(ctx, log, dbConn, url, startTime, presentSnapshots); err != nil {
log.Warn("failed to backfill lifecycle deletions for today", "vcenter", url, "error", err)
} else if len(backfillTables) > 0 {
for _, table := range backfillTables {
reportTables[table] = struct{}{}
}
deletionsMarked = true
}
log.Info("Hourly snapshot summary",
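
backfillLifecycleDeletionsToday now returns the snapshot tables it touched alongside the error so they can be folded into reportTables. Its body sits in another changed file; below is a rough sketch of the new shape, in which the slog logger type, the keying of presentSnapshots, and both helper functions are assumptions.

    import (
        "context"
        "database/sql"
        "log/slog"
        "time"
    )

    // hourlySnapshotTablesForDay and markMissingVmsDeletedInTable are hypothetical
    // stand-ins for the real discovery and update steps; stubbed for illustration.
    func hourlySnapshotTablesForDay(ctx context.Context, dbConn *sql.DB, day time.Time) ([]string, error) {
        return nil, nil
    }
    func markMissingVmsDeletedInTable(ctx context.Context, dbConn *sql.DB, table, vcenterURL string, present map[string]struct{}, deletedAt int64) (int64, error) {
        return 0, nil
    }

    // backfillLifecycleDeletionsToday marks deletion times for VMs that appear in
    // today's hourly snapshots but are absent from the live inventory, and now also
    // returns the tables it updated so their reports can be regenerated.
    func backfillLifecycleDeletionsToday(ctx context.Context, log *slog.Logger, dbConn *sql.DB, vcenterURL string, startTime time.Time, presentSnapshots map[string]struct{}) ([]string, error) {
        tables, err := hourlySnapshotTablesForDay(ctx, dbConn, startTime)
        if err != nil {
            return nil, err
        }
        var touched []string
        for _, table := range tables {
            n, err := markMissingVmsDeletedInTable(ctx, dbConn, table, vcenterURL, presentSnapshots, startTime.Unix())
            if err != nil {
                log.Warn("backfill failed", "table", table, "vcenter", vcenterURL, "error", err)
                continue
            }
            if n > 0 {
                touched = append(touched, table)
            }
        }
        return touched, nil
    }
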
@@ -1219,10 +1265,15 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
log.Warn("failed to record snapshot run", "url", url, "error", upErr)
}
if deletionsMarked {
if err := c.generateReport(ctx, tableName); err != nil {
log.Warn("failed to regenerate hourly report after deletions", "error", err, "table", tableName)
} else {
log.Debug("Regenerated hourly report after deletions", "table", tableName)
if len(reportTables) == 0 {
reportTables[tableName] = struct{}{}
}
for reportTable := range reportTables {
if err := c.generateReport(ctx, reportTable); err != nil {
log.Warn("failed to regenerate hourly report after deletions", "error", err, "table", reportTable)
} else {
log.Debug("Regenerated hourly report after deletions", "table", reportTable)
}
}
}
return nil
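
The regeneration loop treats reportTables as a set, so each affected table's report is rebuilt at most once per run, and the current snapshot table is only used as a fallback when deletions were marked but no specific table was recorded. The same pattern in isolation, as a runnable illustration with a made-up table name:

    package main

    import "fmt"

    func main() {
        // Tables whose rows were stamped with a deletion time during this run.
        reportTables := map[string]struct{}{}
        currentTable := "hourly_snapshot_2026_01_23_11" // hypothetical name

        // Fallback: nothing else was touched, so regenerate the current table's report.
        if len(reportTables) == 0 {
            reportTables[currentTable] = struct{}{}
        }
        for table := range reportTables {
            fmt.Println("regenerate report for", table)
        }
    }
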
@@ -1242,7 +1293,7 @@ func (c *CronTask) compareWithPreviousSnapshot(
inventoryByUuid map[string]queries.Inventory,
inventoryByName map[string]queries.Inventory,
missingCount int,
) (string, int, int) {
) (string, int, int, bool) {
prevTableName, prevTableErr := latestHourlySnapshotBefore(ctx, dbConn, startTime, loggerFromCtx(ctx, c.Logger))
if prevTableErr != nil {
c.Logger.Warn("failed to locate previous hourly snapshot for deletion comparison", "error", prevTableErr, "url", url)
@@ -1251,9 +1302,13 @@ func (c *CronTask) compareWithPreviousSnapshot(
prevSnapshotTime, _ := parseSnapshotTime(prevTableName)
newCount := 0
prevTableTouched := false
if prevTableName != "" {
moreMissing := c.markMissingFromPrevious(ctx, dbConn, prevTableName, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName)
moreMissing, tableUpdated := c.markMissingFromPrevious(ctx, dbConn, prevTableName, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName)
missingCount += moreMissing
if tableUpdated {
prevTableTouched = true
}
expectedSeconds := int64(c.Settings.Values.Settings.VcenterInventorySnapshotSeconds) / 2
// Skip only if snapshots are closer together than half the configured cadence
if SnapshotTooSoon(prevSnapshotTime, startTime.Unix(), expectedSeconds) {
@@ -1278,5 +1333,5 @@ func (c *CronTask) compareWithPreviousSnapshot(
newCount = len(presentSnapshots)
}
return prevTableName, newCount, missingCount
return prevTableName, newCount, missingCount, prevTableTouched
}
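
SnapshotTooSoon, called a few lines above with half the configured cadence as the threshold, presumably reduces to a simple gap check on Unix timestamps. A sketch reconstructed from the call site and the surrounding comment; the parameter types and the zero-value guard are assumptions.

    // SnapshotTooSoon reports whether the current snapshot started less than
    // minGapSeconds after the previous one, in which case the deletion comparison
    // is skipped. Sketch only.
    func SnapshotTooSoon(prevSnapshotUnix, currentUnix, minGapSeconds int64) bool {
        if prevSnapshotUnix <= 0 {
            return false // no usable previous snapshot time; don't skip
        }
        return currentUnix-prevSnapshotUnix < minGapSeconds
    }
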