more accurate resource pool data in aggregation reports
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2026-01-23 11:59:52 +11:00
parent 871d7c2024
commit 25564efa54
4 changed files with 252 additions and 83 deletions

View File

@@ -1067,7 +1067,7 @@ func normalizeCellValue(value interface{}) interface{} {
type totalsPoint struct { type totalsPoint struct {
Label string Label string
VmCount int64 VmCount float64
VcpuTotal float64 VcpuTotal float64
RamTotal float64 RamTotal float64
PresenceRatio float64 PresenceRatio float64
@@ -1078,7 +1078,18 @@ type totalsPoint struct {
} }
func buildHourlyTotals(ctx context.Context, dbConn *sqlx.DB, records []SnapshotRecord) ([]totalsPoint, error) { func buildHourlyTotals(ctx context.Context, dbConn *sqlx.DB, records []SnapshotRecord) ([]totalsPoint, error) {
points := make([]totalsPoint, 0, len(records)) type hourBucket struct {
samples int
vmSum float64
vcpuSum float64
ramSum float64
tinSum float64
bronzeSum float64
silverSum float64
goldSum float64
}
buckets := make(map[int64]*hourBucket)
for _, record := range records { for _, record := range records {
if err := db.ValidateTableName(record.TableName); err != nil { if err := db.ValidateTableName(record.TableName); err != nil {
return nil, err return nil, err
@@ -1112,17 +1123,46 @@ WHERE %s
if err := dbConn.GetContext(ctx, &row, query); err != nil { if err := dbConn.GetContext(ctx, &row, query); err != nil {
return nil, err return nil, err
} }
hourKey := record.SnapshotTime.Local().Truncate(time.Hour).Unix()
bucket := buckets[hourKey]
if bucket == nil {
bucket = &hourBucket{}
buckets[hourKey] = bucket
}
bucket.samples++
bucket.vmSum += float64(row.VmCount)
bucket.vcpuSum += float64(row.VcpuTotal)
bucket.ramSum += float64(row.RamTotal)
bucket.tinSum += row.TinTotal
bucket.bronzeSum += row.BronzeTotal
bucket.silverSum += row.SilverTotal
bucket.goldSum += row.GoldTotal
}
keys := make([]int64, 0, len(buckets))
for key := range buckets {
keys = append(keys, key)
}
sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
points := make([]totalsPoint, 0, len(keys))
for _, key := range keys {
bucket := buckets[key]
if bucket.samples == 0 {
continue
}
denom := float64(bucket.samples)
vmAvg := bucket.vmSum / denom
points = append(points, totalsPoint{ points = append(points, totalsPoint{
Label: record.SnapshotTime.Local().Format("2006-01-02 15:04"), Label: time.Unix(key, 0).Local().Format("2006-01-02 15:00"),
VmCount: row.VmCount, VmCount: vmAvg,
VcpuTotal: float64(row.VcpuTotal), VcpuTotal: bucket.vcpuSum / denom,
RamTotal: float64(row.RamTotal), RamTotal: bucket.ramSum / denom,
// For hourly snapshots, prorated VM count equals VM count (no finer granularity). PresenceRatio: vmAvg,
PresenceRatio: float64(row.VmCount), TinTotal: bucket.tinSum / denom,
TinTotal: row.TinTotal, BronzeTotal: bucket.bronzeSum / denom,
BronzeTotal: row.BronzeTotal, SilverTotal: bucket.silverSum / denom,
SilverTotal: row.SilverTotal, GoldTotal: bucket.goldSum / denom,
GoldTotal: row.GoldTotal,
}) })
} }
return points, nil return points, nil
@@ -1175,10 +1215,10 @@ WHERE %s
} }
points = append(points, totalsPoint{ points = append(points, totalsPoint{
Label: record.SnapshotTime.Local().Format("2006-01-02"), Label: record.SnapshotTime.Local().Format("2006-01-02"),
VmCount: row.VmCount, VmCount: float64(row.VmCount),
VcpuTotal: row.VcpuTotal, VcpuTotal: row.VcpuTotal,
RamTotal: row.RamTotal, RamTotal: row.RamTotal,
PresenceRatio: computeProratedVmCount(row.PresenceRatio, row.VmCount, prorateByAvg), PresenceRatio: computeProratedVmCount(row.PresenceRatio, float64(row.VmCount), prorateByAvg),
TinTotal: row.TinTotal, TinTotal: row.TinTotal,
BronzeTotal: row.BronzeTotal, BronzeTotal: row.BronzeTotal,
SilverTotal: row.SilverTotal, SilverTotal: row.SilverTotal,
@@ -1188,11 +1228,11 @@ WHERE %s
return points, nil return points, nil
} }
func computeProratedVmCount(presenceRatio float64, vmCount int64, prorate bool) float64 { func computeProratedVmCount(presenceRatio float64, vmCount float64, prorate bool) float64 {
if !prorate { if !prorate {
return float64(vmCount) return vmCount
} }
return presenceRatio * float64(vmCount) return presenceRatio * vmCount
} }
func writeTotalsChart(logger *slog.Logger, xlsx *excelize.File, sheetName string, points []totalsPoint) { func writeTotalsChart(logger *slog.Logger, xlsx *excelize.File, sheetName string, points []totalsPoint) {
@@ -1224,6 +1264,18 @@ func writeTotalsChart(logger *slog.Logger, xlsx *excelize.File, sheetName string
xlsx.SetCellValue(sheetName, fmt.Sprintf("I%d", row), point.GoldTotal) xlsx.SetCellValue(sheetName, fmt.Sprintf("I%d", row), point.GoldTotal)
} }
if lastRow := len(points) + 1; lastRow >= 2 {
numFmt := "0.00000000"
styleID, err := xlsx.NewStyle(&excelize.Style{CustomNumFmt: &numFmt})
if err == nil {
if err := xlsx.SetCellStyle(sheetName, "E2", fmt.Sprintf("I%d", lastRow), styleID); err != nil {
logger.Error("Error setting totals number format", "error", err)
}
} else {
logger.Error("Error creating totals number format", "error", err)
}
}
if endCell, err := excelize.CoordinatesToCellName(len(headers), 1); err == nil { if endCell, err := excelize.CoordinatesToCellName(len(headers), 1); err == nil {
filterRange := "A1:" + endCell filterRange := "A1:" + endCell
if err := xlsx.AutoFilter(sheetName, filterRange, nil); err != nil { if err := xlsx.AutoFilter(sheetName, filterRange, nil); err != nil {

View File

@@ -179,13 +179,46 @@ func querySnapshotRows(ctx context.Context, dbConn *sqlx.DB, table string, colum
return dbConn.QueryxContext(ctx, query, args...) return dbConn.QueryxContext(ctx, query, args...)
} }
// updateDeletionTimeInSnapshot writes deletionUnix into the "DeletionTime" column of
// the rows in the given snapshot table that belong to one VM of one vCenter.
//
// The VM is matched on the first non-empty identifier, in the order vmID ("VmId"),
// vmUUID ("VmUuid"), name ("Name"). A row is only updated when its current
// "DeletionTime" is NULL, 0, or greater than deletionUnix, so an already recorded
// earlier deletion time is left untouched.
//
// It returns the number of rows changed. It returns (0, nil) without touching the
// database when no identifier is supplied or when deletionUnix is not positive, and
// an error when the table name fails validation or the UPDATE fails.
func updateDeletionTimeInSnapshot(ctx context.Context, dbConn *sqlx.DB, table, vcenter, vmID, vmUUID, name string, deletionUnix int64) (int64, error) {
	// The table name is interpolated into the statement, so it must be validated first.
	if err := db.ValidateTableName(table); err != nil {
		return 0, err
	}
	// The WHERE clause below treats NULL and 0 as "no deletion recorded". Writing a
	// non-positive value would match rows holding a real deletion time (anything > 0)
	// and erase it, so refuse to do so.
	if deletionUnix <= 0 {
		return 0, nil
	}

	// Pick the most specific identifier available; the column name comes from this
	// fixed set only, never from caller input.
	var matchColumn, matchValue string
	switch {
	case vmID != "":
		matchColumn, matchValue = "VmId", vmID
	case vmUUID != "":
		matchColumn, matchValue = "VmUuid", vmUUID
	case name != "":
		matchColumn, matchValue = "Name", name
	default:
		// Nothing to match on: not an error, simply no rows to update.
		return 0, nil
	}

	query := fmt.Sprintf(`UPDATE %s SET "DeletionTime" = ? WHERE "Vcenter" = ? AND "%s" = ? AND ("DeletionTime" IS NULL OR "DeletionTime" = 0 OR "DeletionTime" > ?)`, table, matchColumn)
	// Convert the ? placeholders to the bind style of the active driver.
	query = sqlx.Rebind(sqlx.BindType(dbConn.DriverName()), query)

	result, err := dbConn.ExecContext(ctx, query, deletionUnix, vcenter, matchValue, deletionUnix)
	if err != nil {
		return 0, err
	}
	rowsAffected, err := result.RowsAffected()
	if err != nil {
		return 0, err
	}
	return rowsAffected, nil
}
// markMissingFromPrevious marks VMs that were present in the previous snapshot but missing now. // markMissingFromPrevious marks VMs that were present in the previous snapshot but missing now.
func (c *CronTask) markMissingFromPrevious(ctx context.Context, dbConn *sqlx.DB, prevTable string, vcenter string, snapshotTime time.Time, func (c *CronTask) markMissingFromPrevious(ctx context.Context, dbConn *sqlx.DB, prevTable string, vcenter string, snapshotTime time.Time,
currentByID map[string]InventorySnapshotRow, currentByUuid map[string]struct{}, currentByName map[string]struct{}, currentByID map[string]InventorySnapshotRow, currentByUuid map[string]struct{}, currentByName map[string]struct{},
invByID map[string]queries.Inventory, invByUuid map[string]queries.Inventory, invByName map[string]queries.Inventory) int { invByID map[string]queries.Inventory, invByUuid map[string]queries.Inventory, invByName map[string]queries.Inventory) (int, bool) {
if err := db.ValidateTableName(prevTable); err != nil { if err := db.ValidateTableName(prevTable); err != nil {
return 0 return 0, false
} }
type prevRow struct { type prevRow struct {
@@ -200,11 +233,12 @@ func (c *CronTask) markMissingFromPrevious(ctx context.Context, dbConn *sqlx.DB,
rows, err := querySnapshotRows(ctx, dbConn, prevTable, []string{"VmId", "VmUuid", "Name", "Cluster", "Datacenter", "DeletionTime"}, `"Vcenter" = ?`, vcenter) rows, err := querySnapshotRows(ctx, dbConn, prevTable, []string{"VmId", "VmUuid", "Name", "Cluster", "Datacenter", "DeletionTime"}, `"Vcenter" = ?`, vcenter)
if err != nil { if err != nil {
c.Logger.Warn("failed to read previous snapshot for deletion detection", "error", err, "table", prevTable, "vcenter", vcenter) c.Logger.Warn("failed to read previous snapshot for deletion detection", "error", err, "table", prevTable, "vcenter", vcenter)
return 0 return 0, false
} }
defer rows.Close() defer rows.Close()
missing := 0 missing := 0
tableUpdated := false
for rows.Next() { for rows.Next() {
var r prevRow var r prevRow
if err := rows.StructScan(&r); err != nil { if err := rows.StructScan(&r); err != nil {
@@ -255,18 +289,16 @@ func (c *CronTask) markMissingFromPrevious(ctx context.Context, dbConn *sqlx.DB,
if !ok { if !ok {
continue continue
} }
if inv.DeletionTime.Valid { delTime := inv.DeletionTime
continue if !delTime.Valid {
} delTime = sql.NullInt64{Int64: snapshotTime.Unix(), Valid: true}
if err := c.Database.Queries().InventoryMarkDeleted(ctx, queries.InventoryMarkDeletedParams{
delTime := sql.NullInt64{Int64: snapshotTime.Unix(), Valid: true} DeletionTime: delTime,
if err := c.Database.Queries().InventoryMarkDeleted(ctx, queries.InventoryMarkDeletedParams{ VmId: inv.VmId,
DeletionTime: delTime, DatacenterName: inv.Datacenter,
VmId: inv.VmId, }); err != nil {
DatacenterName: inv.Datacenter, c.Logger.Warn("failed to mark inventory record deleted from previous snapshot", "error", err, "vm_id", inv.VmId.String)
}); err != nil { }
c.Logger.Warn("failed to mark inventory record deleted from previous snapshot", "error", err, "vm_id", inv.VmId.String)
continue
} }
// Also update lifecycle cache so deletion time is available for rollups. // Also update lifecycle cache so deletion time is available for rollups.
vmUUID := "" vmUUID := ""
@@ -276,11 +308,17 @@ func (c *CronTask) markMissingFromPrevious(ctx context.Context, dbConn *sqlx.DB,
if err := db.MarkVmDeletedWithDetails(ctx, dbConn, vcenter, inv.VmId.String, vmUUID, inv.Name, inv.Cluster.String, delTime.Int64); err != nil { if err := db.MarkVmDeletedWithDetails(ctx, dbConn, vcenter, inv.VmId.String, vmUUID, inv.Name, inv.Cluster.String, delTime.Int64); err != nil {
c.Logger.Warn("failed to mark lifecycle cache deleted from previous snapshot", "error", err, "vm_id", inv.VmId.String, "vm_uuid", vmUUID, "vcenter", vcenter) c.Logger.Warn("failed to mark lifecycle cache deleted from previous snapshot", "error", err, "vm_id", inv.VmId.String, "vm_uuid", vmUUID, "vcenter", vcenter)
} }
if rowsAffected, err := updateDeletionTimeInSnapshot(ctx, dbConn, prevTable, vcenter, inv.VmId.String, vmUUID, inv.Name, delTime.Int64); err != nil {
c.Logger.Warn("failed to update hourly snapshot deletion time", "error", err, "table", prevTable, "vm_id", inv.VmId.String, "vm_uuid", vmUUID, "vcenter", vcenter)
} else if rowsAffected > 0 {
tableUpdated = true
c.Logger.Debug("updated hourly snapshot deletion time", "table", prevTable, "vm_id", inv.VmId.String, "vm_uuid", vmUUID, "vcenter", vcenter, "deletion_time", delTime.Int64)
}
c.Logger.Debug("Detected VM missing compared to previous snapshot", "name", inv.Name, "vm_id", inv.VmId.String, "vm_uuid", inv.VmUuid.String, "vcenter", vcenter, "snapshot_time", snapshotTime, "prev_table", prevTable) c.Logger.Debug("Detected VM missing compared to previous snapshot", "name", inv.Name, "vm_id", inv.VmId.String, "vm_uuid", inv.VmUuid.String, "vcenter", vcenter, "snapshot_time", snapshotTime, "prev_table", prevTable)
missing++ missing++
} }
return missing return missing, tableUpdated
} }
// countNewFromPrevious returns how many VMs are present in the current snapshot but not in the previous snapshot. // countNewFromPrevious returns how many VMs are present in the current snapshot but not in the previous snapshot.
@@ -409,9 +447,9 @@ func listNewFromPrevious(ctx context.Context, dbConn *sqlx.DB, prevTable string,
// findVMInHourlySnapshots searches recent hourly snapshot tables for a VM by ID for the given vCenter. // findVMInHourlySnapshots searches recent hourly snapshot tables for a VM by ID for the given vCenter.
// extraTables are searched first (e.g., known previous snapshot tables). // extraTables are searched first (e.g., known previous snapshot tables).
func findVMInHourlySnapshots(ctx context.Context, dbConn *sqlx.DB, vcenter string, vmID string, extraTables ...string) (InventorySnapshotRow, bool) { func findVMInHourlySnapshots(ctx context.Context, dbConn *sqlx.DB, vcenter string, vmID string, extraTables ...string) (InventorySnapshotRow, string, bool) {
if vmID == "" { if vmID == "" {
return InventorySnapshotRow{}, false return InventorySnapshotRow{}, "", false
} }
// Use a short timeout to avoid hanging if the DB is busy. // Use a short timeout to avoid hanging if the DB is busy.
ctx, cancel := context.WithTimeout(ctx, 5*time.Second) ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
@@ -429,7 +467,7 @@ func findVMInHourlySnapshots(ctx context.Context, dbConn *sqlx.DB, vcenter strin
query = sqlx.Rebind(sqlx.BindType(dbConn.DriverName()), query) query = sqlx.Rebind(sqlx.BindType(dbConn.DriverName()), query)
var row InventorySnapshotRow var row InventorySnapshotRow
if err := dbConn.QueryRowxContext(ctx, query, vcenter, vmID).Scan(&row.VmId, &row.VmUuid, &row.Name, &row.Datacenter, &row.Cluster); err == nil { if err := dbConn.QueryRowxContext(ctx, query, vcenter, vmID).Scan(&row.VmId, &row.VmUuid, &row.Name, &row.Datacenter, &row.Cluster); err == nil {
return row, true return row, table, true
} }
} }
@@ -442,7 +480,7 @@ ORDER BY snapshot_time DESC
LIMIT 20 LIMIT 20
`) `)
if err != nil { if err != nil {
return InventorySnapshotRow{}, false return InventorySnapshotRow{}, "", false
} }
defer rows.Close() defer rows.Close()
@@ -459,12 +497,12 @@ LIMIT 20
query = sqlx.Rebind(sqlx.BindType(dbConn.DriverName()), query) query = sqlx.Rebind(sqlx.BindType(dbConn.DriverName()), query)
var row InventorySnapshotRow var row InventorySnapshotRow
if err := dbConn.QueryRowxContext(ctx, query, vcenter, vmID).Scan(&row.VmId, &row.VmUuid, &row.Name, &row.Datacenter, &row.Cluster); err == nil { if err := dbConn.QueryRowxContext(ctx, query, vcenter, vmID).Scan(&row.VmId, &row.VmUuid, &row.Name, &row.Datacenter, &row.Cluster); err == nil {
return row, true return row, table, true
} }
checked++ checked++
if checked >= 10 { // limit work if checked >= 10 { // limit work
break break
} }
} }
return InventorySnapshotRow{}, false return InventorySnapshotRow{}, "", false
} }

View File

@@ -28,21 +28,22 @@ func presenceKeys(vmID, vmUUID, name string) []string {
// backfillLifecycleDeletionsToday looks for VMs in the lifecycle cache that are not in the current inventory, // backfillLifecycleDeletionsToday looks for VMs in the lifecycle cache that are not in the current inventory,
// have no DeletedAt, and determines their deletion time from today's hourly snapshots, optionally checking the next snapshot (next day) to confirm. // have no DeletedAt, and determines their deletion time from today's hourly snapshots, optionally checking the next snapshot (next day) to confirm.
func backfillLifecycleDeletionsToday(ctx context.Context, logger *slog.Logger, dbConn *sqlx.DB, vcenter string, snapshotTime time.Time, present map[string]InventorySnapshotRow) error { // It returns any hourly snapshot tables that were updated with deletion times.
func backfillLifecycleDeletionsToday(ctx context.Context, logger *slog.Logger, dbConn *sqlx.DB, vcenter string, snapshotTime time.Time, present map[string]InventorySnapshotRow) ([]string, error) {
dayStart := truncateDate(snapshotTime) dayStart := truncateDate(snapshotTime)
dayEnd := dayStart.Add(24 * time.Hour) dayEnd := dayStart.Add(24 * time.Hour)
candidates, err := loadLifecycleCandidates(ctx, dbConn, vcenter, present) candidates, err := loadLifecycleCandidates(ctx, dbConn, vcenter, present)
if err != nil || len(candidates) == 0 { if err != nil || len(candidates) == 0 {
return err return nil, err
} }
tables, err := listHourlyTablesForDay(ctx, dbConn, dayStart, dayEnd) tables, err := listHourlyTablesForDay(ctx, dbConn, dayStart, dayEnd)
if err != nil { if err != nil {
return err return nil, err
} }
if len(tables) == 0 { if len(tables) == 0 {
return nil return nil, nil
} }
nextPresence := make(map[string]struct{}) nextPresence := make(map[string]struct{})
@@ -50,10 +51,12 @@ func backfillLifecycleDeletionsToday(ctx context.Context, logger *slog.Logger, d
nextPresence = loadPresenceKeys(ctx, dbConn, nextTable, vcenter) nextPresence = loadPresenceKeys(ctx, dbConn, nextTable, vcenter)
} }
for _, cand := range candidates { updatedTables := make(map[string]struct{})
deletion, firstMiss := findDeletionInTables(ctx, dbConn, tables, vcenter, cand) for i := range candidates {
cand := &candidates[i]
deletion, firstMiss, lastSeenTable := findDeletionInTables(ctx, dbConn, tables, vcenter, cand)
if deletion == 0 && len(nextPresence) > 0 && firstMiss > 0 { if deletion == 0 && len(nextPresence) > 0 && firstMiss > 0 {
if !isPresent(nextPresence, cand) { if !isPresent(nextPresence, *cand) {
// Single miss at end of day, confirmed by next-day absence. // Single miss at end of day, confirmed by next-day absence.
deletion = firstMiss deletion = firstMiss
logger.Debug("cross-day deletion inferred from next snapshot", "vcenter", vcenter, "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "deletion", deletion) logger.Debug("cross-day deletion inferred from next snapshot", "vcenter", vcenter, "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "deletion", deletion)
@@ -64,10 +67,25 @@ func backfillLifecycleDeletionsToday(ctx context.Context, logger *slog.Logger, d
logger.Warn("lifecycle backfill mark deleted failed", "vcenter", vcenter, "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "cluster", cand.cluster, "deletion", deletion, "error", err) logger.Warn("lifecycle backfill mark deleted failed", "vcenter", vcenter, "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "cluster", cand.cluster, "deletion", deletion, "error", err)
continue continue
} }
if lastSeenTable != "" {
if rowsAffected, err := updateDeletionTimeInSnapshot(ctx, dbConn, lastSeenTable, vcenter, cand.vmID, cand.vmUUID, cand.name, deletion); err != nil {
logger.Warn("lifecycle backfill failed to update hourly snapshot deletion time", "vcenter", vcenter, "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "cluster", cand.cluster, "table", lastSeenTable, "deletion", deletion, "error", err)
} else if rowsAffected > 0 {
updatedTables[lastSeenTable] = struct{}{}
logger.Debug("lifecycle backfill updated hourly snapshot deletion time", "vcenter", vcenter, "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "cluster", cand.cluster, "table", lastSeenTable, "deletion", deletion)
}
}
logger.Debug("lifecycle backfill applied", "vcenter", vcenter, "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "cluster", cand.cluster, "deletion", deletion) logger.Debug("lifecycle backfill applied", "vcenter", vcenter, "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "cluster", cand.cluster, "deletion", deletion)
} }
} }
return nil if len(updatedTables) == 0 {
return nil, nil
}
tablesUpdated := make([]string, 0, len(updatedTables))
for table := range updatedTables {
tablesUpdated = append(tablesUpdated, table)
}
return tablesUpdated, nil
} }
type lifecycleCandidate struct { type lifecycleCandidate struct {
@@ -212,41 +230,47 @@ func isPresent(presence map[string]struct{}, cand lifecycleCandidate) bool {
} }
// findDeletionInTables walks ordered hourly tables for a vCenter and returns the first confirmed deletion time // findDeletionInTables walks ordered hourly tables for a vCenter and returns the first confirmed deletion time
// (requiring two consecutive misses) plus the time of the first miss for cross-day handling. // (requiring two consecutive misses), the time of the first miss for cross-day handling, and the last table where
func findDeletionInTables(ctx context.Context, dbConn *sqlx.DB, tables []snapshotTable, vcenter string, cand lifecycleCandidate) (int64, int64) { // the VM was seen so we can backfill deletion time into that snapshot.
func findDeletionInTables(ctx context.Context, dbConn *sqlx.DB, tables []snapshotTable, vcenter string, cand *lifecycleCandidate) (int64, int64, string) {
var lastSeen int64 var lastSeen int64
var lastSeenTable string
var firstMiss int64 var firstMiss int64
for i, tbl := range tables { for i, tbl := range tables {
rows, err := querySnapshotRows(ctx, dbConn, tbl.Table, []string{"VmId", "VmUuid", "Name", "Cluster"}, `"Vcenter" = ? AND "VmId" = ?`, vcenter, cand.vmID) rows, err := querySnapshotRows(ctx, dbConn, tbl.Table, []string{"VmId", "VmUuid", "Name", "Cluster"}, `"Vcenter" = ? AND "VmId" = ?`, vcenter, cand.vmID)
if err == nil { if err != nil {
if rows.Next() { continue
var vmId, vmUuid, name, cluster sql.NullString }
if scanErr := rows.Scan(&vmId, &vmUuid, &name, &cluster); scanErr == nil { seen := false
lastSeen = tbl.Time if rows.Next() {
if cand.name == "" && name.Valid { var vmId, vmUuid, name, cluster sql.NullString
cand.name = name.String if scanErr := rows.Scan(&vmId, &vmUuid, &name, &cluster); scanErr == nil {
} seen = true
if cand.cluster == "" && cluster.Valid { lastSeen = tbl.Time
cand.cluster = cluster.String lastSeenTable = tbl.Table
} if cand.vmUUID == "" && vmUuid.Valid {
cand.vmUUID = vmUuid.String
}
if cand.name == "" && name.Valid {
cand.name = name.String
}
if cand.cluster == "" && cluster.Valid {
cand.cluster = cluster.String
} }
} }
rows.Close()
} }
if lastSeen > 0 && tbl.Time > lastSeen { rows.Close()
// first table after last seen -> first miss
if seen, _ := candSeenInTable(ctx, dbConn, tbl.Table, vcenter, cand.vmID); !seen { if lastSeen > 0 && !seen && firstMiss == 0 {
firstMiss = tbl.Time firstMiss = tbl.Time
// need two consecutive misses if i+1 < len(tables) {
if i+1 < len(tables) { if seen2, _ := candSeenInTable(ctx, dbConn, tables[i+1].Table, vcenter, cand.vmID); !seen2 {
if seen2, _ := candSeenInTable(ctx, dbConn, tables[i+1].Table, vcenter, cand.vmID); !seen2 { return firstMiss, firstMiss, lastSeenTable
return firstMiss, firstMiss
}
} }
} }
} }
} }
return 0, firstMiss return 0, firstMiss, lastSeenTable
} }
func candSeenInTable(ctx context.Context, dbConn *sqlx.DB, table, vcenter, vmID string) (bool, error) { func candSeenInTable(ctx context.Context, dbConn *sqlx.DB, table, vcenter, vmID string) (bool, error) {

View File

@@ -1057,6 +1057,7 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
missingCount, deletionsMarked, candidates := prepareDeletionCandidates(ctx, log, dbConn, url, inventoryRows, presentSnapshots, presentByUuid, presentByName, startTime) missingCount, deletionsMarked, candidates := prepareDeletionCandidates(ctx, log, dbConn, url, inventoryRows, presentSnapshots, presentByUuid, presentByName, startTime)
newCount := 0 newCount := 0
prevTableName := "" prevTableName := ""
reportTables := make(map[string]struct{})
// If deletions detected, refine deletion time using vCenter events in a small window. // If deletions detected, refine deletion time using vCenter events in a small window.
if missingCount > 0 { if missingCount > 0 {
@@ -1084,6 +1085,23 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
if err := db.MarkVmDeletedWithDetails(ctx, dbConn, url, cand.vmID, cand.vmUUID, cand.name, cand.cluster, t.Unix()); err != nil { if err := db.MarkVmDeletedWithDetails(ctx, dbConn, url, cand.vmID, cand.vmUUID, cand.name, cand.cluster, t.Unix()); err != nil {
log.Warn("failed to refine lifecycle cache deletion time", "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "vcenter", url, "error", err) log.Warn("failed to refine lifecycle cache deletion time", "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "vcenter", url, "error", err)
} }
if snapRow, snapTable, found := findVMInHourlySnapshots(ctx, dbConn, url, cand.vmID); found {
vmUUID := cand.vmUUID
if vmUUID == "" && snapRow.VmUuid.Valid {
vmUUID = snapRow.VmUuid.String
}
name := cand.name
if name == "" {
name = snapRow.Name
}
if rowsAffected, err := updateDeletionTimeInSnapshot(ctx, dbConn, snapTable, url, cand.vmID, vmUUID, name, delTs.Int64); err != nil {
log.Warn("failed to update hourly snapshot deletion time from event", "table", snapTable, "vm_id", cand.vmID, "vm_uuid", vmUUID, "vcenter", url, "error", err)
} else if rowsAffected > 0 {
reportTables[snapTable] = struct{}{}
deletionsMarked = true
log.Debug("updated hourly snapshot deletion time from event", "table", snapTable, "vm_id", cand.vmID, "vm_uuid", vmUUID, "vcenter", url, "event_time", t)
}
}
log.Info("refined deletion time from vcenter event", "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "vcenter", url, "event_time", t) log.Info("refined deletion time from vcenter event", "vm_id", cand.vmID, "vm_uuid", cand.vmUUID, "name", cand.name, "vcenter", url, "event_time", t)
} }
} }
@@ -1124,17 +1142,26 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
} }
// Discover previous snapshots once per run (serial) to avoid concurrent probes across vCenters. // Discover previous snapshots once per run (serial) to avoid concurrent probes across vCenters.
prevTableName, newCount, missingCount = c.compareWithPreviousSnapshot(ctx, dbConn, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName, missingCount) var prevTableTouched bool
prevTableName, newCount, missingCount, prevTableTouched = c.compareWithPreviousSnapshot(ctx, dbConn, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName, missingCount)
if prevTableTouched && prevTableName != "" {
reportTables[prevTableName] = struct{}{}
deletionsMarked = true
}
// If VM count dropped versus totals and we still haven't marked missing, try another comparison + wider event window. // If VM count dropped versus totals and we still haven't marked missing, try another comparison + wider event window.
if missingCount == 0 && prevVmCount.Valid && prevVmCount.Int64 > int64(totals.VmCount) { if missingCount == 0 && prevVmCount.Valid && prevVmCount.Int64 > int64(totals.VmCount) {
// Fallback: locate a previous table only if we didn't already find one. // Fallback: locate a previous table only if we didn't already find one.
if prevTableName == "" { if prevTableName == "" {
if prevTable, err := latestHourlySnapshotBefore(ctx, dbConn, startTime, loggerFromCtx(ctx, c.Logger)); err == nil && prevTable != "" { if prevTable, err := latestHourlySnapshotBefore(ctx, dbConn, startTime, loggerFromCtx(ctx, c.Logger)); err == nil && prevTable != "" {
moreMissing := c.markMissingFromPrevious(ctx, dbConn, prevTable, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName) moreMissing, tableUpdated := c.markMissingFromPrevious(ctx, dbConn, prevTable, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName)
if moreMissing > 0 { if moreMissing > 0 {
missingCount += moreMissing missingCount += moreMissing
} }
if tableUpdated {
reportTables[prevTable] = struct{}{}
deletionsMarked = true
}
// Reuse this table name for later snapshot lookups when correlating deletion events. // Reuse this table name for later snapshot lookups when correlating deletion events.
prevTableName = prevTable prevTableName = prevTable
} }
@@ -1157,9 +1184,10 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
} }
inv, ok := inventoryByVmID[vmID] inv, ok := inventoryByVmID[vmID]
var snapRow InventorySnapshotRow var snapRow InventorySnapshotRow
var snapTable string
if !ok { if !ok {
var found bool var found bool
snapRow, found = findVMInHourlySnapshots(ctx, dbConn, url, vmID, prevTableName) snapRow, snapTable, found = findVMInHourlySnapshots(ctx, dbConn, url, vmID, prevTableName)
if !found { if !found {
c.Logger.Debug("count-drop: deletion event has no snapshot match", "vm_id", vmID, "vcenter", url, "event_time", t) c.Logger.Debug("count-drop: deletion event has no snapshot match", "vm_id", vmID, "vcenter", url, "event_time", t)
continue continue
@@ -1170,7 +1198,7 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
Name: snapRow.Name, Name: snapRow.Name,
Datacenter: snapRow.Datacenter, Datacenter: snapRow.Datacenter,
} }
c.Logger.Info("count-drop: correlated deletion via snapshot lookup", "vm_id", vmID, "vm_uuid", inv.VmUuid.String, "name", inv.Name, "vcenter", url, "event_time", t, "snapshot_table", prevTableName) c.Logger.Info("count-drop: correlated deletion via snapshot lookup", "vm_id", vmID, "vm_uuid", inv.VmUuid.String, "name", inv.Name, "vcenter", url, "event_time", t, "snapshot_table", snapTable)
} }
// Prefer UUID from snapshot if inventory entry lacks it. // Prefer UUID from snapshot if inventory entry lacks it.
if !inv.VmUuid.Valid && snapRow.VmUuid.Valid { if !inv.VmUuid.Valid && snapRow.VmUuid.Valid {
@@ -1193,6 +1221,19 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
if err := db.MarkVmDeletedWithDetails(ctx, dbConn, url, vmID, inv.VmUuid.String, inv.Name, clusterName, t.Unix()); err != nil { if err := db.MarkVmDeletedWithDetails(ctx, dbConn, url, vmID, inv.VmUuid.String, inv.Name, clusterName, t.Unix()); err != nil {
c.Logger.Warn("count-drop: failed to refine lifecycle cache deletion time", "vm_id", vmID, "vm_uuid", inv.VmUuid, "vcenter", url, "error", err) c.Logger.Warn("count-drop: failed to refine lifecycle cache deletion time", "vm_id", vmID, "vm_uuid", inv.VmUuid, "vcenter", url, "error", err)
} }
tableToUpdate := snapTable
if tableToUpdate == "" {
tableToUpdate = prevTableName
}
if tableToUpdate != "" {
if rowsAffected, err := updateDeletionTimeInSnapshot(ctx, dbConn, tableToUpdate, url, vmID, inv.VmUuid.String, inv.Name, delTs.Int64); err != nil {
c.Logger.Warn("count-drop: failed to update hourly snapshot deletion time from event", "table", tableToUpdate, "vm_id", vmID, "vcenter", url, "error", err)
} else if rowsAffected > 0 {
reportTables[tableToUpdate] = struct{}{}
deletionsMarked = true
c.Logger.Debug("count-drop: updated hourly snapshot deletion time from event", "table", tableToUpdate, "vm_id", vmID, "vm_uuid", inv.VmUuid.String, "vcenter", url, "event_time", t)
}
}
missingCount++ missingCount++
deletionsMarked = true deletionsMarked = true
} }
@@ -1200,8 +1241,13 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
} }
// Backfill lifecycle deletions for VMs missing from inventory and without DeletedAt. // Backfill lifecycle deletions for VMs missing from inventory and without DeletedAt.
if err := backfillLifecycleDeletionsToday(ctx, log, dbConn, url, startTime, presentSnapshots); err != nil { if backfillTables, err := backfillLifecycleDeletionsToday(ctx, log, dbConn, url, startTime, presentSnapshots); err != nil {
log.Warn("failed to backfill lifecycle deletions for today", "vcenter", url, "error", err) log.Warn("failed to backfill lifecycle deletions for today", "vcenter", url, "error", err)
} else if len(backfillTables) > 0 {
for _, table := range backfillTables {
reportTables[table] = struct{}{}
}
deletionsMarked = true
} }
log.Info("Hourly snapshot summary", log.Info("Hourly snapshot summary",
@@ -1219,10 +1265,15 @@ func (c *CronTask) captureHourlySnapshotForVcenter(ctx context.Context, startTim
log.Warn("failed to record snapshot run", "url", url, "error", upErr) log.Warn("failed to record snapshot run", "url", url, "error", upErr)
} }
if deletionsMarked { if deletionsMarked {
if err := c.generateReport(ctx, tableName); err != nil { if len(reportTables) == 0 {
log.Warn("failed to regenerate hourly report after deletions", "error", err, "table", tableName) reportTables[tableName] = struct{}{}
} else { }
log.Debug("Regenerated hourly report after deletions", "table", tableName) for reportTable := range reportTables {
if err := c.generateReport(ctx, reportTable); err != nil {
log.Warn("failed to regenerate hourly report after deletions", "error", err, "table", reportTable)
} else {
log.Debug("Regenerated hourly report after deletions", "table", reportTable)
}
} }
} }
return nil return nil
@@ -1242,7 +1293,7 @@ func (c *CronTask) compareWithPreviousSnapshot(
inventoryByUuid map[string]queries.Inventory, inventoryByUuid map[string]queries.Inventory,
inventoryByName map[string]queries.Inventory, inventoryByName map[string]queries.Inventory,
missingCount int, missingCount int,
) (string, int, int) { ) (string, int, int, bool) {
prevTableName, prevTableErr := latestHourlySnapshotBefore(ctx, dbConn, startTime, loggerFromCtx(ctx, c.Logger)) prevTableName, prevTableErr := latestHourlySnapshotBefore(ctx, dbConn, startTime, loggerFromCtx(ctx, c.Logger))
if prevTableErr != nil { if prevTableErr != nil {
c.Logger.Warn("failed to locate previous hourly snapshot for deletion comparison", "error", prevTableErr, "url", url) c.Logger.Warn("failed to locate previous hourly snapshot for deletion comparison", "error", prevTableErr, "url", url)
@@ -1251,9 +1302,13 @@ func (c *CronTask) compareWithPreviousSnapshot(
prevSnapshotTime, _ := parseSnapshotTime(prevTableName) prevSnapshotTime, _ := parseSnapshotTime(prevTableName)
newCount := 0 newCount := 0
prevTableTouched := false
if prevTableName != "" { if prevTableName != "" {
moreMissing := c.markMissingFromPrevious(ctx, dbConn, prevTableName, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName) moreMissing, tableUpdated := c.markMissingFromPrevious(ctx, dbConn, prevTableName, url, startTime, presentSnapshots, presentByUuid, presentByName, inventoryByVmID, inventoryByUuid, inventoryByName)
missingCount += moreMissing missingCount += moreMissing
if tableUpdated {
prevTableTouched = true
}
expectedSeconds := int64(c.Settings.Values.Settings.VcenterInventorySnapshotSeconds) / 2 expectedSeconds := int64(c.Settings.Values.Settings.VcenterInventorySnapshotSeconds) / 2
// Skip only if snapshots are closer together than half the configured cadence // Skip only if snapshots are closer together than half the configured cadence
if SnapshotTooSoon(prevSnapshotTime, startTime.Unix(), expectedSeconds) { if SnapshotTooSoon(prevSnapshotTime, startTime.Unix(), expectedSeconds) {
@@ -1278,5 +1333,5 @@ func (c *CronTask) compareWithPreviousSnapshot(
newCount = len(presentSnapshots) newCount = len(presentSnapshots)
} }
return prevTableName, newCount, missingCount return prevTableName, newCount, missingCount, prevTableTouched
} }