fix aggregation logic
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2026-01-23 09:38:08 +11:00
parent 8a3481b966
commit 3e2d95d3b9
16 changed files with 384 additions and 168 deletions

View File

@@ -7,6 +7,7 @@ import (
"log/slog"
"os"
"runtime"
"sort"
"strings"
"sync"
"time"
@@ -53,6 +54,12 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time
monthEnd := monthStart.AddDate(0, 1, 0)
dbConn := c.Database.DB()
db.SetPostgresWorkMem(ctx, dbConn, c.Settings.Values.Settings.PostgresWorkMemMB)
driver := strings.ToLower(dbConn.DriverName())
useGoAgg := os.Getenv("MONTHLY_AGG_GO") == "1"
if !useGoAgg && granularity == "hourly" && driver == "sqlite" {
c.Logger.Warn("SQL monthly aggregation is slow on sqlite; overriding to Go path", "granularity", granularity)
useGoAgg = true
}
var snapshots []report.SnapshotRecord
var unionColumns []string
@@ -99,17 +106,28 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time
}
// Optional Go-based aggregation path.
if os.Getenv("MONTHLY_AGG_GO") == "1" && granularity == "daily" {
c.Logger.Debug("Using go implementation of monthly aggregation")
if err := c.aggregateMonthlySummaryGo(ctx, monthStart, monthEnd, monthlyTable, snapshots); err != nil {
c.Logger.Warn("go-based monthly aggregation failed, falling back to SQL path", "error", err)
if useGoAgg {
if granularity == "daily" {
c.Logger.Debug("Using go implementation of monthly aggregation (daily)")
if err := c.aggregateMonthlySummaryGo(ctx, monthStart, monthEnd, monthlyTable, snapshots); err != nil {
c.Logger.Warn("go-based monthly aggregation failed, falling back to SQL path", "error", err)
} else {
metrics.RecordMonthlyAggregation(time.Since(jobStart), nil)
c.Logger.Debug("Finished monthly inventory aggregation (Go path)", "summary_table", monthlyTable)
return nil
}
} else if granularity == "hourly" {
c.Logger.Debug("Using go implementation of monthly aggregation (hourly)")
if err := c.aggregateMonthlySummaryGoHourly(ctx, monthStart, monthEnd, monthlyTable, snapshots); err != nil {
c.Logger.Warn("go-based monthly aggregation failed, falling back to SQL path", "error", err)
} else {
metrics.RecordMonthlyAggregation(time.Since(jobStart), nil)
c.Logger.Debug("Finished monthly inventory aggregation (Go path)", "summary_table", monthlyTable)
return nil
}
} else {
metrics.RecordMonthlyAggregation(time.Since(jobStart), nil)
c.Logger.Debug("Finished monthly inventory aggregation (Go path)", "summary_table", monthlyTable)
return nil
c.Logger.Warn("MONTHLY_AGG_GO is set but granularity is unsupported; using SQL path", "granularity", granularity)
}
} else if os.Getenv("MONTHLY_AGG_GO") == "1" && granularity != "daily" {
c.Logger.Warn("MONTHLY_AGG_GO is set but only daily granularity supports Go aggregation; using SQL path", "granularity", granularity)
}
tables := make([]string, 0, len(snapshots))
@@ -148,11 +166,6 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time
c.Logger.Error("failed to aggregate monthly inventory", "error", err, "month", targetMonth.Format("2006-01"))
return err
}
if applied, err := db.ApplyLifecycleCreationToSummary(ctx, dbConn, monthlyTable); err != nil {
c.Logger.Warn("failed to apply lifecycle creation times to monthly summary", "error", err, "table", monthlyTable)
} else {
c.Logger.Info("Monthly aggregation creation times", "source_lifecycle_cache", applied)
}
if applied, err := db.ApplyLifecycleDeletionToSummary(ctx, dbConn, monthlyTable, monthStart.Unix(), monthEnd.Unix()); err != nil {
c.Logger.Warn("failed to apply lifecycle deletions to monthly summary", "error", err, "table", monthlyTable)
} else {
@@ -183,6 +196,124 @@ func monthlySummaryTableName(t time.Time) (string, error) {
return db.SafeTableName(fmt.Sprintf("inventory_monthly_summary_%s", t.Format("200601")))
}
// aggregateMonthlySummaryGoHourly aggregates hourly snapshots directly into the monthly summary table.
//
// Steps, in order:
//  1. Reject an empty hourlySnapshots list, then clear summaryTable.
//  2. Build per-VM aggregates from the vm_hourly_stats cache when that table
//     exists and yields aggregates; otherwise scan the hourly snapshot tables.
//  3. Apply deletion times from the lifecycle cache and from inventory, then
//     infer a deletion for any VM still without one that is missing from two
//     consecutive snapshots after its last sighting.
//  4. Insert the aggregates, register the monthly snapshot and generate the
//     report.
//
// It returns an error when there are no snapshots, when clearing, scanning or
// inserting fails, when no VM records were aggregated, or when report
// generation fails. Failures to count rows or to register the snapshot are
// only logged.
func (c *CronTask) aggregateMonthlySummaryGoHourly(ctx context.Context, monthStart, monthEnd time.Time, summaryTable string, hourlySnapshots []report.SnapshotRecord) error {
	jobStart := time.Now()
	dbConn := c.Database.DB()

	// Validate the input before the destructive clear so that a call with
	// nothing to aggregate cannot wipe an existing monthly summary.
	if len(hourlySnapshots) == 0 {
		return fmt.Errorf("no hourly snapshot tables found for %s", monthStart.Format("2006-01"))
	}
	if err := clearTable(ctx, dbConn, summaryTable); err != nil {
		return err
	}

	// totalSamples defaults to the number of snapshot tables and is replaced
	// by the number of cached snapshot times when the cache is used.
	totalSamples := len(hourlySnapshots)
	var (
		aggMap    map[dailyAggKey]*dailyAggVal
		snapTimes []int64
	)

	// Preferred source: the hourly cache table. A cache error or an empty
	// result leaves aggMap nil, which triggers the table scan below.
	if db.TableExists(ctx, dbConn, "vm_hourly_stats") {
		cacheAgg, cacheTimes, cacheErr := c.scanHourlyCache(ctx, monthStart, monthEnd)
		if cacheErr != nil {
			c.Logger.Warn("failed to use hourly cache, falling back to table scans", "error", cacheErr)
		} else if len(cacheAgg) > 0 {
			c.Logger.Debug("using hourly cache for monthly aggregation", "month", monthStart.Format("2006-01"), "snapshots", len(cacheTimes), "vm_count", len(cacheAgg))
			aggMap = cacheAgg
			snapTimes = cacheTimes
			totalSamples = len(cacheTimes)
		}
	}

	// Fallback source: scan every hourly snapshot table.
	if aggMap == nil {
		var errScan error
		aggMap, errScan = c.scanHourlyTablesParallel(ctx, hourlySnapshots)
		if errScan != nil {
			return errScan
		}
		c.Logger.Debug("scanned hourly tables for monthly aggregation", "month", monthStart.Format("2006-01"), "tables", len(hourlySnapshots), "vm_count", len(aggMap))
		if len(aggMap) == 0 {
			return fmt.Errorf("no VM records aggregated for %s", monthStart.Format("2006-01"))
		}
		snapTimes = make([]int64, 0, len(hourlySnapshots))
		for _, snap := range hourlySnapshots {
			snapTimes = append(snapTimes, snap.SnapshotTime.Unix())
		}
	}

	// The inference below treats the last element as the newest snapshot and
	// counts consecutive misses in slice order, so enforce ascending order
	// here for both sources instead of relying on the cache's ordering.
	sort.Slice(snapTimes, func(i, j int) bool { return snapTimes[i] < snapTimes[j] })

	lifecycleDeletions := c.applyLifecycleDeletions(ctx, aggMap, monthStart, monthEnd)
	c.Logger.Info("Monthly aggregation deletion times", "source_lifecycle_cache", lifecycleDeletions)

	inventoryDeletions := c.applyInventoryDeletions(ctx, aggMap, monthStart, monthEnd)
	c.Logger.Info("Monthly aggregation deletion times", "source_inventory", inventoryDeletions)

	if len(snapTimes) > 0 {
		maxSnap := snapTimes[len(snapTimes)-1]
		inferredDeletions := 0
		for _, v := range aggMap {
			// A deletion time already set by an earlier source wins.
			if v.deletion != 0 {
				continue
			}
			consecutiveMisses := 0
			firstMiss := int64(0) // 0 means "no miss in the current run"
			for _, t := range snapTimes {
				// Only snapshots after the last sighting can be misses.
				if t <= v.lastSeen {
					continue
				}
				if _, ok := v.seen[t]; ok {
					// Seen again: the run of misses is broken.
					consecutiveMisses = 0
					firstMiss = 0
					continue
				}
				consecutiveMisses++
				if firstMiss == 0 {
					firstMiss = t
				}
				// Two misses in a row: record the deletion at the first of them.
				if consecutiveMisses >= 2 {
					v.deletion = firstMiss
					inferredDeletions++
					break
				}
			}
			// A single trailing miss is not enough evidence; log it only.
			if v.deletion == 0 && v.lastSeen < maxSnap && firstMiss > 0 {
				c.Logger.Debug("pending deletion inference (insufficient consecutive misses)",
					"vm_id", v.key.VmId,
					"vm_uuid", v.key.VmUuid,
					"name", v.key.Name,
					"last_seen", v.lastSeen,
					"first_missing_snapshot", firstMiss,
				)
			}
		}
		c.Logger.Info("Monthly aggregation deletion times", "source_inferred", inferredDeletions)
	}

	totalSamplesByVcenter := sampleCountsByVcenter(aggMap)
	if err := c.insertDailyAggregates(ctx, summaryTable, aggMap, totalSamples, totalSamplesByVcenter); err != nil {
		return err
	}

	db.AnalyzeTableIfPostgres(ctx, dbConn, summaryTable)

	// A failed count is not fatal: whatever rowCount holds is still passed
	// to the snapshot registration and the final log line.
	rowCount, err := db.TableRowCount(ctx, dbConn, summaryTable)
	if err != nil {
		c.Logger.Warn("unable to count monthly summary rows (Go hourly)", "error", err, "table", summaryTable)
	}
	if err := report.RegisterSnapshot(ctx, c.Database, "monthly", summaryTable, monthStart, rowCount); err != nil {
		c.Logger.Warn("failed to register monthly snapshot (Go hourly)", "error", err, "table", summaryTable)
	}
	// Unlike the two steps above, a report failure is returned to the caller.
	if err := c.generateReport(ctx, summaryTable); err != nil {
		c.Logger.Warn("failed to generate monthly report (Go hourly)", "error", err, "table", summaryTable)
		return err
	}

	c.Logger.Debug("Finished monthly inventory aggregation (Go hourly)",
		"summary_table", summaryTable,
		"duration", time.Since(jobStart),
		"tables_scanned", len(hourlySnapshots),
		"rows_written", rowCount,
		"total_samples", totalSamples,
	)
	return nil
}
// aggregateMonthlySummaryGo mirrors the SQL-based monthly aggregation but performs the work in Go,
// reading daily summaries in parallel and reducing them to a single monthly summary table.
func (c *CronTask) aggregateMonthlySummaryGo(ctx context.Context, monthStart, monthEnd time.Time, summaryTable string, dailySnapshots []report.SnapshotRecord) error {
@@ -223,11 +354,6 @@ func (c *CronTask) aggregateMonthlySummaryGo(ctx context.Context, monthStart, mo
return err
}
if applied, err := db.ApplyLifecycleCreationToSummary(ctx, dbConn, summaryTable); err != nil {
c.Logger.Warn("failed to apply lifecycle creation times to monthly summary (Go)", "error", err, "table", summaryTable)
} else {
c.Logger.Info("Monthly aggregation creation times", "source_lifecycle_cache", applied)
}
if applied, err := db.ApplyLifecycleDeletionToSummary(ctx, dbConn, summaryTable, monthStart.Unix(), monthEnd.Unix()); err != nil {
c.Logger.Warn("failed to apply lifecycle deletions to monthly summary (Go)", "error", err, "table", summaryTable)
} else {