This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -53,6 +54,12 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time
|
||||
monthEnd := monthStart.AddDate(0, 1, 0)
|
||||
dbConn := c.Database.DB()
|
||||
db.SetPostgresWorkMem(ctx, dbConn, c.Settings.Values.Settings.PostgresWorkMemMB)
|
||||
driver := strings.ToLower(dbConn.DriverName())
|
||||
useGoAgg := os.Getenv("MONTHLY_AGG_GO") == "1"
|
||||
if !useGoAgg && granularity == "hourly" && driver == "sqlite" {
|
||||
c.Logger.Warn("SQL monthly aggregation is slow on sqlite; overriding to Go path", "granularity", granularity)
|
||||
useGoAgg = true
|
||||
}
|
||||
|
||||
var snapshots []report.SnapshotRecord
|
||||
var unionColumns []string
|
||||
@@ -99,17 +106,28 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time
|
||||
}
|
||||
|
||||
// Optional Go-based aggregation path.
|
||||
if os.Getenv("MONTHLY_AGG_GO") == "1" && granularity == "daily" {
|
||||
c.Logger.Debug("Using go implementation of monthly aggregation")
|
||||
if err := c.aggregateMonthlySummaryGo(ctx, monthStart, monthEnd, monthlyTable, snapshots); err != nil {
|
||||
c.Logger.Warn("go-based monthly aggregation failed, falling back to SQL path", "error", err)
|
||||
if useGoAgg {
|
||||
if granularity == "daily" {
|
||||
c.Logger.Debug("Using go implementation of monthly aggregation (daily)")
|
||||
if err := c.aggregateMonthlySummaryGo(ctx, monthStart, monthEnd, monthlyTable, snapshots); err != nil {
|
||||
c.Logger.Warn("go-based monthly aggregation failed, falling back to SQL path", "error", err)
|
||||
} else {
|
||||
metrics.RecordMonthlyAggregation(time.Since(jobStart), nil)
|
||||
c.Logger.Debug("Finished monthly inventory aggregation (Go path)", "summary_table", monthlyTable)
|
||||
return nil
|
||||
}
|
||||
} else if granularity == "hourly" {
|
||||
c.Logger.Debug("Using go implementation of monthly aggregation (hourly)")
|
||||
if err := c.aggregateMonthlySummaryGoHourly(ctx, monthStart, monthEnd, monthlyTable, snapshots); err != nil {
|
||||
c.Logger.Warn("go-based monthly aggregation failed, falling back to SQL path", "error", err)
|
||||
} else {
|
||||
metrics.RecordMonthlyAggregation(time.Since(jobStart), nil)
|
||||
c.Logger.Debug("Finished monthly inventory aggregation (Go path)", "summary_table", monthlyTable)
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
metrics.RecordMonthlyAggregation(time.Since(jobStart), nil)
|
||||
c.Logger.Debug("Finished monthly inventory aggregation (Go path)", "summary_table", monthlyTable)
|
||||
return nil
|
||||
c.Logger.Warn("MONTHLY_AGG_GO is set but granularity is unsupported; using SQL path", "granularity", granularity)
|
||||
}
|
||||
} else if os.Getenv("MONTHLY_AGG_GO") == "1" && granularity != "daily" {
|
||||
c.Logger.Warn("MONTHLY_AGG_GO is set but only daily granularity supports Go aggregation; using SQL path", "granularity", granularity)
|
||||
}
|
||||
|
||||
tables := make([]string, 0, len(snapshots))
|
||||
@@ -148,11 +166,6 @@ func (c *CronTask) aggregateMonthlySummary(ctx context.Context, targetMonth time
|
||||
c.Logger.Error("failed to aggregate monthly inventory", "error", err, "month", targetMonth.Format("2006-01"))
|
||||
return err
|
||||
}
|
||||
if applied, err := db.ApplyLifecycleCreationToSummary(ctx, dbConn, monthlyTable); err != nil {
|
||||
c.Logger.Warn("failed to apply lifecycle creation times to monthly summary", "error", err, "table", monthlyTable)
|
||||
} else {
|
||||
c.Logger.Info("Monthly aggregation creation times", "source_lifecycle_cache", applied)
|
||||
}
|
||||
if applied, err := db.ApplyLifecycleDeletionToSummary(ctx, dbConn, monthlyTable, monthStart.Unix(), monthEnd.Unix()); err != nil {
|
||||
c.Logger.Warn("failed to apply lifecycle deletions to monthly summary", "error", err, "table", monthlyTable)
|
||||
} else {
|
||||
@@ -183,6 +196,124 @@ func monthlySummaryTableName(t time.Time) (string, error) {
|
||||
return db.SafeTableName(fmt.Sprintf("inventory_monthly_summary_%s", t.Format("200601")))
|
||||
}
|
||||
|
||||
// aggregateMonthlySummaryGoHourly aggregates hourly snapshots directly into the monthly summary table.
|
||||
func (c *CronTask) aggregateMonthlySummaryGoHourly(ctx context.Context, monthStart, monthEnd time.Time, summaryTable string, hourlySnapshots []report.SnapshotRecord) error {
|
||||
jobStart := time.Now()
|
||||
dbConn := c.Database.DB()
|
||||
|
||||
if err := clearTable(ctx, dbConn, summaryTable); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(hourlySnapshots) == 0 {
|
||||
return fmt.Errorf("no hourly snapshot tables found for %s", monthStart.Format("2006-01"))
|
||||
}
|
||||
|
||||
totalSamples := len(hourlySnapshots)
|
||||
var (
|
||||
aggMap map[dailyAggKey]*dailyAggVal
|
||||
snapTimes []int64
|
||||
)
|
||||
|
||||
if db.TableExists(ctx, dbConn, "vm_hourly_stats") {
|
||||
cacheAgg, cacheTimes, cacheErr := c.scanHourlyCache(ctx, monthStart, monthEnd)
|
||||
if cacheErr != nil {
|
||||
c.Logger.Warn("failed to use hourly cache, falling back to table scans", "error", cacheErr)
|
||||
} else if len(cacheAgg) > 0 {
|
||||
c.Logger.Debug("using hourly cache for monthly aggregation", "month", monthStart.Format("2006-01"), "snapshots", len(cacheTimes), "vm_count", len(cacheAgg))
|
||||
aggMap = cacheAgg
|
||||
snapTimes = cacheTimes
|
||||
totalSamples = len(cacheTimes)
|
||||
}
|
||||
}
|
||||
|
||||
if aggMap == nil {
|
||||
var errScan error
|
||||
aggMap, errScan = c.scanHourlyTablesParallel(ctx, hourlySnapshots)
|
||||
if errScan != nil {
|
||||
return errScan
|
||||
}
|
||||
c.Logger.Debug("scanned hourly tables for monthly aggregation", "month", monthStart.Format("2006-01"), "tables", len(hourlySnapshots), "vm_count", len(aggMap))
|
||||
if len(aggMap) == 0 {
|
||||
return fmt.Errorf("no VM records aggregated for %s", monthStart.Format("2006-01"))
|
||||
}
|
||||
|
||||
snapTimes = make([]int64, 0, len(hourlySnapshots))
|
||||
for _, snap := range hourlySnapshots {
|
||||
snapTimes = append(snapTimes, snap.SnapshotTime.Unix())
|
||||
}
|
||||
sort.Slice(snapTimes, func(i, j int) bool { return snapTimes[i] < snapTimes[j] })
|
||||
}
|
||||
|
||||
lifecycleDeletions := c.applyLifecycleDeletions(ctx, aggMap, monthStart, monthEnd)
|
||||
c.Logger.Info("Monthly aggregation deletion times", "source_lifecycle_cache", lifecycleDeletions)
|
||||
|
||||
inventoryDeletions := c.applyInventoryDeletions(ctx, aggMap, monthStart, monthEnd)
|
||||
c.Logger.Info("Monthly aggregation deletion times", "source_inventory", inventoryDeletions)
|
||||
|
||||
if len(snapTimes) > 0 {
|
||||
maxSnap := snapTimes[len(snapTimes)-1]
|
||||
inferredDeletions := 0
|
||||
for _, v := range aggMap {
|
||||
if v.deletion != 0 {
|
||||
continue
|
||||
}
|
||||
consecutiveMisses := 0
|
||||
firstMiss := int64(0)
|
||||
for _, t := range snapTimes {
|
||||
if t <= v.lastSeen {
|
||||
continue
|
||||
}
|
||||
if _, ok := v.seen[t]; ok {
|
||||
consecutiveMisses = 0
|
||||
firstMiss = 0
|
||||
continue
|
||||
}
|
||||
consecutiveMisses++
|
||||
if firstMiss == 0 {
|
||||
firstMiss = t
|
||||
}
|
||||
if consecutiveMisses >= 2 {
|
||||
v.deletion = firstMiss
|
||||
inferredDeletions++
|
||||
break
|
||||
}
|
||||
}
|
||||
if v.deletion == 0 && v.lastSeen < maxSnap && firstMiss > 0 {
|
||||
c.Logger.Debug("pending deletion inference (insufficient consecutive misses)", "vm_id", v.key.VmId, "vm_uuid", v.key.VmUuid, "name", v.key.Name, "last_seen", v.lastSeen, "first_missing_snapshot", firstMiss)
|
||||
}
|
||||
}
|
||||
c.Logger.Info("Monthly aggregation deletion times", "source_inferred", inferredDeletions)
|
||||
}
|
||||
|
||||
totalSamplesByVcenter := sampleCountsByVcenter(aggMap)
|
||||
if err := c.insertDailyAggregates(ctx, summaryTable, aggMap, totalSamples, totalSamplesByVcenter); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
db.AnalyzeTableIfPostgres(ctx, dbConn, summaryTable)
|
||||
rowCount, err := db.TableRowCount(ctx, dbConn, summaryTable)
|
||||
if err != nil {
|
||||
c.Logger.Warn("unable to count monthly summary rows (Go hourly)", "error", err, "table", summaryTable)
|
||||
}
|
||||
if err := report.RegisterSnapshot(ctx, c.Database, "monthly", summaryTable, monthStart, rowCount); err != nil {
|
||||
c.Logger.Warn("failed to register monthly snapshot (Go hourly)", "error", err, "table", summaryTable)
|
||||
}
|
||||
if err := c.generateReport(ctx, summaryTable); err != nil {
|
||||
c.Logger.Warn("failed to generate monthly report (Go hourly)", "error", err, "table", summaryTable)
|
||||
return err
|
||||
}
|
||||
|
||||
c.Logger.Debug("Finished monthly inventory aggregation (Go hourly)",
|
||||
"summary_table", summaryTable,
|
||||
"duration", time.Since(jobStart),
|
||||
"tables_scanned", len(hourlySnapshots),
|
||||
"rows_written", rowCount,
|
||||
"total_samples", totalSamples,
|
||||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
// aggregateMonthlySummaryGo mirrors the SQL-based monthly aggregation but performs the work in Go,
|
||||
// reading daily summaries in parallel and reducing them to a single monthly summary table.
|
||||
func (c *CronTask) aggregateMonthlySummaryGo(ctx context.Context, monthStart, monthEnd time.Time, summaryTable string, dailySnapshots []report.SnapshotRecord) error {
|
||||
@@ -223,11 +354,6 @@ func (c *CronTask) aggregateMonthlySummaryGo(ctx context.Context, monthStart, mo
|
||||
return err
|
||||
}
|
||||
|
||||
if applied, err := db.ApplyLifecycleCreationToSummary(ctx, dbConn, summaryTable); err != nil {
|
||||
c.Logger.Warn("failed to apply lifecycle creation times to monthly summary (Go)", "error", err, "table", summaryTable)
|
||||
} else {
|
||||
c.Logger.Info("Monthly aggregation creation times", "source_lifecycle_cache", applied)
|
||||
}
|
||||
if applied, err := db.ApplyLifecycleDeletionToSummary(ctx, dbConn, summaryTable, monthStart.Unix(), monthEnd.Unix()); err != nil {
|
||||
c.Logger.Warn("failed to apply lifecycle deletions to monthly summary (Go)", "error", err, "table", summaryTable)
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user