progress on go based aggregation
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -63,6 +64,7 @@ func (c *CronTask) aggregateDailySummary(ctx context.Context, targetTime time.Ti
|
||||
|
||||
// If enabled, use the Go fan-out/reduce path to parallelize aggregation.
|
||||
if os.Getenv("DAILY_AGG_GO") == "1" {
|
||||
c.Logger.Debug("Using go implementation of aggregation")
|
||||
if err := c.aggregateDailySummaryGo(ctx, dayStart, dayEnd, summaryTable, force); err != nil {
|
||||
c.Logger.Warn("go-based daily aggregation failed, falling back to SQL path", "error", err)
|
||||
} else {
|
||||
@@ -223,12 +225,54 @@ func (c *CronTask) aggregateDailySummaryGo(ctx context.Context, dayStart, dayEnd
|
||||
}
|
||||
|
||||
totalSamples := len(hourlyTables)
|
||||
aggMap, err := c.scanHourlyTablesParallel(ctx, hourlySnapshots, totalSamples)
|
||||
if err != nil {
|
||||
return err
|
||||
var (
|
||||
aggMap map[dailyAggKey]*dailyAggVal
|
||||
snapTimes []int64
|
||||
)
|
||||
|
||||
if db.TableExists(ctx, dbConn, "vm_hourly_stats") {
|
||||
cacheAgg, cacheTimes, cacheErr := c.scanHourlyCache(ctx, dayStart, dayEnd)
|
||||
if cacheErr != nil {
|
||||
c.Logger.Warn("failed to use hourly cache, falling back to table scans", "error", cacheErr)
|
||||
} else if len(cacheAgg) > 0 {
|
||||
aggMap = cacheAgg
|
||||
snapTimes = cacheTimes
|
||||
totalSamples = len(cacheTimes)
|
||||
}
|
||||
}
|
||||
if len(aggMap) == 0 {
|
||||
return fmt.Errorf("no VM records aggregated for %s", dayStart.Format("2006-01-02"))
|
||||
|
||||
if aggMap == nil {
|
||||
var errScan error
|
||||
aggMap, errScan = c.scanHourlyTablesParallel(ctx, hourlySnapshots, totalSamples)
|
||||
if errScan != nil {
|
||||
return errScan
|
||||
}
|
||||
if len(aggMap) == 0 {
|
||||
return fmt.Errorf("no VM records aggregated for %s", dayStart.Format("2006-01-02"))
|
||||
}
|
||||
|
||||
// Build ordered list of snapshot times for deletion inference.
|
||||
snapTimes = make([]int64, 0, len(hourlySnapshots))
|
||||
for _, snap := range hourlySnapshots {
|
||||
snapTimes = append(snapTimes, snap.SnapshotTime.Unix())
|
||||
}
|
||||
sort.Slice(snapTimes, func(i, j int) bool { return snapTimes[i] < snapTimes[j] })
|
||||
}
|
||||
|
||||
for _, v := range aggMap {
|
||||
if v.creation == 0 {
|
||||
v.creation = v.firstSeen
|
||||
}
|
||||
// Infer deletion as the first snapshot time after lastSeen where the VM is absent.
|
||||
for _, t := range snapTimes {
|
||||
if t <= v.lastSeen {
|
||||
continue
|
||||
}
|
||||
if _, ok := v.seen[t]; !ok {
|
||||
v.deletion = t
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Insert aggregated rows.
|
||||
@@ -277,6 +321,9 @@ type dailyAggVal struct {
|
||||
creation int64
|
||||
firstSeen int64
|
||||
lastSeen int64
|
||||
lastDisk float64
|
||||
lastVcpu int64
|
||||
lastRam int64
|
||||
sumVcpu int64
|
||||
sumRam int64
|
||||
sumDisk float64
|
||||
@@ -285,6 +332,8 @@ type dailyAggVal struct {
|
||||
bronzeHits int64
|
||||
silverHits int64
|
||||
goldHits int64
|
||||
seen map[int64]struct{}
|
||||
deletion int64
|
||||
}
|
||||
|
||||
func (c *CronTask) scanHourlyTablesParallel(ctx context.Context, snapshots []report.SnapshotRecord, totalSamples int) (map[dailyAggKey]*dailyAggVal, error) {
|
||||
@@ -346,6 +395,9 @@ func mergeDailyAgg(dst, src *dailyAggVal) {
|
||||
dst.isTemplate = src.isTemplate
|
||||
dst.poweredOn = src.poweredOn
|
||||
dst.srmPlaceholder = src.srmPlaceholder
|
||||
dst.lastDisk = src.lastDisk
|
||||
dst.lastVcpu = src.lastVcpu
|
||||
dst.lastRam = src.lastRam
|
||||
}
|
||||
dst.sumVcpu += src.sumVcpu
|
||||
dst.sumRam += src.sumRam
|
||||
@@ -355,6 +407,12 @@ func mergeDailyAgg(dst, src *dailyAggVal) {
|
||||
dst.bronzeHits += src.bronzeHits
|
||||
dst.silverHits += src.silverHits
|
||||
dst.goldHits += src.goldHits
|
||||
if dst.seen == nil {
|
||||
dst.seen = make(map[int64]struct{}, len(src.seen))
|
||||
}
|
||||
for t := range src.seen {
|
||||
dst.seen[t] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *CronTask) scanHourlyTable(ctx context.Context, snap report.SnapshotRecord) (map[dailyAggKey]*dailyAggVal, error) {
|
||||
@@ -428,6 +486,9 @@ FROM %s
|
||||
creation: int64OrZero(creation),
|
||||
firstSeen: int64OrZero(snapshotTime),
|
||||
lastSeen: int64OrZero(snapshotTime),
|
||||
lastDisk: disk.Float64,
|
||||
lastVcpu: vcpu.Int64,
|
||||
lastRam: ram.Int64,
|
||||
sumVcpu: vcpu.Int64,
|
||||
sumRam: ram.Int64,
|
||||
sumDisk: disk.Float64,
|
||||
@@ -436,12 +497,120 @@ FROM %s
|
||||
bronzeHits: hitBronze,
|
||||
silverHits: hitSilver,
|
||||
goldHits: hitGold,
|
||||
seen: map[int64]struct{}{int64OrZero(snapshotTime): {}},
|
||||
}
|
||||
out[key] = row
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// scanHourlyCache aggregates directly from vm_hourly_stats when available.
|
||||
func (c *CronTask) scanHourlyCache(ctx context.Context, start, end time.Time) (map[dailyAggKey]*dailyAggVal, []int64, error) {
|
||||
dbConn := c.Database.DB()
|
||||
query := `
|
||||
SELECT
|
||||
"Name","Vcenter","VmId","VmUuid","ResourcePool","Datacenter","Cluster","Folder",
|
||||
COALESCE("ProvisionedDisk",0) AS disk,
|
||||
COALESCE("VcpuCount",0) AS vcpu,
|
||||
COALESCE("RamGB",0) AS ram,
|
||||
COALESCE("CreationTime",0) AS creation,
|
||||
COALESCE("DeletionTime",0) AS deletion,
|
||||
COALESCE("IsTemplate",'') AS is_template,
|
||||
COALESCE("PoweredOn",'') AS powered_on,
|
||||
COALESCE("SrmPlaceholder",'') AS srm_placeholder,
|
||||
"SnapshotTime"
|
||||
FROM vm_hourly_stats
|
||||
WHERE "SnapshotTime" >= ? AND "SnapshotTime" < ?`
|
||||
q := dbConn.Rebind(query)
|
||||
rows, err := dbConn.QueryxContext(ctx, q, start.Unix(), end.Unix())
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
agg := make(map[dailyAggKey]*dailyAggVal, 512)
|
||||
timeSet := make(map[int64]struct{}, 64)
|
||||
for rows.Next() {
|
||||
var (
|
||||
name, vcenter, vmId, vmUuid, resourcePool string
|
||||
dc, cluster, folder sql.NullString
|
||||
disk sql.NullFloat64
|
||||
vcpu, ram sql.NullInt64
|
||||
creation, deletion, snapshotTime sql.NullInt64
|
||||
isTemplate, poweredOn, srmPlaceholder sql.NullString
|
||||
)
|
||||
if err := rows.Scan(&name, &vcenter, &vmId, &vmUuid, &resourcePool, &dc, &cluster, &folder, &disk, &vcpu, &ram, &creation, &deletion, &isTemplate, &poweredOn, &srmPlaceholder, &snapshotTime); err != nil {
|
||||
continue
|
||||
}
|
||||
if vcenter == "" {
|
||||
continue
|
||||
}
|
||||
if strings.EqualFold(strings.TrimSpace(isTemplate.String), "true") || isTemplate.String == "1" {
|
||||
continue
|
||||
}
|
||||
key := dailyAggKey{
|
||||
Vcenter: vcenter,
|
||||
VmId: strings.TrimSpace(vmId),
|
||||
VmUuid: strings.TrimSpace(vmUuid),
|
||||
Name: strings.TrimSpace(name),
|
||||
}
|
||||
if key.VmId == "" && key.VmUuid == "" && key.Name == "" {
|
||||
continue
|
||||
}
|
||||
if key.VmId == "" {
|
||||
key.VmId = key.VmUuid
|
||||
}
|
||||
pool := strings.ToLower(strings.TrimSpace(resourcePool))
|
||||
hitTin := btoi(pool == "tin")
|
||||
hitBronze := btoi(pool == "bronze")
|
||||
hitSilver := btoi(pool == "silver")
|
||||
hitGold := btoi(pool == "gold")
|
||||
|
||||
snapTs := int64OrZero(snapshotTime)
|
||||
timeSet[snapTs] = struct{}{}
|
||||
|
||||
row := &dailyAggVal{
|
||||
key: key,
|
||||
resourcePool: resourcePool,
|
||||
datacenter: dc.String,
|
||||
cluster: cluster.String,
|
||||
folder: folder.String,
|
||||
isTemplate: isTemplate.String,
|
||||
poweredOn: poweredOn.String,
|
||||
srmPlaceholder: srmPlaceholder.String,
|
||||
creation: int64OrZero(creation),
|
||||
firstSeen: snapTs,
|
||||
lastSeen: snapTs,
|
||||
lastDisk: disk.Float64,
|
||||
lastVcpu: vcpu.Int64,
|
||||
lastRam: ram.Int64,
|
||||
sumVcpu: vcpu.Int64,
|
||||
sumRam: ram.Int64,
|
||||
sumDisk: disk.Float64,
|
||||
samples: 1,
|
||||
tinHits: hitTin,
|
||||
bronzeHits: hitBronze,
|
||||
silverHits: hitSilver,
|
||||
goldHits: hitGold,
|
||||
seen: map[int64]struct{}{snapTs: {}},
|
||||
}
|
||||
if deletion.Valid && deletion.Int64 > 0 {
|
||||
row.deletion = deletion.Int64
|
||||
}
|
||||
if existing, ok := agg[key]; ok {
|
||||
mergeDailyAgg(existing, row)
|
||||
} else {
|
||||
agg[key] = row
|
||||
}
|
||||
}
|
||||
snapTimes := make([]int64, 0, len(timeSet))
|
||||
for t := range timeSet {
|
||||
snapTimes = append(snapTimes, t)
|
||||
}
|
||||
sort.Slice(snapTimes, func(i, j int) bool { return snapTimes[i] < snapTimes[j] })
|
||||
return agg, snapTimes, rows.Err()
|
||||
}
|
||||
|
||||
func (c *CronTask) insertDailyAggregates(ctx context.Context, table string, agg map[dailyAggKey]*dailyAggVal, totalSamples int) error {
|
||||
dbConn := c.Database.DB()
|
||||
tx, err := dbConn.Beginx()
|
||||
@@ -476,10 +645,12 @@ INSERT INTO %s (
|
||||
goldPct := 0.0
|
||||
if total > 0 {
|
||||
avgPresent = float64(v.samples) / total
|
||||
tinPct = float64(v.tinHits) * 100 / total
|
||||
bronzePct = float64(v.bronzeHits) * 100 / total
|
||||
silverPct = float64(v.silverHits) * 100 / total
|
||||
goldPct = float64(v.goldHits) * 100 / total
|
||||
}
|
||||
if v.samples > 0 {
|
||||
tinPct = float64(v.tinHits) * 100 / float64(v.samples)
|
||||
bronzePct = float64(v.bronzeHits) * 100 / float64(v.samples)
|
||||
silverPct = float64(v.silverHits) * 100 / float64(v.samples)
|
||||
goldPct = float64(v.goldHits) * 100 / float64(v.samples)
|
||||
}
|
||||
args := []interface{}{
|
||||
v.key.Name,
|
||||
@@ -490,14 +661,14 @@ INSERT INTO %s (
|
||||
nullIfEmpty(v.datacenter),
|
||||
nullIfEmpty(v.cluster),
|
||||
nullIfEmpty(v.folder),
|
||||
v.sumDisk,
|
||||
v.sumVcpu,
|
||||
v.sumRam,
|
||||
v.lastDisk,
|
||||
v.lastVcpu,
|
||||
v.lastRam,
|
||||
v.isTemplate,
|
||||
v.poweredOn,
|
||||
v.srmPlaceholder,
|
||||
v.creation,
|
||||
int64(0), // deletion time refined later
|
||||
v.deletion,
|
||||
v.samples,
|
||||
avgVcpu,
|
||||
avgRam,
|
||||
|
||||
Reference in New Issue
Block a user