add diagnostic endpoint
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Nathan Coad
2026-01-27 11:02:39 +11:00
parent 35b4a50cf6
commit fe96172253
7 changed files with 489 additions and 3 deletions

View File

@@ -173,6 +173,7 @@ func (c *CronTask) aggregateDailySummary(ctx context.Context, targetTime time.Ti
c.Logger.Warn("unable to count daily summary rows", "error", err, "table", summaryTable)
}
c.Logger.Debug("Counted daily summary rows", "table", summaryTable, "rows", rowCount, "duration", time.Since(rowCountStart))
logMissingCreationSummary(ctx, c.Logger, c.Database, summaryTable, rowCount)
registerStart := time.Now()
c.Logger.Debug("Registering daily snapshot", "table", summaryTable, "date", dayStart.Format("2006-01-02"), "rows", rowCount)
@@ -421,6 +422,7 @@ LIMIT 1
c.Logger.Warn("unable to count daily summary rows", "error", err, "table", summaryTable)
}
c.Logger.Debug("Counted daily summary rows", "table", summaryTable, "rows", rowCount, "duration", time.Since(rowCountStart))
logMissingCreationSummary(ctx, c.Logger, c.Database, summaryTable, rowCount)
registerStart := time.Now()
c.Logger.Debug("Registering daily snapshot", "table", summaryTable, "date", dayStart.Format("2006-01-02"), "rows", rowCount)
@@ -1010,6 +1012,106 @@ func btoi(b bool) int64 {
return 0
}
// logMissingCreationSummary emits best-effort diagnostics about rows in
// summaryTable whose "CreationTime" is NULL or 0.
//
// It logs, in order:
//   - the number of affected rows and their percentage of totalRows (Warn),
//     or a single Debug line and an early return when there are none;
//   - the affected row count per "Vcenter", largest first (Warn);
//   - up to sampleLimit example rows, ordered by "SamplesPresent"
//     descending (Debug).
//
// A nil logger falls back to slog.Default() and a nil ctx falls back to
// context.Background(). All queries share a single 5-second timeout derived
// from ctx. The function returns nothing: every failure is logged and the
// function either moves on to the next step or returns.
func logMissingCreationSummary(ctx context.Context, logger *slog.Logger, database db.Database, summaryTable string, totalRows int64) {
	if logger == nil {
		logger = slog.Default()
	}
	// summaryTable is interpolated into the SQL text below, so it is validated
	// before any query is built.
	if err := db.ValidateTableName(summaryTable); err != nil {
		logger.Warn("daily summary creation diagnostics skipped (invalid table)", "table", summaryTable, "error", err)
		return
	}
	if ctx == nil {
		ctx = context.Background()
	}
	// One shared deadline bounds the total time spent on diagnostics.
	diagCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()

	dbConn := database.DB()

	var missingTotal int64
	countQuery := fmt.Sprintf(`SELECT COUNT(*) FROM %s WHERE "CreationTime" IS NULL OR "CreationTime" = 0`, summaryTable)
	if err := dbConn.GetContext(diagCtx, &missingTotal, countQuery); err != nil {
		logger.Warn("daily summary creation diagnostics failed", "table", summaryTable, "error", err)
		return
	}
	if missingTotal == 0 {
		logger.Debug("daily summary creation diagnostics", "table", summaryTable, "missing_creation", 0)
		return
	}

	// Guard the division: totalRows comes from the caller and may be zero.
	missingPct := 0.0
	if totalRows > 0 {
		missingPct = float64(missingTotal) * 100 / float64(totalRows)
	}
	logger.Warn("daily summary rows missing CreationTime",
		"table", summaryTable,
		"missing_count", missingTotal,
		"total_rows", totalRows,
		"missing_pct", missingPct,
	)

	byVcenterQuery := fmt.Sprintf(`
SELECT "Vcenter", COUNT(*)
FROM %s
WHERE "CreationTime" IS NULL OR "CreationTime" = 0
GROUP BY "Vcenter"
ORDER BY COUNT(*) DESC
`, summaryTable)
	if rows, err := dbConn.QueryxContext(diagCtx, byVcenterQuery); err != nil {
		logger.Warn("daily summary creation diagnostics (by vcenter) failed", "table", summaryTable, "error", err)
	} else {
		for rows.Next() {
			// Scanning a NULL "Vcenter" into a plain string fails; NullString
			// keeps such a group in the output (with an empty name).
			var vcenter sql.NullString
			var count int64
			if err := rows.Scan(&vcenter, &count); err != nil {
				// Report the failure instead of silently dropping the group.
				logger.Warn("daily summary creation diagnostics (by vcenter) scan failed", "table", summaryTable, "error", err)
				continue
			}
			logger.Warn("daily summary rows missing CreationTime by vcenter", "table", summaryTable, "vcenter", vcenter.String, "missing_count", count)
		}
		rows.Close()
		if err := rows.Err(); err != nil {
			logger.Warn("daily summary creation diagnostics (by vcenter) iteration failed", "table", summaryTable, "error", err)
		}
	}

	const sampleLimit = 10
	sampleQuery := fmt.Sprintf(`
SELECT "Vcenter","VmId","VmUuid","Name","SamplesPresent","AvgIsPresent","SnapshotTime"
FROM %s
WHERE "CreationTime" IS NULL OR "CreationTime" = 0
ORDER BY "SamplesPresent" DESC
LIMIT %d
`, summaryTable, sampleLimit)
	if rows, err := dbConn.QueryxContext(diagCtx, sampleQuery); err != nil {
		logger.Warn("daily summary creation diagnostics (sample) failed", "table", summaryTable, "error", err)
	} else {
		for rows.Next() {
			// Every column is scanned through a Null* wrapper so that a NULL
			// in any of them cannot make the sample row disappear.
			var (
				vcenter                      sql.NullString
				vmId, vmUuid                 sql.NullString
				name                         sql.NullString
				samplesPresent, snapshotTime sql.NullInt64
				avgIsPresent                 sql.NullFloat64
			)
			if err := rows.Scan(&vcenter, &vmId, &vmUuid, &name, &samplesPresent, &avgIsPresent, &snapshotTime); err != nil {
				// Report the failure instead of silently dropping the sample.
				logger.Warn("daily summary creation diagnostics (sample) scan failed", "table", summaryTable, "error", err)
				continue
			}
			logger.Debug("daily summary missing CreationTime sample",
				"table", summaryTable,
				"vcenter", vcenter.String,
				"vm_id", vmId.String,
				"vm_uuid", vmUuid.String,
				"name", name.String,
				"samples_present", samplesPresent.Int64,
				"avg_is_present", avgIsPresent.Float64,
				"snapshot_time", snapshotTime.Int64,
			)
		}
		rows.Close()
		if err := rows.Err(); err != nil {
			logger.Warn("daily summary creation diagnostics (sample) iteration failed", "table", summaryTable, "error", err)
		}
	}
}
// persistDailyRollup stores per-day aggregates into vm_daily_rollup to speed monthly aggregation.
func (c *CronTask) persistDailyRollup(ctx context.Context, dayUnix int64, agg map[dailyAggKey]*dailyAggVal, totalSamples int, totalSamplesByVcenter map[string]int) error {
dbConn := c.Database.DB()