add repair functionality
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
219
server/handler/snapshotRepair.go
Normal file
219
server/handler/snapshotRepair.go
Normal file
@@ -0,0 +1,219 @@
|
||||
package handler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"vctp/db"
|
||||
"vctp/internal/report"
|
||||
)
|
||||
|
||||
// SnapshotRepair scans existing daily summaries and backfills missing SnapshotTime and lifecycle fields.
|
||||
// @Summary Repair daily summaries
|
||||
// @Description Backfills SnapshotTime and lifecycle info for existing daily summary tables and reruns monthly lifecycle refinement using hourly data.
|
||||
// @Tags snapshots
|
||||
// @Produce json
|
||||
// @Success 200 {object} map[string]string
|
||||
// @Router /api/snapshots/repair [post]
|
||||
func (h *Handler) SnapshotRepair(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
h.Logger.Info("snapshot repair started", "scope", "daily")
|
||||
repaired, failed := h.repairDailySummaries(r.Context(), time.Now())
|
||||
h.Logger.Info("snapshot repair finished", "daily_repaired", repaired, "daily_failed", failed)
|
||||
|
||||
resp := map[string]string{
|
||||
"status": "ok",
|
||||
"repaired": strconv.Itoa(repaired),
|
||||
"failed": strconv.Itoa(failed),
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(resp)
|
||||
}
|
||||
|
||||
func (h *Handler) repairDailySummaries(ctx context.Context, now time.Time) (repaired int, failed int) {
|
||||
dbConn := h.Database.DB()
|
||||
|
||||
dailyRecs, err := report.SnapshotRecordsWithFallback(ctx, h.Database, "daily", "inventory_daily_summary_", "20060102", time.Time{}, now)
|
||||
if err != nil {
|
||||
h.Logger.Warn("failed to list daily summaries", "error", err)
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
for _, rec := range dailyRecs {
|
||||
h.Logger.Debug("repair daily summary table", "table", rec.TableName, "snapshot_time", rec.SnapshotTime)
|
||||
dayStart := rec.SnapshotTime
|
||||
dayEnd := dayStart.Add(24 * time.Hour)
|
||||
|
||||
if err := db.EnsureSummaryTable(ctx, dbConn, rec.TableName); err != nil {
|
||||
h.Logger.Warn("ensure summary table failed", "table", rec.TableName, "error", err)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
|
||||
hourlyRecs, err := report.SnapshotRecordsWithFallback(ctx, h.Database, "hourly", "inventory_hourly_", "epoch", dayStart, dayEnd)
|
||||
if err != nil || len(hourlyRecs) == 0 {
|
||||
h.Logger.Warn("no hourly snapshots for repair window", "table", rec.TableName, "error", err)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
|
||||
cols := []string{
|
||||
`"InventoryId"`, `"Name"`, `"Vcenter"`, `"VmId"`, `"EventKey"`, `"CloudId"`, `"CreationTime"`,
|
||||
`"DeletionTime"`, `"ResourcePool"`, `"Datacenter"`, `"Cluster"`, `"Folder"`,
|
||||
`"ProvisionedDisk"`, `"VcpuCount"`, `"RamGB"`, `"IsTemplate"`, `"PoweredOn"`,
|
||||
`"SrmPlaceholder"`, `"VmUuid"`, `"SnapshotTime"`,
|
||||
}
|
||||
union, err := buildUnionFromRecords(hourlyRecs, cols, `COALESCE(CAST("IsTemplate" AS TEXT), '') NOT IN ('TRUE','true','1')`)
|
||||
if err != nil {
|
||||
h.Logger.Warn("failed to build union for repair", "table", rec.TableName, "error", err)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
|
||||
h.Logger.Debug("built hourly union for repair", "table", rec.TableName, "hourly_tables", len(hourlyRecs))
|
||||
if err := db.BackfillSnapshotTimeFromUnion(ctx, dbConn, rec.TableName, union); err != nil {
|
||||
h.Logger.Warn("failed to backfill snapshot time", "table", rec.TableName, "error", err)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
h.Logger.Debug("snapshot time backfill complete", "table", rec.TableName)
|
||||
if err := db.RefineCreationDeletionFromUnion(ctx, dbConn, rec.TableName, union); err != nil {
|
||||
h.Logger.Warn("failed to refine lifecycle during repair", "table", rec.TableName, "error", err)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
h.Logger.Debug("lifecycle refinement complete", "table", rec.TableName)
|
||||
h.Logger.Info("repair applied", "table", rec.TableName, "actions", "snapshot_time+lifecycle")
|
||||
repaired++
|
||||
}
|
||||
return repaired, failed
|
||||
}
|
||||
|
||||
// SnapshotRepairSuite runs a sequence of repair routines to fix older deployments in one call.
|
||||
// It rebuilds the snapshot registry, syncs vcenter totals, repairs daily summaries, and refines monthly lifecycle data.
|
||||
// @Summary Run full snapshot repair suite
|
||||
// @Description Rebuilds snapshot registry, backfills per-vCenter totals, repairs daily summaries (SnapshotTime/lifecycle), and refines monthly lifecycle.
|
||||
// @Tags snapshots
|
||||
// @Produce json
|
||||
// @Success 200 {object} map[string]string
|
||||
// @Router /api/snapshots/repair/all [post]
|
||||
func (h *Handler) SnapshotRepairSuite(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
ctx := r.Context()
|
||||
dbConn := h.Database.DB()
|
||||
|
||||
// Step 1: rebuild snapshot registry from existing tables.
|
||||
h.Logger.Info("repair suite step", "step", "snapshot_registry")
|
||||
if stats, err := report.MigrateSnapshotRegistry(ctx, h.Database); err != nil {
|
||||
h.Logger.Warn("snapshot registry migration failed", "error", err)
|
||||
} else {
|
||||
h.Logger.Info("snapshot registry migration complete", "hourly_renamed", stats.HourlyRenamed, "daily_registered", stats.DailyRegistered, "monthly_registered", stats.MonthlyRegistered, "errors", stats.Errors)
|
||||
}
|
||||
|
||||
// Step 2: backfill vcenter_totals from registry hourly tables.
|
||||
h.Logger.Info("repair suite step", "step", "vcenter_totals")
|
||||
if err := db.SyncVcenterTotalsFromSnapshots(ctx, dbConn); err != nil {
|
||||
h.Logger.Warn("sync vcenter totals failed", "error", err)
|
||||
}
|
||||
|
||||
// Step 3: repair daily summaries (snapshot time + lifecycle).
|
||||
h.Logger.Info("repair suite step", "step", "daily_summaries")
|
||||
dailyRepaired, dailyFailed := h.repairDailySummaries(ctx, time.Now())
|
||||
|
||||
// Step 4: refine monthly lifecycle using daily summaries (requires SnapshotTime now present after step 3).
|
||||
h.Logger.Info("repair suite step", "step", "monthly_refine")
|
||||
monthlyRefined, monthlyFailed := h.refineMonthlyFromDaily(ctx, time.Now())
|
||||
|
||||
resp := map[string]string{
|
||||
"status": "ok",
|
||||
"daily_repaired": strconv.Itoa(dailyRepaired),
|
||||
"daily_failed": strconv.Itoa(dailyFailed),
|
||||
"monthly_refined": strconv.Itoa(monthlyRefined),
|
||||
"monthly_failed": strconv.Itoa(monthlyFailed),
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(resp)
|
||||
}
|
||||
|
||||
func (h *Handler) refineMonthlyFromDaily(ctx context.Context, now time.Time) (refined int, failed int) {
|
||||
dbConn := h.Database.DB()
|
||||
|
||||
dailyRecs, err := report.SnapshotRecordsWithFallback(ctx, h.Database, "daily", "inventory_daily_summary_", "20060102", time.Time{}, now)
|
||||
if err != nil {
|
||||
h.Logger.Warn("failed to list daily summaries for monthly refine", "error", err)
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
// Group daily tables by month (YYYYMM).
|
||||
grouped := make(map[string][]report.SnapshotRecord)
|
||||
for _, rec := range dailyRecs {
|
||||
key := rec.SnapshotTime.Format("200601")
|
||||
grouped[key] = append(grouped[key], rec)
|
||||
}
|
||||
|
||||
cols := []string{
|
||||
`"InventoryId"`, `"Name"`, `"Vcenter"`, `"VmId"`, `"EventKey"`, `"CloudId"`, `"CreationTime"`,
|
||||
`"DeletionTime"`, `"ResourcePool"`, `"Datacenter"`, `"Cluster"`, `"Folder"`,
|
||||
`"ProvisionedDisk"`, `"VcpuCount"`, `"RamGB"`, `"IsTemplate"`, `"PoweredOn"`,
|
||||
`"SrmPlaceholder"`, `"VmUuid"`, `"SnapshotTime"`,
|
||||
}
|
||||
|
||||
for monthKey, recs := range grouped {
|
||||
summaryTable := fmt.Sprintf("inventory_monthly_summary_%s", monthKey)
|
||||
h.Logger.Debug("monthly refine", "table", summaryTable, "daily_tables", len(recs))
|
||||
if err := db.EnsureSummaryTable(ctx, dbConn, summaryTable); err != nil {
|
||||
h.Logger.Warn("ensure monthly summary failed", "table", summaryTable, "error", err)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
|
||||
union, err := buildUnionFromRecords(recs, cols, `COALESCE(CAST("IsTemplate" AS TEXT), '') NOT IN ('TRUE','true','1')`)
|
||||
if err != nil {
|
||||
h.Logger.Warn("failed to build union for monthly refine", "table", summaryTable, "error", err)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
|
||||
if err := db.RefineCreationDeletionFromUnion(ctx, dbConn, summaryTable, union); err != nil {
|
||||
h.Logger.Warn("failed to refine monthly lifecycle", "table", summaryTable, "error", err)
|
||||
failed++
|
||||
continue
|
||||
}
|
||||
h.Logger.Debug("monthly refine applied", "table", summaryTable)
|
||||
refined++
|
||||
}
|
||||
return refined, failed
|
||||
}
|
||||
|
||||
func buildUnionFromRecords(recs []report.SnapshotRecord, columns []string, where string) (string, error) {
|
||||
if len(recs) == 0 {
|
||||
return "", fmt.Errorf("no tables provided for union")
|
||||
}
|
||||
colList := strings.Join(columns, ", ")
|
||||
parts := make([]string, 0, len(recs))
|
||||
for _, rec := range recs {
|
||||
if err := db.ValidateTableName(rec.TableName); err != nil {
|
||||
continue
|
||||
}
|
||||
q := fmt.Sprintf(`SELECT %s FROM %s`, colList, rec.TableName)
|
||||
if where != "" {
|
||||
q = q + " WHERE " + where
|
||||
}
|
||||
parts = append(parts, q)
|
||||
}
|
||||
if len(parts) == 0 {
|
||||
return "", fmt.Errorf("no valid tables for union")
|
||||
}
|
||||
return strings.Join(parts, "\nUNION ALL\n"), nil
|
||||
}
|
||||
Reference in New Issue
Block a user