add prometheus instrumentation
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2026-01-15 11:43:29 +11:00
parent 4d754ee263
commit ea68331208
9 changed files with 193 additions and 42 deletions

125
internal/metrics/metrics.go Normal file
View File

@@ -0,0 +1,125 @@
package metrics
import (
"net/http"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
var (
registry = prometheus.NewRegistry()
HourlySnapshotTotal = prometheus.NewCounter(prometheus.CounterOpts{Name: "vctp_hourly_snapshots_total", Help: "Total number of hourly snapshot jobs completed."})
HourlySnapshotFailures = prometheus.NewCounter(prometheus.CounterOpts{Name: "vctp_hourly_snapshots_failed_total", Help: "Hourly snapshot jobs that failed."})
HourlySnapshotLast = prometheus.NewGauge(prometheus.GaugeOpts{Name: "vctp_hourly_snapshot_last_unix", Help: "Unix timestamp of the last hourly snapshot start time."})
HourlySnapshotRows = prometheus.NewGauge(prometheus.GaugeOpts{Name: "vctp_hourly_snapshot_last_rows", Help: "Row count of the last hourly snapshot table."})
DailyAggregationsTotal = prometheus.NewCounter(prometheus.CounterOpts{Name: "vctp_daily_aggregations_total", Help: "Total number of daily aggregation jobs completed."})
DailyAggregationFailures = prometheus.NewCounter(prometheus.CounterOpts{Name: "vctp_daily_aggregations_failed_total", Help: "Daily aggregation jobs that failed."})
DailyAggregationDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "vctp_daily_aggregation_duration_seconds",
Help: "Duration of daily aggregation jobs.",
Buckets: prometheus.ExponentialBuckets(1, 2, 10),
})
MonthlyAggregationsTotal = prometheus.NewCounter(prometheus.CounterOpts{Name: "vctp_monthly_aggregations_total", Help: "Total number of monthly aggregation jobs completed."})
MonthlyAggregationFailures = prometheus.NewCounter(prometheus.CounterOpts{Name: "vctp_monthly_aggregations_failed_total", Help: "Monthly aggregation jobs that failed."})
MonthlyAggregationDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
Name: "vctp_monthly_aggregation_duration_seconds",
Help: "Duration of monthly aggregation jobs.",
Buckets: prometheus.ExponentialBuckets(1, 2, 10),
})
ReportsAvailable = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "vctp_reports_available",
Help: "Number of downloadable reports present on disk.",
})
VcenterConnectFailures = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "vctp_vcenter_connect_failures_total",
Help: "Failed connections to vCenter during snapshot runs.",
}, []string{"vcenter"})
VcenterSnapshotDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "vctp_vcenter_snapshot_duration_seconds",
Help: "Duration of per-vCenter hourly snapshot jobs.",
Buckets: prometheus.ExponentialBuckets(0.5, 2, 10),
}, []string{"vcenter"})
VcenterInventorySize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "vctp_vcenter_inventory_size",
Help: "Number of VMs seen in the last successful snapshot per vCenter.",
}, []string{"vcenter"})
)
func init() {
registry.MustRegister(
HourlySnapshotTotal,
HourlySnapshotFailures,
HourlySnapshotLast,
HourlySnapshotRows,
DailyAggregationsTotal,
DailyAggregationFailures,
DailyAggregationDuration,
MonthlyAggregationsTotal,
MonthlyAggregationFailures,
MonthlyAggregationDuration,
ReportsAvailable,
VcenterConnectFailures,
VcenterSnapshotDuration,
VcenterInventorySize,
)
}
// Handler returns an http.Handler that serves Prometheus metrics.
func Handler() http.Handler {
return promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
}
// RecordVcenterSnapshot logs per-vCenter snapshot metrics.
func RecordVcenterSnapshot(vcenter string, duration time.Duration, vmCount int64, err error) {
VcenterSnapshotDuration.WithLabelValues(vcenter).Observe(duration.Seconds())
if err != nil {
VcenterConnectFailures.WithLabelValues(vcenter).Inc()
return
}
VcenterInventorySize.WithLabelValues(vcenter).Set(float64(vmCount))
}
// RecordHourlySnapshot logs aggregate hourly snapshot results.
func RecordHourlySnapshot(start time.Time, rows int64, err error) {
HourlySnapshotLast.Set(float64(start.Unix()))
HourlySnapshotRows.Set(float64(rows))
if err != nil {
HourlySnapshotFailures.Inc()
return
}
HourlySnapshotTotal.Inc()
}
// RecordDailyAggregation logs daily aggregation metrics.
func RecordDailyAggregation(duration time.Duration, err error) {
DailyAggregationDuration.Observe(duration.Seconds())
if err != nil {
DailyAggregationFailures.Inc()
return
}
DailyAggregationsTotal.Inc()
}
// RecordMonthlyAggregation logs monthly aggregation metrics.
func RecordMonthlyAggregation(duration time.Duration, err error) {
MonthlyAggregationDuration.Observe(duration.Seconds())
if err != nil {
MonthlyAggregationFailures.Inc()
return
}
MonthlyAggregationsTotal.Inc()
}
// SetReportsAvailable updates the gauge for report files found on disk.
func SetReportsAvailable(count int) {
ReportsAvailable.Set(float64(count))
}