From bdce4288036fc862f4c32cd03e67e95baa08c625 Mon Sep 17 00:00:00 2001 From: Nathan Coad Date: Mon, 18 Mar 2024 10:05:44 +1100 Subject: [PATCH] use HA unreachable events when trying to find a failure --- main.go | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/main.go b/main.go index 13a02be..716ab72 100644 --- a/main.go +++ b/main.go @@ -42,14 +42,15 @@ type HostFailureResults struct { } var ( - c *govmomi.Client - ctx context.Context - cancel context.CancelFunc - location *time.Location - sha1ver string // sha1 revision used to build the program - buildTime string // when the executable was built - results []OutageResults - hostResults []HostFailureResults + c *govmomi.Client + ctx context.Context + cancel context.CancelFunc + location *time.Location + sha1ver string // sha1 revision used to build the program + buildTime string // when the executable was built + results []OutageResults + hostResults []HostFailureResults + haUnreachableEvents []types.Event ) // This function optionally filters events by a single MoRef, any additonal MoRefs are ignored @@ -269,6 +270,18 @@ func main() { log.Printf("Found at least one host failure, proceeding with VM restart search\n") vmFailures := getEvents([]string{"com.vmware.vc.ha.VmRestartedByHAEvent"}, []types.ManagedObjectReference{}, *begin, *end) + log.Printf("Searching for ha status change events\n") + haStatusChanges := getEvents([]string{"com.vmware.vc.HA.HostStateChangedEvent"}, []types.ManagedObjectReference{}, *begin, *end) + + // filter ha status changed messages for unreachable ones + for _, h := range haStatusChanges { + unreachableMessage := strings.Contains(strings.ToLower(h.FullFormattedMessage), "changed to unreachable") + if unreachableMessage { + haUnreachableEvents = append(haUnreachableEvents, h) + log.Printf("Host %s unreachable HA status event at %s : '%s'\n", h.Host.Name, h.CreatedTime.In(location).Format(time.ANSIC), h.FullFormattedMessage) + } + } + // Sort the host failure events by time sort.Slice(hostFailures[:], func(i, j int) bool { return hostFailures[i].CreatedTime.Before(hostFailures[j].CreatedTime) @@ -338,13 +351,20 @@ func main() { } else { // Didn't find any VM disconnected events log.Printf("could not determine previous host for this VM. Filtering all host failures for events prior to fuzzy VM restart time '%s'\n", fuzzyTime) - // Search for host failures - for _, hostEvent := range hostFailures { + // TODO Use HA unreachable events to find the host + for _, hostEvent := range haUnreachableEvents { if hostEvent.CreatedTime.In(location).Before(fuzzyTime) || hostEvent.CreatedTime.In(location).Equal(fuzzyTime) { possibleHosts = append(possibleHosts, hostEvent) } } - + /* + // Search for host failures + for _, hostEvent := range hostFailures { + if hostEvent.CreatedTime.In(location).Before(fuzzyTime) || hostEvent.CreatedTime.In(location).Equal(fuzzyTime) { + possibleHosts = append(possibleHosts, hostEvent) + } + } + */ log.Printf("Based on event times there were %d possible hosts this VM was running on\n", len(possibleHosts)) if len(possibleHosts) == 0 {