From 54fcbb8d867c94bbe1a0f038af9ca9f30d9ecdd3 Mon Sep 17 00:00:00 2001 From: Nathan Coad Date: Thu, 15 Jun 2023 10:30:02 +1000 Subject: [PATCH] search for host HA events starting 5 minutes before VM restart --- main.go | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/main.go b/main.go index 67d96bc..d728d6e 100644 --- a/main.go +++ b/main.go @@ -278,6 +278,9 @@ func main() { var possibleHosts []types.Event event := vmFailures[i] vmRestartTime := event.CreatedTime.In(location) + + // Sometimes host HA events can come through a few minutes after a VM restart event, so create a "fuzzy" starting time to search for host HA events. NOTE(review): fuzzyTime is 5 minutes BEFORE the restart, and the Before/Equal filters below only accept events at or before it, so this narrows the window instead of admitting events logged after the restart - confirm the sign of the offset is intended. + fuzzyTime := vmRestartTime.Add(-5 * time.Minute) log.Printf("Failure event for VM '%s' restarted in cluster '%s'\n", event.Vm.Name, event.ComputeResource.Name) // Get a reference to the cluster mentioned @@ -302,9 +305,10 @@ func main() { } // Search for any disconnected messages prior to restart time - log.Printf("Filtering VmDisconnectedEvent list based on VM restart time %s\n", vmRestartTime) + + log.Printf("Filtering VmDisconnectedEvent list based on fuzzy VM restart time %s\n", fuzzyTime) for _, e := range disconnectedEvents { - if e.CreatedTime.In(location).Before(vmRestartTime) || e.CreatedTime.In(location).Equal(vmRestartTime) { + if e.CreatedTime.In(location).Before(fuzzyTime) || e.CreatedTime.In(location).Equal(fuzzyTime) { log.Printf("VM disconnected event on host %s at time %s is applicable\n", e.Host.Name, e.CreatedTime.In(location)) possibleHosts = append(possibleHosts, e) } @@ -317,7 +321,7 @@ func main() { log.Printf("No corresponding VM disconnected messages, falling back to any applicable host that experienced a HA event.\n") // Search for host failures for _, hostEvent := range hostFailures { - if hostEvent.CreatedTime.In(location).Before(vmRestartTime) || hostEvent.CreatedTime.In(location).Equal(vmRestartTime) { + if hostEvent.CreatedTime.In(location).Before(fuzzyTime) || 
hostEvent.CreatedTime.In(location).Equal(fuzzyTime) { possibleHosts = append(possibleHosts, hostEvent) } } @@ -325,16 +329,16 @@ func main() { log.Printf("Based on event times there were %d possible hosts this VM was running on\n", len(possibleHosts)) if len(possibleHosts) == 0 { - log.Printf("No ESXi outage events happened before VM %s restart event at %s, skipping this event.\n", event.Vm.Name, vmRestartTime) + log.Printf("No ESXi outage events happened before VM %s fuzzy restart event at %s, skipping this event.\n", event.Vm.Name, fuzzyTime) continue } } } else { // Didn't find any VM disconnected events - log.Printf("could not determine previous host for this VM. Filtering all host failures for events prior to VM restart time '%s'\n", vmRestartTime) + log.Printf("could not determine previous host for this VM. Filtering all host failures for events prior to fuzzy VM restart time '%s'\n", fuzzyTime) // Search for host failures for _, hostEvent := range hostFailures { - if hostEvent.CreatedTime.In(location).Before(vmRestartTime) || hostEvent.CreatedTime.In(location).Equal(vmRestartTime) { + if hostEvent.CreatedTime.In(location).Before(fuzzyTime) || hostEvent.CreatedTime.In(location).Equal(fuzzyTime) { possibleHosts = append(possibleHosts, hostEvent) } } @@ -342,7 +346,7 @@ func main() { log.Printf("Based on event times there were %d possible hosts this VM was running on\n", len(possibleHosts)) if len(possibleHosts) == 0 { - log.Printf("No ESXi outage events happened before VM %s restart event at %s, skipping this event.\n", event.Vm.Name, vmRestartTime) + log.Printf("No ESXi outage events happened before VM %s fuzzy restart event at %s, skipping this event.\n", event.Vm.Name, fuzzyTime) continue } }