From 348dc474b724c335ad102284276da378481bd904 Mon Sep 17 00:00:00 2001 From: Nathan Coad Date: Tue, 18 Jul 2023 09:56:38 +1000 Subject: [PATCH] try narrowing down possible hosts by also checking actual VM restart time --- main.go | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/main.go b/main.go index 13a02be..ee86abc 100644 --- a/main.go +++ b/main.go @@ -362,13 +362,30 @@ func main() { } else if len(possibleHosts) > 1 { log.Printf("Found multiple host failure events relating to VM %s\n", event.Vm.Name) - // possible hosts is sorted by time, so use the last value if there are multiple - lastIndex := len(possibleHosts) - 1 + var checkActualTime []types.Event - log.Printf("Failed host was '%s', using outage start time of '%s'\n", possibleHosts[lastIndex].Host.Name, possibleHosts[lastIndex].CreatedTime.In(location)) - failedHost = possibleHosts[lastIndex].Host.Name - outageStart = possibleHosts[lastIndex].CreatedTime.In(location) - restartTime = vmRestartTime + // Search for any disconnected messages prior to actual restart time rather than fuzzy time + log.Printf("Checking host failure list based on actual VM restart time %s\n", vmRestartTime) + for _, hostEvent := range hostFailures { + if hostEvent.CreatedTime.In(location).Before(vmRestartTime) || hostEvent.CreatedTime.In(location).Equal(vmRestartTime) { + checkActualTime = append(checkActualTime, hostEvent) + } + } + + // if that search gives us exactly one result then use that + if len(checkActualTime) == 1 { + log.Printf("Found exactly one host corresponding to actual VM restart time. Failed host was '%s', using outage start time of '%s'\n", checkActualTime[0].Host.Name, checkActualTime[0].CreatedTime.In(location)) + failedHost = checkActualTime[0].Host.Name + outageStart = checkActualTime[0].CreatedTime.In(location) + restartTime = vmRestartTime + } else { + // if using the actual VM restart time doesn't narrow things down then go back to using the last host failure time before the fuzzy VM restart time + lastIndex := len(possibleHosts) - 1 + log.Printf("Failed host was '%s', using outage start time of '%s'\n", possibleHosts[lastIndex].Host.Name, possibleHosts[lastIndex].CreatedTime.In(location)) + failedHost = possibleHosts[lastIndex].Host.Name + outageStart = possibleHosts[lastIndex].CreatedTime.In(location) + restartTime = vmRestartTime + } } else { log.Printf("Didn't find any data to suggest which host this VM was running on before!\n") restartTime = vmRestartTime