better searching for correct host failure time
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Nathan Coad
2023-07-18 10:23:49 +10:00
parent 348dc474b7
commit 6e6ed55bd3

49
main.go
View File

@@ -291,31 +291,32 @@ func main() {
//vm := getVM(event.Vm.Name)
// Use VmDisconnectedEvent to see which host this VM was on
disconnectedEvents := getEvents([]string{"VmDisconnectedEvent"}, []types.ManagedObjectReference{vm.Reference()}, *begin, *end)
log.Printf("Retrieved '%d' VmDisconnectedEvent events.\n", len(disconnectedEvents))
vmDisconnectedEvents := getEvents([]string{"VmDisconnectedEvent"}, []types.ManagedObjectReference{vm.Reference()}, *begin, *end)
log.Printf("Retrieved '%d' VmDisconnectedEvent events.\n", len(vmDisconnectedEvents))
// Determine which host the VM was previously running on
if len(disconnectedEvents) > 0 {
if len(vmDisconnectedEvents) > 0 {
// Sort the disconnected events by time
sort.Slice(disconnectedEvents[:], func(i, j int) bool {
return disconnectedEvents[i].CreatedTime.Before(disconnectedEvents[j].CreatedTime)
sort.Slice(vmDisconnectedEvents[:], func(i, j int) bool {
return vmDisconnectedEvents[i].CreatedTime.Before(vmDisconnectedEvents[j].CreatedTime)
})
log.Printf("After sorting, VmDisconnectedEvent list looks like this:\n")
for _, h := range disconnectedEvents {
for _, h := range vmDisconnectedEvents {
log.Printf("%d [%s] VM: %s, Host: %s, Message: %s\n", h.Key, h.CreatedTime.In(location).Format(time.ANSIC), h.Vm.Name, h.Host.Name, h.FullFormattedMessage)
}
// Search for any disconnected messages prior to restart time
// What if there are multiple host HA event messages but only one VM disconnected message?
// Search for any disconnected messages prior to restart time
log.Printf("Filtering VmDisconnectedEvent list based on fuzzy VM restart time %s\n", fuzzyTime)
for _, e := range disconnectedEvents {
for _, e := range vmDisconnectedEvents {
if e.CreatedTime.In(location).Before(fuzzyTime) || e.CreatedTime.In(location).Equal(fuzzyTime) {
log.Printf("VM disconnected event on host %s at time %s is applicable\n", e.Host.Name, e.CreatedTime.In(location))
possibleHosts = append(possibleHosts, e)
}
}
log.Printf("After filtering there are %d events\n", len(possibleHosts))
log.Printf("After filtering VmDisconnectedEvent there are %d events\n", len(possibleHosts))
// It's possible that the VM disconnected messages don't relate to the host HA events that we found
// If that is the case then we fall back to the most recent host failure message in our list
@@ -354,11 +355,31 @@ func main() {
}
if len(possibleHosts) == 1 {
log.Printf("Found a single host failure event relating to VM %s\n", event.Vm.Name)
log.Printf("Failed host was '%s', using outage start time of '%s'\n", possibleHosts[0].Host.Name, possibleHosts[0].CreatedTime.In(location))
failedHost = possibleHosts[0].Host.Name
outageStart = possibleHosts[0].CreatedTime.In(location)
restartTime = vmRestartTime
log.Printf("Found a single host that failed relating to VM %s\n", event.Vm.Name)
var checkActualTime []types.Event
// Search the list of host failures to get the last host HA event before this VM was disconnected
for _, hostEvent := range hostFailures {
if hostEvent.Host.Name == possibleHosts[0].Host.Name {
if hostEvent.CreatedTime.In(location).Before(vmRestartTime) || hostEvent.CreatedTime.In(location).Equal(vmRestartTime) {
checkActualTime = append(checkActualTime, hostEvent)
}
}
}
if len(checkActualTime) == 1 {
log.Printf("Found a single host failure event for our failed host that occurred before vm restart time\n")
failedHost = checkActualTime[0].Host.Name
outageStart = checkActualTime[0].CreatedTime.In(location)
restartTime = vmRestartTime
} else {
log.Printf("Found %d failure event(s) for our failed host that occurred before vm restart time\n", len(checkActualTime))
log.Printf("Assuming failed host was '%s', with outage start time of '%s'\n", possibleHosts[0].Host.Name, possibleHosts[0].CreatedTime.In(location))
failedHost = possibleHosts[0].Host.Name
outageStart = possibleHosts[0].CreatedTime.In(location)
restartTime = vmRestartTime
}
} else if len(possibleHosts) > 1 {
log.Printf("Found multiple host failure events relating to VM %s\n", event.Vm.Name)