better searching for correct host failure time
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
49
main.go
49
main.go
@@ -291,31 +291,32 @@ func main() {
|
||||
//vm := getVM(event.Vm.Name)
|
||||
|
||||
// Use VmDisconnectedEvent to see which host this VM was on
|
||||
disconnectedEvents := getEvents([]string{"VmDisconnectedEvent"}, []types.ManagedObjectReference{vm.Reference()}, *begin, *end)
|
||||
log.Printf("Retrieved '%d' VmDisconnectedEvent events.\n", len(disconnectedEvents))
|
||||
vmDisconnectedEvents := getEvents([]string{"VmDisconnectedEvent"}, []types.ManagedObjectReference{vm.Reference()}, *begin, *end)
|
||||
log.Printf("Retrieved '%d' VmDisconnectedEvent events.\n", len(vmDisconnectedEvents))
|
||||
|
||||
// Determine which host the VM was previously running on
|
||||
if len(disconnectedEvents) > 0 {
|
||||
if len(vmDisconnectedEvents) > 0 {
|
||||
// Sort the disconnected events by time
|
||||
sort.Slice(disconnectedEvents[:], func(i, j int) bool {
|
||||
return disconnectedEvents[i].CreatedTime.Before(disconnectedEvents[j].CreatedTime)
|
||||
sort.Slice(vmDisconnectedEvents[:], func(i, j int) bool {
|
||||
return vmDisconnectedEvents[i].CreatedTime.Before(vmDisconnectedEvents[j].CreatedTime)
|
||||
})
|
||||
|
||||
log.Printf("After sorting, VmDisconnectedEvent list looks like this:\n")
|
||||
for _, h := range disconnectedEvents {
|
||||
for _, h := range vmDisconnectedEvents {
|
||||
log.Printf("%d [%s] VM: %s, Host: %s, Message: %s\n", h.Key, h.CreatedTime.In(location).Format(time.ANSIC), h.Vm.Name, h.Host.Name, h.FullFormattedMessage)
|
||||
}
|
||||
|
||||
// Search for any disconnected messages prior to restart time
|
||||
// What if there are multiple host HA event messages but only one VM disconnected message?
|
||||
|
||||
// Search for any disconnected messages prior to restart time
|
||||
log.Printf("Filtering VmDisconnectedEvent list based on fuzzy VM restart time %s\n", fuzzyTime)
|
||||
for _, e := range disconnectedEvents {
|
||||
for _, e := range vmDisconnectedEvents {
|
||||
if e.CreatedTime.In(location).Before(fuzzyTime) || e.CreatedTime.In(location).Equal(fuzzyTime) {
|
||||
log.Printf("VM disconnected event on host %s at time %s is applicable\n", e.Host.Name, e.CreatedTime.In(location))
|
||||
possibleHosts = append(possibleHosts, e)
|
||||
}
|
||||
}
|
||||
log.Printf("After filtering there are %d events\n", len(possibleHosts))
|
||||
log.Printf("After filtering VmDisconnectedEvent there are %d events\n", len(possibleHosts))
|
||||
|
||||
// It's possible that the VM disconnected messages don't relate to the host HA events that we found
|
||||
// If that is the case then we fall back to the most recent host failure message in our list
|
||||
@@ -354,11 +355,31 @@ func main() {
|
||||
}
|
||||
|
||||
if len(possibleHosts) == 1 {
|
||||
log.Printf("Found a single host failure event relating to VM %s\n", event.Vm.Name)
|
||||
log.Printf("Failed host was '%s', using outage start time of '%s'\n", possibleHosts[0].Host.Name, possibleHosts[0].CreatedTime.In(location))
|
||||
failedHost = possibleHosts[0].Host.Name
|
||||
outageStart = possibleHosts[0].CreatedTime.In(location)
|
||||
restartTime = vmRestartTime
|
||||
log.Printf("Found a single host that failed relating to VM %s\n", event.Vm.Name)
|
||||
|
||||
var checkActualTime []types.Event
|
||||
|
||||
// Search the list of host failures to get the last host HA event before this VM was disconnected
|
||||
for _, hostEvent := range hostFailures {
|
||||
if hostEvent.Host.Name == possibleHosts[0].Host.Name {
|
||||
if hostEvent.CreatedTime.In(location).Before(vmRestartTime) || hostEvent.CreatedTime.In(location).Equal(vmRestartTime) {
|
||||
checkActualTime = append(checkActualTime, hostEvent)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(checkActualTime) == 1 {
|
||||
log.Printf("Found a single host failure event for our failed host that occurred before vm restart time\n")
|
||||
failedHost = checkActualTime[0].Host.Name
|
||||
outageStart = checkActualTime[0].CreatedTime.In(location)
|
||||
restartTime = vmRestartTime
|
||||
} else {
|
||||
log.Printf("Found %d failure event(s) for our failed host that occurred before vm restart time\n", len(checkActualTime))
|
||||
log.Printf("Assuming failed host was '%s', with outage start time of '%s'\n", possibleHosts[0].Host.Name, possibleHosts[0].CreatedTime.In(location))
|
||||
failedHost = possibleHosts[0].Host.Name
|
||||
outageStart = possibleHosts[0].CreatedTime.In(location)
|
||||
restartTime = vmRestartTime
|
||||
}
|
||||
} else if len(possibleHosts) > 1 {
|
||||
log.Printf("Found multiple host failure events relating to VM %s\n", event.Vm.Name)
|
||||
|
||||
|
Reference in New Issue
Block a user