use HA unreachable events when trying to find a failure

This commit is contained in:
2024-03-18 10:05:44 +11:00
parent f0c9789819
commit bdce428803

42
main.go
View File

@@ -42,14 +42,15 @@ type HostFailureResults struct {
}
var (
c *govmomi.Client
ctx context.Context
cancel context.CancelFunc
location *time.Location
sha1ver string // sha1 revision used to build the program
buildTime string // when the executable was built
results []OutageResults
hostResults []HostFailureResults
c *govmomi.Client
ctx context.Context
cancel context.CancelFunc
location *time.Location
sha1ver string // sha1 revision used to build the program
buildTime string // when the executable was built
results []OutageResults
hostResults []HostFailureResults
haUnreachableEvents []types.Event
)
// This function optionally filters events by a single MoRef; any additional MoRefs are ignored
@@ -269,6 +270,18 @@ func main() {
log.Printf("Found at least one host failure, proceeding with VM restart search\n")
vmFailures := getEvents([]string{"com.vmware.vc.ha.VmRestartedByHAEvent"}, []types.ManagedObjectReference{}, *begin, *end)
log.Printf("Searching for ha status change events\n")
haStatusChanges := getEvents([]string{"com.vmware.vc.HA.HostStateChangedEvent"}, []types.ManagedObjectReference{}, *begin, *end)
// filter ha status changed messages for unreachable ones
for _, h := range haStatusChanges {
unreachableMessage := strings.Contains(strings.ToLower(h.FullFormattedMessage), "changed to unreachable")
if unreachableMessage {
haUnreachableEvents = append(haUnreachableEvents, h)
log.Printf("Host %s unreachable HA status event at %s : '%s'\n", h.Host.Name, h.CreatedTime.In(location).Format(time.ANSIC), h.FullFormattedMessage)
}
}
// Sort the host failure events by time
sort.Slice(hostFailures[:], func(i, j int) bool {
return hostFailures[i].CreatedTime.Before(hostFailures[j].CreatedTime)
@@ -338,13 +351,20 @@ func main() {
} else { // Didn't find any VM disconnected events
log.Printf("could not determine previous host for this VM. Filtering all host failures for events prior to fuzzy VM restart time '%s'\n", fuzzyTime)
// Search for host failures
for _, hostEvent := range hostFailures {
// Use HA unreachable events to find the host
for _, hostEvent := range haUnreachableEvents {
if hostEvent.CreatedTime.In(location).Before(fuzzyTime) || hostEvent.CreatedTime.In(location).Equal(fuzzyTime) {
possibleHosts = append(possibleHosts, hostEvent)
}
}
/*
// Search for host failures
for _, hostEvent := range hostFailures {
if hostEvent.CreatedTime.In(location).Before(fuzzyTime) || hostEvent.CreatedTime.In(location).Equal(fuzzyTime) {
possibleHosts = append(possibleHosts, hostEvent)
}
}
*/
log.Printf("Based on event times there were %d possible hosts this VM was running on\n", len(possibleHosts))
if len(possibleHosts) == 0 {