diff --git a/.drone.yml b/.drone.yml index 102b0e5..9c9e4be 100644 --- a/.drone.yml +++ b/.drone.yml @@ -27,6 +27,7 @@ steps: # PLUGIN_INCLUDE: ^events$,^events_checksum.txt$ # PLUGIN_EXCLUDE: ^\.git/$ +# https://github.com/hypervtechnics/drone-sftp - name: dell-sftp-deploy image: hypervtechnics/drone-sftp settings: diff --git a/main.go b/main.go index 14f7135..7eb117a 100644 --- a/main.go +++ b/main.go @@ -182,7 +182,7 @@ func getCluster(name string) mo.ClusterComputeResource { return mo.VirtualMachine{} } */ -func getVmInCluster(name string, cluster types.ManagedObjectReference) mo.VirtualMachine { +func getVmInCluster(name string, cluster types.ManagedObjectReference) (mo.VirtualMachine, error) { // Create a container view so that we can search vCenter for a VM if we found any failure events m := view.NewManager(c.Client) cv, _ := m.CreateContainerView(ctx, cluster, []string{"VirtualMachine"}, true) @@ -192,18 +192,18 @@ func getVmInCluster(name string, cluster types.ManagedObjectReference) mo.Virtua err := cv.Retrieve(ctx, []string{"VirtualMachine"}, []string{"summary", "name"}, &vms) if err != nil { log.Printf("Failed searching for VM %s : %s\n", name, err) - return mo.VirtualMachine{} + return mo.VirtualMachine{}, fmt.Errorf("error searching for VM %s : %s", name, err) } else { for _, vm := range vms { if vm.Name == name { - log.Printf("Found corresponding VM with MoRef '%s'\n", vm.Reference()) - return vm + log.Printf("Found corresponding VM with MoRef '%s'", vm.Reference()) + return vm, nil } } } // If we reached here then we didn't find a VM - return mo.VirtualMachine{} + return mo.VirtualMachine{}, fmt.Errorf("no VM found with name %s", name) } func main() { @@ -272,6 +272,7 @@ func main() { log.Printf("Searching for ha status change events\n") haStatusChanges := getEvents([]string{"com.vmware.vc.HA.HostStateChangedEvent"}, []types.ManagedObjectReference{}, *begin, *end) + log.Printf("Found %d ha status change events\n", len(haStatusChanges)) // filter ha status changed messages for unreachable ones for _, h := range haStatusChanges { @@ -290,19 +291,18 @@ func main() { } } + // make sure that this event is within 10 minutes either side of the corresponding host failed event if hostFound { - // make sure that this event is within 10 minutes of the corresponding host failed event unreachableStartComparison := h.CreatedTime.In(location).Add(time.Duration(int64(time.Minute) * -10)) unreachableEndComparison := h.CreatedTime.In(location).Add(time.Duration(int64(time.Minute) * 10)) if haFailedTime.Before(unreachableEndComparison) && haFailedTime.After(unreachableStartComparison) { haUnreachableEvents = append(haUnreachableEvents, h) - log.Printf("Recording host %s unreachable HA status event at %s\n", h.Host.Name, h.CreatedTime.In(location).Format(time.ANSIC)) + log.Printf("Keeping host %s unreachable HA status event at %s\n", h.Host.Name, h.CreatedTime.In(location).Format(time.ANSIC)) } else { - log.Printf("Host %s HA failure time at %s was before %s or after %s, excluding this ha unreachable event\n", h.Host.Name, haFailedTime.Format(time.ANSIC), - unreachableStartComparison, unreachableEndComparison) + log.Printf("Excluding HA Unreachable for Host %s at time %s since it was before %s or after %s\n", h.Host.Name, haFailedTime.Format(time.ANSIC), + unreachableEndComparison, unreachableStartComparison) } - } else { log.Printf("Host %s was not found in the list of hostfailure events, skipping this host\n", h.Host.Name) } @@ -325,6 +325,7 @@ func main() { } for i := range vmFailures { + var vm mo.VirtualMachine var outageStart, restartTime time.Time var failedHost string var possibleHosts []types.Event @@ -339,28 +340,31 @@ func main() { fuzzyTime := vmRestartTime.Add(time.Duration(int64(time.Minute) * int64(*fuzzyMinutes))) log.Printf("Failure event for VM '%s' restarted in cluster '%s' at %s\n", event.Vm.Name, event.ComputeResource.Name, event.CreatedTime.In(location).Format(time.ANSIC)) - // Get a reference to the cluster mentioned + // filter all the disconnected events to the ones belonging to this VM + for _, e := range allVmDisconnectedEvents { + if e.Vm.Name == event.Vm.Name { + log.Printf("Adding VM disconnected event on host %s at time %s\n", e.Host.Name, e.CreatedTime.In(location)) + vmDisconnectedEvents = append(vmDisconnectedEvents, e) + } + } + log.Printf("Filtered '%d' VmDisconnectedEvent events belonging to VM '%s'\n", len(vmDisconnectedEvents), event.Vm.Name) + + // Get a reference to the cluster mentioned in the event cluster := getCluster((event.ComputeResource.Name)) - vm := getVmInCluster(event.Vm.Name, cluster.Reference()) + vm, err = getVmInCluster(event.Vm.Name, cluster.Reference()) //log.Printf("VM: '%+v'\n", vm) //vm := getVM(event.Vm.Name) - // If we couldn't find the vm then skip this event - if len(vm.ExtensibleManagedObject.Self.Value) == 0 { + // If we couldn't find the vm then try using a list of all the disconnected events found + //if len(vm.ExtensibleManagedObject.Self.Value) == 0 { + if err != nil { log.Printf("No VM matching string '%s' found in cluster '%s'\n", event.Vm.Name, cluster.Reference()) - for _, e := range allVmDisconnectedEvents { - if e.Vm.Name == event.Vm.Name { - log.Printf("Adding VM disconnected event on host %s at time %s\n", e.Host.Name, e.CreatedTime.In(location)) - vmDisconnectedEvents = append(vmDisconnectedEvents, e) - } - } - log.Printf("Filtered '%d' VmDisconnectedEvent events.\n", len(vmDisconnectedEvents)) - //continue + vmFound = false } else { // Use VmDisconnectedEvent to see which host this VM was on vmFound = true - vmDisconnectedEvents = getEvents([]string{"VmDisconnectedEvent"}, []types.ManagedObjectReference{vm.Reference()}, *begin, *end) - log.Printf("Retrieved '%d' VmDisconnectedEvent events.\n", len(vmDisconnectedEvents)) + //vmDisconnectedEvents = getEvents([]string{"VmDisconnectedEvent"}, []types.ManagedObjectReference{vm.Reference()}, *begin, *end) + //log.Printf("Retrieved '%d' VmDisconnectedEvent events.\n", len(vmDisconnectedEvents)) } // Determine which host the VM was previoulsy running on