diff --git a/main.go b/main.go index c114e68..f36a923 100644 --- a/main.go +++ b/main.go @@ -18,6 +18,19 @@ import ( "github.com/vmware/govmomi/vim25/types" ) +type OutageResults struct { + VM string + OutageDuration string + OutageStart time.Time + RestartTime time.Time + ComputeResource string + FailedHost string + NewHost string + GuestOS string + CurrentPowerState string + Description string +} + var ( c *govmomi.Client ctx context.Context @@ -25,6 +38,7 @@ var ( location *time.Location sha1ver string // sha1 revision used to build the program buildTime string // when the executable was built + results []OutageResults ) func getEvents(eventTypes []string, entities []types.ManagedObjectReference, begin time.Duration, end time.Duration) []types.Event { @@ -93,13 +107,61 @@ func getEvents(eventTypes []string, entities []types.ManagedObjectReference, beg return returnEvents } +func getCluster(name string) mo.ClusterComputeResource { + // Create a container view so that we can search vCenter + m := view.NewManager(c.Client) + cv, _ := m.CreateContainerView(ctx, c.ServiceContent.RootFolder, []string{"ClusterComputeResource"}, true) + + var clusters []mo.ClusterComputeResource + fmt.Printf("Searching for Cluster '%s'\n", name) + err := cv.Retrieve(ctx, []string{"ClusterComputeResource"}, []string{"summary", "name"}, &clusters) + if err != nil { + fmt.Printf("Failed searching for Cluster %s : %s\n", name, err) + return mo.ClusterComputeResource{} + } else { + for _, cluster := range clusters { + if cluster.Name == name { + fmt.Printf("Found corresponding Cluster with MoRef '%s'\n", cluster.Reference()) + return cluster + } + } + } + + // If we reached here then we didn't find the Cluster + return mo.ClusterComputeResource{} +} + func getVM(name string) mo.VirtualMachine { // Create a container view so that we can search vCenter for a VM if we found any failure events m := view.NewManager(c.Client) cv, _ := m.CreateContainerView(ctx, c.ServiceContent.RootFolder, 
[]string{"VirtualMachine"}, true) var vms []mo.VirtualMachine - fmt.Printf("Searching for VM\n") + fmt.Printf("Searching for VM '%s'\n", name) + err := cv.Retrieve(ctx, []string{"VirtualMachine"}, []string{"summary", "name"}, &vms) + if err != nil { + fmt.Printf("Failed searching for VM %s : %s\n", name, err) + return mo.VirtualMachine{} + } else { + for _, vm := range vms { + if vm.Name == name { + fmt.Printf("Found corresponding VM with MoRef '%s'\n", vm.Reference()) + return vm + } + } + } + + // If we reached here then we didn't find a VM + return mo.VirtualMachine{} +} + +func getVmInCluster(name string, cluster types.ManagedObjectReference) mo.VirtualMachine { + // Create a container view so that we can search vCenter for a VM if we found any failure events + m := view.NewManager(c.Client) + cv, _ := m.CreateContainerView(ctx, cluster, []string{"VirtualMachine"}, true) + + var vms []mo.VirtualMachine + fmt.Printf("Searching for VM '%s'\n", name) err := cv.Retrieve(ctx, []string{"VirtualMachine"}, []string{"summary", "name"}, &vms) if err != nil { fmt.Printf("Failed searching for VM %s : %s\n", name, err) @@ -167,186 +229,110 @@ func main() { fmt.Printf("Found at least one host failure, proceeding with VM restart search\n") vmFailures := getEvents([]string{"com.vmware.vc.ha.VmRestartedByHAEvent"}, []types.ManagedObjectReference{}, *begin, *end) + // Sort the host failure events by time + sort.Slice(hostFailures[:], func(i, j int) bool { + return hostFailures[i].CreatedTime.Before(hostFailures[j].CreatedTime) + }) + for i := range vmFailures { - var outageStart time.Time + var outageStart, restartTime time.Time + var failedHost string + var possibleHosts []types.Event event := vmFailures[i] + vmRestartTime := event.CreatedTime.In(location) fmt.Printf("Failure event for VM '%s' restarted in cluster '%s'\n", event.Vm.Name, event.ComputeResource.Name) - vm := getVM(event.Vm.Name) + // Get a reference to the cluster mentioned + cluster := 
getCluster((event.ComputeResource.Name)) + vm := getVmInCluster(event.Vm.Name, cluster.Reference()) + //vm := getVM(event.Vm.Name) // Use VmDisconnectedEvent to see which host this VM was on disconnectedEvents := getEvents([]string{"VmDisconnectedEvent"}, []types.ManagedObjectReference{vm.Reference()}, *begin, *end) fmt.Printf("Retrieved '%d' corresponding events.\n", len(disconnectedEvents)) - // Calculate the VM outage duration based on the previous host + // Determine which host the VM was previously running on if len(disconnectedEvents) > 0 { // Sort the disconnected events by time sort.Slice(disconnectedEvents[:], func(i, j int) bool { return disconnectedEvents[i].CreatedTime.Before(disconnectedEvents[j].CreatedTime) }) - // Use the earliest event as the outage start disconnectedHost := disconnectedEvents[0] - outageStart = disconnectedHost.CreatedTime.In(location) - fmt.Printf("VM was running on host '%s' previously, setting outage start to '%s'\n", disconnectedHost.Host.Name, outageStart) + fmt.Printf("VM was running on host '%s' previously\n", disconnectedHost.Host.Name) + + // Capture all the failure events for this host + for _, hostEvent := range hostFailures { + if hostEvent.Host.Name == disconnectedHost.Host.Name { + possibleHosts = append(possibleHosts, hostEvent) + } + } } else { - fmt.Printf("could not determine previous host for this VM. Filtering all host failures for events prior to VM restart time '%s'\n", event.CreatedTime.In(location)) + fmt.Printf("could not determine previous host for this VM. 
Filtering all host failures for events prior to VM restart time '%s'\n", vmRestartTime) // Search for host failures + for _, hostEvent := range hostFailures { + if hostEvent.CreatedTime.In(location).Before(vmRestartTime) || hostEvent.CreatedTime.In(location).Equal(vmRestartTime) { + possibleHosts = append(possibleHosts, hostEvent) + } + } + + fmt.Printf("Based on event times there were %d possible hosts this VM was running on\n", len(possibleHosts)) + + if len(possibleHosts) == 0 { + fmt.Printf("No ESXi outage events happened before VM %s restart event at %s, skipping this event.\n", event.Vm.Name, vmRestartTime) + continue + } + } + + if len(possibleHosts) == 1 { + fmt.Printf("Found a single host failure event relating to VM %s\n", event.Vm.Name) + fmt.Printf("Failed host was '%s', using outage start time of '%s'", possibleHosts[0].Host.Name, possibleHosts[0].CreatedTime.In(location)) + failedHost = possibleHosts[0].Host.Name + outageStart = possibleHosts[0].CreatedTime.In(location) + restartTime = vmRestartTime + } else if len(possibleHosts) > 1 { + fmt.Printf("Found multiple host failure events relating to VM %s\n", event.Vm.Name) + + // possible hosts is sorted by time, so use the last value if there are multiple + lastIndex := len(possibleHosts) - 1 + + fmt.Printf("Failed host was '%s', using outage start time of '%s'", possibleHosts[lastIndex].Host.Name, possibleHosts[lastIndex].CreatedTime.In(location)) + failedHost = possibleHosts[lastIndex].Host.Name + outageStart = possibleHosts[lastIndex].CreatedTime.In(location) + restartTime = vmRestartTime + } else { + fmt.Printf("Didn't get anything useful!\n") } /* - // Get a reference to the cluster mentioned in the event - cluster, err := finder.ClusterComputeResource(ctx, vmFailures[i].ComputeResource.Name) - - view, _ := m.CreateContainerView(ctx, cluster, []string{"VirtualMachine"}, true) - var vms []mo.VirtualMachine - err = view.Retrieve(ctx, []string{"VirtualMachine"}, []string{}, &vms) - - // Specify what 
properties we want to retrieve - - err = v.Retrieve(ctx, []string{"HostSystem"}, []string{"summary", "runtime"}, &hs) - if err != nil { - fmt.Fprintf(os.Stderr, "error: %s\n", err) - os.Exit(1) - } - - // Run the actual esxcli command on each host - finder := find.NewFinder(c.Client) - for _, host := range hs { - //fmt.Printf("Host name %s is %s\n", host.Summary.Config.Name, host.Runtime.ConnectionState) - - // Make sure that the host is connected before we try running an esxcli command - if host.Runtime.ConnectionState == "connected" { - // Get a reference to the host object so that we can check the ramdisk values - objRef, err := finder.ObjectReference(ctx, host.Reference()) + OutageDuration time.Duration + OutageStart time.Time + RestartTime time.Time + ComputeResource string + FailedHost string + NewHost string + GuestOS string + CurrentPowerState string + Description string */ + + duration := restartTime.Sub(outageStart) + out := time.Time{}.Add(duration) + + // Create a new result + result := OutageResults{ + VM: event.Vm.Name, + OutageDuration: out.Format("15:04:05"), + OutageStart: outageStart, + RestartTime: restartTime, + ComputeResource: event.ComputeResource.Name, + FailedHost: failedHost, + } + // Append to list of all results + results = append(results, result) } } - // Selecting default datacenter - /* - finder := find.NewFinder(c.Client, true) - dc, err := finder.DefaultDatacenter(ctx) - if err != nil { - fmt.Fprintf(os.Stderr, "Error: %s\n", err) - os.Exit(1) - } - //refs := []types.ManagedObjectReference{dc.Reference()} - */ - - /* - // Refer https://github.com/vmware/govmomi/blob/main/examples/events/main.go - now, err := methods.GetCurrentTime(ctx, c) // vCenter server time (UTC) - if err != nil { - fmt.Fprintf(os.Stderr, "Error getting vCenter time: %s\n", err) - os.Exit(1) - } - - m := event.NewManager(c.Client) - root := c.ServiceContent.RootFolder - - // configure the event stream filter (begin of stream) - filter := types.EventFilterSpec{ - 
EventTypeId: []string{"com.vmware.vc.HA.DasHostFailedEvent"}, // only stream specific types, e.g. VmEvent - Entity: &types.EventFilterSpecByEntity{ - Entity: root, - Recursion: types.EventFilterSpecRecursionOptionAll, - }, - Time: &types.EventFilterSpecByTime{ - BeginTime: types.NewTime(now.Add(*begin * -1)), - }, - } - if *end != 0 { - filter.Time.EndTime = types.NewTime(now.Add(*end * -1)) - } - - collector, err := m.CreateCollectorForEvents(ctx, filter) - if err != nil { - fmt.Fprintf(os.Stderr, "Error creating event collector: %s\n", err) - os.Exit(1) - } - - defer collector.Destroy(ctx) - - for { - events, err := collector.ReadNextEvents(ctx, 100) - if err != nil { - fmt.Fprintf(os.Stderr, "Error reading events: %s\n", err) - } - - if len(events) == 0 { - break - } - - for i := range events { - event := events[i].GetEvent() - kind := reflect.TypeOf(events[i]).Elem().Name() - fmt.Printf("%d [%s] [%s] %s\n", event.Key, event.CreatedTime.Format(time.ANSIC), kind, event.FullFormattedMessage) - } - } - */ - - // ************** - // previous code here - /* - // Do a recursive search for all the ESXi hosts (which have a type of HostSystem) - m := view.NewManager(c.Client) - v, _ := m.CreateContainerView(ctx, c.ServiceContent.RootFolder, []string{"HostSystem"}, true) - - // Specify what properties we want to retrieve - var hs []mo.HostSystem - err = v.Retrieve(ctx, []string{"HostSystem"}, []string{"summary", "runtime"}, &hs) - if err != nil { - fmt.Fprintf(os.Stderr, "error: %s\n", err) - os.Exit(1) - } - - // Run the actual esxcli command on each host - finder := find.NewFinder(c.Client) - for _, host := range hs { - //fmt.Printf("Host name %s is %s\n", host.Summary.Config.Name, host.Runtime.ConnectionState) - - // Make sure that the host is connected before we try running an esxcli command - if host.Runtime.ConnectionState == "connected" { - // Get a reference to the host object so that we can check the ramdisk values - objRef, err := finder.ObjectReference(ctx, 
host.Reference()) - if err != nil { - fmt.Fprintf(os.Stderr, "Error getting reference to host : %s\n", err) - continue - } - - // Get the HostSystem object - hs, ok := objRef.(*object.HostSystem) - if !ok { - fmt.Fprintf(os.Stderr, "Couldn't find Hostsytem : %s\n", host.Reference()) - continue - } - - // Get a reference to the esxcli executor - // https://gowalker.org/github.com/dhawalseth/govc-autocomplete/govc/host/esxcli - // https://pkg.go.dev/github.com/vmware/govmomi/govc/host/esxcli - // https://github.com/vmware/govmomi/blob/f4c5c4a58e445ba31393795c7e678fa05fb3452a/govc/host/esxcli/esxcli.go - // https://golang.hotexamples.com/examples/github.com.vmware.govmomi.object/HostSystem/-/golang-hostsystem-class-examples.html - // https://golang.hotexamples.com/examples/github.com.vmware.govmomi.govc.host.esxcli/-/GetFirewallInfo/golang-getfirewallinfo-function-examples.html - e, err := esxcli.NewExecutor(c.Client, hs) - if err != nil { - fmt.Fprintf(os.Stderr, "Error getting esxcli executor: %s\n", err) - continue - } - - // Run the desired esxcli command - result, err := e.Run([]string{"system", "visorfs", "ramdisk", "list"}) - if err != nil { - fmt.Fprintf(os.Stderr, "Error running esxcli command: %s\n", err) - continue - } - - // Print out all the values - for _, values := range result.Values { - fmt.Printf("%s;%s;%s\n", host.Summary.Config.Name, values["RamdiskName"][0], values["Free"][0]) - } - } - } - */ + // Output final results in JSON }