package main import ( "context" "encoding/json" "flag" "fmt" "log" "net/url" "os" "sort" "strings" "time" _ "time/tzdata" "github.com/vmware/govmomi" "github.com/vmware/govmomi/event" "github.com/vmware/govmomi/view" "github.com/vmware/govmomi/vim25/methods" "github.com/vmware/govmomi/vim25/mo" "github.com/vmware/govmomi/vim25/types" ) type OutageResults struct { VM string OutageDuration string OutageStart time.Time RestartTime time.Time Cluster string FailedHost string NewHost string GuestOS string CurrentPowerState string Description string } type HostFailureResults struct { HostName string FailureTime time.Time Cluster string } var ( c *govmomi.Client ctx context.Context cancel context.CancelFunc location *time.Location sha1ver string // sha1 revision used to build the program buildTime string // when the executable was built results []OutageResults hostResults []HostFailureResults ) // This function optionally filters events by a single MoRef, any additonal MoRefs are ignored // See https://golang.hotexamples.com/site/file?hash=0x7414a58898f22f77b81f05c04e2e2a2280e018ba9a71a2f4efb2bda7c55c35ac&fullName=govc/command.go&project=kristinn/govmomi // for an example of how to deal with multiple entities func getEvents(eventTypes []string, entities []types.ManagedObjectReference, begin time.Duration, end time.Duration) []types.Event { var returnEvents []types.Event // Refer https://github.com/vmware/govmomi/blob/main/examples/events/main.go now, err := methods.GetCurrentTime(ctx, c) // vCenter server time (UTC) if err != nil { fmt.Fprintf(os.Stderr, "Error getting vCenter time: %s\n", err) os.Exit(1) } else { //log.Printf("vCenter time is '%v'\n", now) } m := event.NewManager(c.Client) root := c.ServiceContent.RootFolder // configure the event stream filter (begin of stream) filter := types.EventFilterSpec{ EventTypeId: eventTypes, // only stream specific types, e.g. VmEvent Entity: &types.EventFilterSpecByEntity{ Entity: root, Recursion: types.EventFilterSpecRecursionOptionAll, }, Time: &types.EventFilterSpecByTime{ BeginTime: types.NewTime(now.Add(begin * -1)), }, } if end != 0 { filter.Time.EndTime = types.NewTime(now.Add(end * -1)) } if len(entities) == 0 { entities = append(entities, root) } for _, e := range entities { log.Printf("getEvents setting entity '%v' to filter\n", entities[0]) filter.Entity = &types.EventFilterSpecByEntity{ Entity: e, Recursion: types.EventFilterSpecRecursionOptionAll, } collector, err := m.CreateCollectorForEvents(ctx, filter) if err != nil { fmt.Fprintf(os.Stderr, "Error creating event collector: %s\n", err) os.Exit(1) } defer collector.Destroy(ctx) for { events, err := collector.ReadNextEvents(ctx, 100) if err != nil { fmt.Fprintf(os.Stderr, "Error reading events: %s\n", err) } if len(events) == 0 { break } for i := range events { event := events[i].GetEvent() returnEvents = append(returnEvents, *event) //kind := reflect.TypeOf(events[i]).Elem().Name() //log.Printf("%d [%s] [%s] %s\n", event.Key, event.CreatedTime.In(location).Format(time.ANSIC), kind, event.FullFormattedMessage) log.Printf("%d [%s] %s\n", event.Key, event.CreatedTime.Format(time.ANSIC), event.FullFormattedMessage) } } } /* if len(entities) > 0 { log.Printf("getEvents setting entity '%v' to filter\n", entities[0]) filter.Entity = &types.EventFilterSpecByEntity{ Entity: entities[0], Recursion: types.EventFilterSpecRecursionOptionAll, } } collector, err := m.CreateCollectorForEvents(ctx, filter) if err != nil { fmt.Fprintf(os.Stderr, "Error creating event collector: %s\n", err) os.Exit(1) } defer collector.Destroy(ctx) for { events, err := collector.ReadNextEvents(ctx, 100) if err != nil { fmt.Fprintf(os.Stderr, "Error reading events: %s\n", err) } if len(events) == 0 { break } for i := range events { event := events[i].GetEvent() returnEvents = append(returnEvents, *event) //kind := reflect.TypeOf(events[i]).Elem().Name() //log.Printf("%d [%s] [%s] %s\n", event.Key, event.CreatedTime.In(location).Format(time.ANSIC), kind, event.FullFormattedMessage) log.Printf("%d [%s] %s\n", event.Key, event.CreatedTime.Format(time.ANSIC), event.FullFormattedMessage) } } */ return returnEvents } func getCluster(name string) mo.ClusterComputeResource { // Create a container view so that we can search vCenter m := view.NewManager(c.Client) cv, _ := m.CreateContainerView(ctx, c.ServiceContent.RootFolder, []string{"ClusterComputeResource"}, true) var clusters []mo.ClusterComputeResource log.Printf("Searching for Cluster '%s'\n", name) err := cv.Retrieve(ctx, []string{"ClusterComputeResource"}, []string{"summary", "name"}, &clusters) if err != nil { log.Printf("Failed searching for Cluster %s : %s\n", name, err) return mo.ClusterComputeResource{} } else { for _, cluster := range clusters { if cluster.Name == name { log.Printf("Found corresponding Cluster with MoRef '%s'\n", cluster.Reference()) return cluster } } } // If we reached here then we didn't find the Cluster return mo.ClusterComputeResource{} } func getVM(name string) mo.VirtualMachine { // Create a container view so that we can search vCenter for a VM if we found any failure events m := view.NewManager(c.Client) cv, _ := m.CreateContainerView(ctx, c.ServiceContent.RootFolder, []string{"VirtualMachine"}, true) var vms []mo.VirtualMachine log.Printf("Searching for VM '%s'\n", name) err := cv.Retrieve(ctx, []string{"VirtualMachine"}, []string{"summary", "name"}, &vms) if err != nil { log.Printf("Failed searching for VM %s : %s\n", name, err) return mo.VirtualMachine{} } else { for _, vm := range vms { if vm.Name == name { log.Printf("Found corresponding VM with MoRef '%s'\n", vm.Reference()) return vm } } } // If we reached here then we didn't find a VM return mo.VirtualMachine{} } func getVmInCluster(name string, cluster types.ManagedObjectReference) mo.VirtualMachine { // Create a container view so that we can search vCenter for a VM if we found any failure events m := view.NewManager(c.Client) cv, _ := m.CreateContainerView(ctx, cluster, []string{"VirtualMachine"}, true) var vms []mo.VirtualMachine log.Printf("Searching for VM '%s'\n", name) err := cv.Retrieve(ctx, []string{"VirtualMachine"}, []string{"summary", "name"}, &vms) if err != nil { log.Printf("Failed searching for VM %s : %s\n", name, err) return mo.VirtualMachine{} } else { for _, vm := range vms { if vm.Name == name { log.Printf("Found corresponding VM with MoRef '%s'\n", vm.Reference()) return vm } } } // If we reached here then we didn't find a VM return mo.VirtualMachine{} } func main() { // Command line flags for the vCenter connection vURL := flag.String("url", "", "The URL of a vCenter server, eg https://server.domain.example/sdk") vUser := flag.String("user", "", "The username to use when connecting to vCenter") vPass := flag.String("password", "", "The password to use when connecting to vCenter") vTZ := flag.String("tz", "Australia/Sydney", "The timezone to use when converting vCenter UTC times") vInsecure := flag.Bool("insecure", true, "Allow insecure connections to vCenter") begin := flag.Duration("b", time.Hour, "Begin time") // default BeginTime is 1h ago end := flag.Duration("e", 0, "End time") flag.Parse() // Print logs to file f, err := os.OpenFile("log.txt", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { log.Fatalf("error opening file: %v", err) } defer f.Close() log.SetOutput(f) log.Printf("Starting execution. Built on %s from sha1 %s\n", buildTime, sha1ver) // So we can convert vCenter UTC to our local timezone log.Printf("Setting timezone to '%s'\n", *vTZ) location, err = time.LoadLocation(*vTZ) if err != nil { fmt.Fprintf(os.Stderr, "Error setting timezone to %s : %s\n", *vTZ, err) os.Exit(1) } u, err := url.Parse(*vURL) if err != nil { fmt.Fprintf(os.Stderr, "Error parsing url %s : %s\n", *vURL, err) os.Exit(1) } else { if !strings.HasSuffix(u.Path, "/sdk") { u.Path, _ = url.JoinPath(u.Path, "/sdk") log.Printf("Updated vCenter URL to '%v'\n", u) } } log.Printf("Connecting to vCenter %s\n", u) u.User = url.UserPassword(*vUser, *vPass) ctx, cancel = context.WithCancel(context.Background()) defer cancel() // Login to vcenter c, err = govmomi.NewClient(ctx, u, *vInsecure) if err != nil { fmt.Fprintf(os.Stderr, "Logging in error: %s\n", err) os.Exit(1) } defer c.Logout(ctx) //finder := find.NewFinder(c.Client) log.Printf("Searching for hostfailure events\n") hostFailures := getEvents([]string{"com.vmware.vc.HA.DasHostFailedEvent"}, []types.ManagedObjectReference{}, *begin, *end) if len(hostFailures) > 0 { log.Printf("Found at least one host failure, proceeding with VM restart search\n") vmFailures := getEvents([]string{"com.vmware.vc.ha.VmRestartedByHAEvent"}, []types.ManagedObjectReference{}, *begin, *end) // Sort the host failure events by time sort.Slice(hostFailures[:], func(i, j int) bool { return hostFailures[i].CreatedTime.Before(hostFailures[j].CreatedTime) }) for i := range vmFailures { var outageStart, restartTime time.Time var failedHost string var possibleHosts []types.Event event := vmFailures[i] vmRestartTime := event.CreatedTime.In(location) log.Printf("Failure event for VM '%s' restarted in cluster '%s'\n", event.Vm.Name, event.ComputeResource.Name) // Get a reference to the cluster mentioned cluster := getCluster((event.ComputeResource.Name)) vm := getVmInCluster(event.Vm.Name, cluster.Reference()) //vm := getVM(event.Vm.Name) // Use VmDisconnectedEvent to see which host this VM was on disconnectedEvents := getEvents([]string{"VmDisconnectedEvent"}, []types.ManagedObjectReference{vm.Reference()}, *begin, *end) log.Printf("Retrieved '%d' corresponding events.\n", len(disconnectedEvents)) // Determine which host the VM was previoulsy running on if len(disconnectedEvents) > 0 { // Sort the disconnected events by time sort.Slice(disconnectedEvents[:], func(i, j int) bool { return disconnectedEvents[i].CreatedTime.Before(disconnectedEvents[j].CreatedTime) }) disconnectedHost := disconnectedEvents[0] log.Printf("VM was running on host '%s' previously\n", disconnectedHost.Host.Name) // Capture all the failure events for this host for _, hostEvent := range hostFailures { if hostEvent.Host.Name == disconnectedHost.Host.Name { possibleHosts = append(possibleHosts, hostEvent) } } } else { log.Printf("could not determine previous host for this VM. Filtering all host failures for events prior to VM restart time '%s'\n", vmRestartTime) // Search for host failures for _, hostEvent := range hostFailures { if hostEvent.CreatedTime.In(location).Before(vmRestartTime) || hostEvent.CreatedTime.In(location).Equal(vmRestartTime) { possibleHosts = append(possibleHosts, hostEvent) } } log.Printf("Based on event times there were %d possible hosts this VM was running on\n", len(possibleHosts)) if len(possibleHosts) == 0 { log.Printf("No ESXi outage events happened before VM %s restart event at %s, skipping this event.\n", event.Vm.Name, vmRestartTime) continue } } if len(possibleHosts) == 1 { log.Printf("Found a single host failure event relating to VM %s\n", event.Vm.Name) log.Printf("Failed host was '%s', using outage start time of '%s'\n", possibleHosts[0].Host.Name, possibleHosts[0].CreatedTime.In(location)) failedHost = possibleHosts[0].Host.Name outageStart = possibleHosts[0].CreatedTime.In(location) restartTime = vmRestartTime } else if len(possibleHosts) > 1 { log.Printf("Found multiple host failure events relating to VM %s\n", event.Vm.Name) // possible hosts is sorted by time, so use the last value if there are multiple lastIndex := len(possibleHosts) - 1 log.Printf("Failed host was '%s', using outage start time of '%s'\n", possibleHosts[lastIndex].Host.Name, possibleHosts[lastIndex].CreatedTime.In(location)) failedHost = possibleHosts[lastIndex].Host.Name outageStart = possibleHosts[lastIndex].CreatedTime.In(location) restartTime = vmRestartTime } else { log.Printf("Didn't get anything useful!\n") } /* OutageDuration time.Duration OutageStart time.Time RestartTime time.Time Cluster string FailedHost string NewHost string GuestOS string CurrentPowerState string Description string */ duration := restartTime.Sub(outageStart) out := time.Time{}.Add(duration) // Create a new result result := OutageResults{ VM: event.Vm.Name, OutageDuration: out.Format("15:04:05"), OutageStart: outageStart, RestartTime: restartTime, Cluster: event.ComputeResource.Name, FailedHost: failedHost, NewHost: event.Host.Name, GuestOS: vm.Summary.Guest.GuestFullName, CurrentPowerState: string(vm.Summary.Runtime.PowerState), Description: event.FullFormattedMessage, } // Append to list of all results results = append(results, result) } for _, hostEvent := range hostFailures { hostResults = append(hostResults, HostFailureResults{ HostName: hostEvent.Host.Name, FailureTime: hostEvent.CreatedTime.In(location), Cluster: hostEvent.ComputeResource.Name, }) } } // Combine details of host outages and VM outages into one interface var combined []interface{} for _, h := range hostResults { combined = append(combined, h) } for _, v := range results { combined = append(combined, v) } // Output final results in JSON if len(combined) > 0 { j, _ := json.Marshal(combined) fmt.Println(string(j)) } else { fmt.Println("{}") } }