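// A reporting tool for vSphere HA failover outages: it connects to vCenter,
// finds host failure events (com.vmware.vc.HA.DasHostFailedEvent), matches
// them against HA VM restart events (com.vmware.vc.ha.VmRestartedByHAEvent),
// estimates each VM's outage window from VmDisconnectedEvent and restart
// times, and prints the combined host and VM results as JSON.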
package main

import (
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"net/url"
	"os"
	"sort"
	"strings"
	"time"
	_ "time/tzdata"

	"github.com/vmware/govmomi"
	"github.com/vmware/govmomi/event"
	"github.com/vmware/govmomi/view"
	"github.com/vmware/govmomi/vim25/methods"
	"github.com/vmware/govmomi/vim25/mo"
	"github.com/vmware/govmomi/vim25/types"
)

type OutageResults struct {
	VM                string
	OutageDuration    string
	OutageStart       time.Time
	RestartTime       time.Time
	Cluster           string
	FailedHost        string
	NewHost           string
	GuestOS           string
	CurrentPowerState string
	Description       string
}
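
// Note: results are marshaled with encoding/json and the struct has no JSON
// tags, so keys match the field names verbatim. An illustrative record with
// invented values:
//
//	{"VM":"web01","OutageDuration":"00:04:32",
//	 "OutageStart":"2024-01-01T10:00:00+11:00",
//	 "RestartTime":"2024-01-01T10:04:32+11:00",
//	 "Cluster":"Cluster01","FailedHost":"esx01.example.com",
//	 "NewHost":"esx02.example.com","GuestOS":"Ubuntu Linux (64-bit)",
//	 "CurrentPowerState":"poweredOn","Description":"..."}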

type HostFailureResults struct {
	HostName    string
	FailureTime time.Time
	Cluster     string
}

var (
	c           *govmomi.Client
	ctx         context.Context
	cancel      context.CancelFunc
	location    *time.Location
	sha1ver     string // sha1 revision used to build the program
	buildTime   string // when the executable was built
	results     []OutageResults
	hostResults []HostFailureResults
)
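
// sha1ver and buildTime are plain string variables, so they are presumably
// injected at build time via the linker, e.g. (assumed; the build command is
// not part of this file):
//
//	go build -ldflags "-X main.sha1ver=$(git rev-parse HEAD) -X main.buildTime=$(date -u +%Y-%m-%dT%H:%M:%SZ)"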

// getEvents collects events of the given types. The event filter accepts a
// single entity at a time, so a collector is created per entity in the loop
// below; with no entities given, events are collected from the root folder.
// See https://golang.hotexamples.com/site/file?hash=0x7414a58898f22f77b81f05c04e2e2a2280e018ba9a71a2f4efb2bda7c55c35ac&fullName=govc/command.go&project=kristinn/govmomi
// for another example of how to deal with multiple entities.
func getEvents(eventTypes []string, entities []types.ManagedObjectReference, begin time.Duration, end time.Duration) []types.Event {
	var returnEvents []types.Event

	// Refer https://github.com/vmware/govmomi/blob/main/examples/events/main.go
	now, err := methods.GetCurrentTime(ctx, c) // vCenter server time (UTC)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error getting vCenter time: %s\n", err)
		os.Exit(1)
	}

	m := event.NewManager(c.Client)
	root := c.ServiceContent.RootFolder

	// Configure the event stream filter (begin of stream)
	filter := types.EventFilterSpec{
		EventTypeId: eventTypes, // only stream specific types, e.g. VmEvent
		Entity: &types.EventFilterSpecByEntity{
			Entity:    root,
			Recursion: types.EventFilterSpecRecursionOptionAll,
		},
		Time: &types.EventFilterSpecByTime{
			BeginTime: types.NewTime(now.Add(-begin)),
		},
	}

	if end != 0 {
		filter.Time.EndTime = types.NewTime(now.Add(-end))
	}

	if len(entities) == 0 {
		entities = append(entities, root)
	}

	for _, entity := range entities {
		log.Printf("getEvents setting entity '%v' on the filter\n", entity)
		filter.Entity = &types.EventFilterSpecByEntity{
			Entity:    entity,
			Recursion: types.EventFilterSpecRecursionOptionAll,
		}

		collector, err := m.CreateCollectorForEvents(ctx, filter)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error creating event collector: %s\n", err)
			os.Exit(1)
		}

		for {
			events, err := collector.ReadNextEvents(ctx, 100)
			if err != nil {
				fmt.Fprintf(os.Stderr, "Error reading events: %s\n", err)
			}

			if len(events) == 0 {
				break
			}

			for i := range events {
				e := events[i].GetEvent()
				returnEvents = append(returnEvents, *e)
				log.Printf("%d [%s] %s\n", e.Key, e.CreatedTime.In(location).Format(time.ANSIC), e.FullFormattedMessage)
			}
		}

		// Destroy each collector once it is drained; a deferred call inside
		// this loop would not run until the function returns.
		_ = collector.Destroy(ctx)
	}

	return returnEvents
}
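
// Illustrative call (mirrors how main uses getEvents below): collect HA
// host-failure events across the whole inventory for the last 24 hours.
// A zero end duration means "no end time".
//
//	failures := getEvents(
//		[]string{"com.vmware.vc.HA.DasHostFailedEvent"}, // event type ID
//		nil,          // no entity filter: search from the root folder
//		24*time.Hour, // begin: events newer than 24h ago
//		0,            // end: up to the present
//	)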

func getCluster(name string) mo.ClusterComputeResource {
	// Create a container view so that we can search vCenter
	m := view.NewManager(c.Client)
	cv, err := m.CreateContainerView(ctx, c.ServiceContent.RootFolder, []string{"ClusterComputeResource"}, true)
	if err != nil {
		log.Printf("Failed creating container view: %s\n", err)
		return mo.ClusterComputeResource{}
	}
	defer cv.Destroy(ctx)

	var clusters []mo.ClusterComputeResource
	log.Printf("Searching for Cluster '%s'\n", name)
	err = cv.Retrieve(ctx, []string{"ClusterComputeResource"}, []string{"summary", "name"}, &clusters)
	if err != nil {
		log.Printf("Failed searching for Cluster %s : %s\n", name, err)
		return mo.ClusterComputeResource{}
	}

	for _, cluster := range clusters {
		if cluster.Name == name {
			log.Printf("Found corresponding Cluster with MoRef '%s'\n", cluster.Reference())
			return cluster
		}
	}

	// If we reached here then we didn't find the Cluster
	return mo.ClusterComputeResource{}
}

func getVM(name string) mo.VirtualMachine {
	// Create a container view so that we can search vCenter for a VM if we found any failure events
	m := view.NewManager(c.Client)
	cv, err := m.CreateContainerView(ctx, c.ServiceContent.RootFolder, []string{"VirtualMachine"}, true)
	if err != nil {
		log.Printf("Failed creating container view: %s\n", err)
		return mo.VirtualMachine{}
	}
	defer cv.Destroy(ctx)

	var vms []mo.VirtualMachine
	log.Printf("Searching for VM '%s'\n", name)
	err = cv.Retrieve(ctx, []string{"VirtualMachine"}, []string{"summary", "name"}, &vms)
	if err != nil {
		log.Printf("Failed searching for VM %s : %s\n", name, err)
		return mo.VirtualMachine{}
	}

	for _, vm := range vms {
		if vm.Name == name {
			log.Printf("Found corresponding VM with MoRef '%s'\n", vm.Reference())
			return vm
		}
	}

	// If we reached here then we didn't find a VM
	return mo.VirtualMachine{}
}

func getVmInCluster(name string, cluster types.ManagedObjectReference) mo.VirtualMachine {
	// Create a container view rooted at the cluster so the search is scoped to it
	m := view.NewManager(c.Client)
	cv, err := m.CreateContainerView(ctx, cluster, []string{"VirtualMachine"}, true)
	if err != nil {
		log.Printf("Failed creating container view: %s\n", err)
		return mo.VirtualMachine{}
	}
	defer cv.Destroy(ctx)

	var vms []mo.VirtualMachine
	log.Printf("Searching for VM '%s'\n", name)
	err = cv.Retrieve(ctx, []string{"VirtualMachine"}, []string{"summary", "name"}, &vms)
	if err != nil {
		log.Printf("Failed searching for VM %s : %s\n", name, err)
		return mo.VirtualMachine{}
	}

	for _, vm := range vms {
		if vm.Name == name {
			log.Printf("Found corresponding VM with MoRef '%s'\n", vm.Reference())
			return vm
		}
	}

	// If we reached here then we didn't find a VM
	return mo.VirtualMachine{}
}

func main() {
	// Command line flags for the vCenter connection
	vURL := flag.String("url", "", "The URL of a vCenter server, eg https://server.domain.example/sdk")
	vUser := flag.String("user", "", "The username to use when connecting to vCenter")
	vPass := flag.String("password", "", "The password to use when connecting to vCenter")
	vTZ := flag.String("tz", "Australia/Sydney", "The timezone to use when converting vCenter UTC times")
	vInsecure := flag.Bool("insecure", true, "Allow insecure connections to vCenter")
	begin := flag.Duration("b", time.Hour, "Begin time") // default BeginTime is 1h ago
	end := flag.Duration("e", 0, "End time")
	flag.Parse()
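
	// Example invocation (binary name assumed; the flags are defined above):
	//
	//	./outage-report -url https://vcenter.example.com/sdk -user admin \
	//	    -password secret -tz Australia/Sydney -b 24h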

	// Print logs to file
	f, err := os.OpenFile("log.txt", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
		log.Fatalf("error opening file: %v", err)
	}
	defer f.Close()
	log.SetOutput(f)

	log.Printf("Starting execution. Built on %s from sha1 %s\n", buildTime, sha1ver)

	// So we can convert vCenter UTC to our local timezone
	log.Printf("Setting timezone to '%s'\n", *vTZ)
	location, err = time.LoadLocation(*vTZ)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error setting timezone to %s : %s\n", *vTZ, err)
		os.Exit(1)
	}

	u, err := url.Parse(*vURL)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error parsing url %s : %s\n", *vURL, err)
		os.Exit(1)
	}
	if !strings.HasSuffix(u.Path, "/sdk") {
		u.Path, _ = url.JoinPath(u.Path, "/sdk")
		log.Printf("Updated vCenter URL to '%v'\n", u)
	}

	log.Printf("Connecting to vCenter %s\n", u)
	u.User = url.UserPassword(*vUser, *vPass)

	ctx, cancel = context.WithCancel(context.Background())
	defer cancel()

	// Login to vCenter
	c, err = govmomi.NewClient(ctx, u, *vInsecure)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Login error: %s\n", err)
		os.Exit(1)
	}
	defer c.Logout(ctx)
log.Printf("Searching for hostfailure events\n")
|
|
hostFailures := getEvents([]string{"com.vmware.vc.HA.DasHostFailedEvent"}, []types.ManagedObjectReference{}, *begin, *end)
|
|
|
|
if len(hostFailures) > 0 {
|
|
log.Printf("Found at least one host failure, proceeding with VM restart search\n")
|
|
vmFailures := getEvents([]string{"com.vmware.vc.ha.VmRestartedByHAEvent"}, []types.ManagedObjectReference{}, *begin, *end)
|
|
|
|
// Sort the host failure events by time
|
|
sort.Slice(hostFailures[:], func(i, j int) bool {
|
|
return hostFailures[i].CreatedTime.Before(hostFailures[j].CreatedTime)
|
|
})

		for i := range vmFailures {
			var outageStart, restartTime time.Time
			var failedHost string
			var possibleHosts []types.Event
			event := vmFailures[i]
			vmRestartTime := event.CreatedTime.In(location)
			log.Printf("HA restart event for VM '%s' in cluster '%s'\n", event.Vm.Name, event.ComputeResource.Name)

			// Get a reference to the cluster mentioned in the event
			cluster := getCluster(event.ComputeResource.Name)
			vm := getVmInCluster(event.Vm.Name, cluster.Reference())

			// Use VmDisconnectedEvent to see which host this VM was on
			disconnectedEvents := getEvents([]string{"VmDisconnectedEvent"}, []types.ManagedObjectReference{vm.Reference()}, *begin, *end)
			log.Printf("Retrieved '%d' VmDisconnectedEvent events.\n", len(disconnectedEvents))

			// Determine which host the VM was previously running on
			if len(disconnectedEvents) > 0 {
				// Sort the disconnected events by time
				sort.Slice(disconnectedEvents, func(i, j int) bool {
					return disconnectedEvents[i].CreatedTime.Before(disconnectedEvents[j].CreatedTime)
				})

				log.Printf("After sorting, VmDisconnectedEvent list looks like this:\n")
				for _, h := range disconnectedEvents {
					log.Printf("%d [%s] VM: %s, Host: %s, Message: %s\n", h.Key, h.CreatedTime.In(location).Format(time.ANSIC), h.Vm.Name, h.Host.Name, h.FullFormattedMessage)
				}

				// Keep any disconnected events at or before the restart time
				log.Printf("Filtering VmDisconnectedEvent list based on VM restart time %s\n", vmRestartTime)
				for _, e := range disconnectedEvents {
					if !e.CreatedTime.In(location).After(vmRestartTime) {
						log.Printf("VM disconnected event on host %s at time %s is applicable\n", e.Host.Name, e.CreatedTime.In(location))
						possibleHosts = append(possibleHosts, e)
					}
				}
			} else {
				log.Printf("Could not determine the previous host for this VM. Filtering all host failures for events prior to VM restart time '%s'\n", vmRestartTime)

				// Keep any host failures at or before the restart time
				for _, hostEvent := range hostFailures {
					if !hostEvent.CreatedTime.In(location).After(vmRestartTime) {
						possibleHosts = append(possibleHosts, hostEvent)
					}
				}

				log.Printf("Based on event times there were %d possible hosts this VM was running on\n", len(possibleHosts))

				if len(possibleHosts) == 0 {
					log.Printf("No ESXi outage events happened before VM %s restart event at %s, skipping this event.\n", event.Vm.Name, vmRestartTime)
					continue
				}
			}

			if len(possibleHosts) == 1 {
				log.Printf("Found a single host failure event relating to VM %s\n", event.Vm.Name)
				log.Printf("Failed host was '%s', using outage start time of '%s'\n", possibleHosts[0].Host.Name, possibleHosts[0].CreatedTime.In(location))
				failedHost = possibleHosts[0].Host.Name
				outageStart = possibleHosts[0].CreatedTime.In(location)
				restartTime = vmRestartTime
			} else if len(possibleHosts) > 1 {
				log.Printf("Found multiple host failure events relating to VM %s\n", event.Vm.Name)

				// possibleHosts is sorted by time, so use the last (most recent) entry
				lastIndex := len(possibleHosts) - 1
				log.Printf("Failed host was '%s', using outage start time of '%s'\n", possibleHosts[lastIndex].Host.Name, possibleHosts[lastIndex].CreatedTime.In(location))
				failedHost = possibleHosts[lastIndex].Host.Name
				outageStart = possibleHosts[lastIndex].CreatedTime.In(location)
				restartTime = vmRestartTime
			} else {
				log.Printf("Didn't get anything useful!\n")
			}

			// Format the outage duration as HH:MM:SS by adding it to the
			// zero time; note this would roll over for outages of 24h or more.
			duration := restartTime.Sub(outageStart)
			out := time.Time{}.Add(duration)

			// Create a new result
			result := OutageResults{
				VM:                event.Vm.Name,
				OutageDuration:    out.Format("15:04:05"),
				OutageStart:       outageStart,
				RestartTime:       restartTime,
				Cluster:           event.ComputeResource.Name,
				FailedHost:        failedHost,
				NewHost:           event.Host.Name,
				GuestOS:           vm.Summary.Guest.GuestFullName,
				CurrentPowerState: string(vm.Summary.Runtime.PowerState),
				Description:       event.FullFormattedMessage,
			}

			// Append to the list of all results
			results = append(results, result)
		}

		// Record every host failure event as its own result
		for _, hostEvent := range hostFailures {
			hostResults = append(hostResults, HostFailureResults{
				HostName:    hostEvent.Host.Name,
				FailureTime: hostEvent.CreatedTime.In(location),
				Cluster:     hostEvent.ComputeResource.Name,
			})
		}
	}

	// Combine details of host outages and VM outages into one slice
	var combined []interface{}
	for _, h := range hostResults {
		combined = append(combined, h)
	}
	for _, v := range results {
		combined = append(combined, v)
	}

	// Output final results in JSON
	if len(combined) > 0 {
		j, err := json.Marshal(combined)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error marshaling results: %s\n", err)
			os.Exit(1)
		}
		fmt.Println(string(j))
	} else {
		// Emit an empty JSON array to match the array the populated case prints
		fmt.Println("[]")
	}
}