From 361ba7719bc096feae4a835acdc70e8ddc8df6af Mon Sep 17 00:00:00 2001
From: Nathan Coad <nathan.coad@dell.com>
Date: Tue, 21 Apr 2026 10:35:10 +1000
Subject: [PATCH] more auth logging

---
 README.md                           |  74 ++++++++++++++++++
 components/views/snapshots_templ.go |   2 +-
 components/views/vm_trace_templ.go  |   2 +-
 internal/auth/ldap.go               |  13 +++-
 plan.md                             |   7 +-
 server/handler/auth.go              | 116 ++++++++++++++++++++++++++--
 6 files changed, 204 insertions(+), 10 deletions(-)
diff --git a/README.md b/README.md
index 2d2854b..f32aed4 100644
--- a/README.md
+++ b/README.md
@@ -211,6 +211,80 @@ Validate connectivity before starting vCTP:
 psql "postgres://vctp_user:change-this-password@db-hostname:5432/vctp?sslmode=disable"
 ```
 
+### PostgreSQL tuning baseline (20 vCPU / 64 GB host)
+If your PostgreSQL instance is still running near-default settings, use this as a practical starting profile for vCTP workloads (hourly ingest + daily/monthly aggregation).
+
+Choose one profile:
+- Dedicated DB host (PostgreSQL is the primary service on this machine): use the `dedicated` values.
+- Shared host (vCTP app + PostgreSQL on same machine): use the `shared` values.
+
+Recommended `postgresql.conf` starting points:
+
+```conf
+# Memory
+shared_buffers = 16GB                 # dedicated
+# shared_buffers = 12GB               # shared
+effective_cache_size = 48GB           # dedicated
+# effective_cache_size = 36GB         # shared
+work_mem = 32MB                       # dedicated
+# work_mem = 16MB                     # shared
+maintenance_work_mem = 2GB            # dedicated
+# maintenance_work_mem = 1GB          # shared
+
+# WAL / checkpoints
+wal_compression = on
+checkpoint_timeout = 15min
+checkpoint_completion_target = 0.9
+max_wal_size = 16GB
+min_wal_size = 2GB
+
+# Parallelism and connections
+max_connections = 120
+max_worker_processes = 20
+max_parallel_workers = 20
+max_parallel_workers_per_gather = 4
+max_parallel_maintenance_workers = 4
+
+# Planner / IO (SSD/NVMe)
+random_page_cost = 1.1
+effective_io_concurrency = 200
+default_statistics_target = 200
+
+# Autovacuum for high-write canonical tables
+autovacuum_max_workers = 6
+autovacuum_naptime = 30s
+autovacuum_vacuum_scale_factor = 0.02
+autovacuum_analyze_scale_factor = 0.01
+autovacuum_vacuum_cost_limit = 2000
+
+# Useful diagnostics
+track_io_timing = on
+log_temp_files = 32MB
+```
+
+Apply and validate:
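+- Restart PostgreSQL to apply `shared_buffers`, `max_connections`, `max_worker_processes`, and (on PostgreSQL 17 and earlier) `autovacuum_max_workers`, which only change at server start; the other settings above take effect on a reload (`SELECT pg_reload_conf();`).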
+- Confirm active values with:
+
+```sql
+SHOW shared_buffers;
+SHOW effective_cache_size;
+SHOW work_mem;
+SHOW maintenance_work_mem;
+SHOW max_wal_size;
+SHOW autovacuum_vacuum_scale_factor;
+```
+
+After tuning, rerun the canonical benchmark and compare against your pre-tuning snapshot:
+
+```shell
+vctp -settings /path/to/vctp.yml -benchmark-aggregations -benchmark-runs 3
+```
+
+Notes:
+- `work_mem` is per sort/hash operation, not per session; avoid setting it too high globally.
+- Keep `settings.scheduled_aggregation_engine: go` as default unless repeated production-scale benchmarks show SQL is consistently faster on your canonical Postgres data.
+
 PostgreSQL migrations live in `db/migrations_postgres`, while SQLite migrations remain in
 `db/migrations`.
 
diff --git a/components/views/snapshots_templ.go b/components/views/snapshots_templ.go
index 0ee34b5..25fee17 100644
--- a/components/views/snapshots_templ.go
+++ b/components/views/snapshots_templ.go
@@ -473,7 +473,7 @@ func VcenterTotalsPage(vcenter string, entries []VcenterTotalsEntry, chart Vcent
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\"></canvas><div id=\"vcenter-totals-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\n\t\t\t\t\t\t\t\t\tcanvasId: \"vcenter-totals-chart\",\n\t\t\t\t\t\t\t\t\ttooltipId: \"vcenter-totals-tooltip\",\n\t\t\t\t\t\t\t\t})\n\t\t\t\t\t\t\t</script></div>")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\"></canvas><div id=\"vcenter-totals-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\r\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\r\n\t\t\t\t\t\t\t\t\tcanvasId: \"vcenter-totals-chart\",\r\n\t\t\t\t\t\t\t\t\ttooltipId: \"vcenter-totals-tooltip\",\r\n\t\t\t\t\t\t\t\t})\r\n\t\t\t\t\t\t\t</script></div>")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
diff --git a/components/views/vm_trace_templ.go b/components/views/vm_trace_templ.go
index 7004abe..a49dbd3 100644
--- a/components/views/vm_trace_templ.go
+++ b/components/views/vm_trace_templ.go
@@ -194,7 +194,7 @@ func VmTracePage(query string, display_query string, vm_id string, vm_uuid strin
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
-			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\"></canvas><div id=\"vm-trace-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\n\t\t\t\t\t\t\t\t\tcanvasId: \"vm-trace-chart\",\n\t\t\t\t\t\t\t\t\ttooltipId: \"vm-trace-tooltip\",\n\t\t\t\t\t\t\t\t})\n\t\t\t\t\t\t\t</script></div>")
+			templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\"></canvas><div id=\"vm-trace-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\r\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\r\n\t\t\t\t\t\t\t\t\tcanvasId: \"vm-trace-chart\",\r\n\t\t\t\t\t\t\t\t\ttooltipId: \"vm-trace-tooltip\",\r\n\t\t\t\t\t\t\t\t})\r\n\t\t\t\t\t\t\t</script></div>")
 			if templ_7745c5c3_Err != nil {
 				return templ_7745c5c3_Err
 			}
diff --git a/internal/auth/ldap.go b/internal/auth/ldap.go
index 09b5aaf..3a13935 100644
--- a/internal/auth/ldap.go
+++ b/internal/auth/ldap.go
@@ -35,6 +35,8 @@ type LDAPIdentity struct {
 	Username string
 	UserDN   string
 	Groups   []string
+	// Diagnostics contains non-sensitive LDAP processing notes useful for debugging auth decisions.
+	Diagnostics []string
 }
 
 type LDAPAuthenticator struct {
@@ -93,7 +95,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
 
 	if err := conn.Bind(username, password); err != nil {
 		if ldap.IsErrorWithCode(err, ldap.LDAPResultInvalidCredentials) {
-			return LDAPIdentity{}, ErrLDAPInvalidCredentials
+			return LDAPIdentity{}, fmt.Errorf("%w: ldap bind rejected credentials", ErrLDAPInvalidCredentials)
 		}
 		return LDAPIdentity{}, fmt.Errorf("%w: bind failed: %v", ErrLDAPOperationFailed, err)
 	}
@@ -111,6 +113,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
 		return LDAPIdentity{}, err
 	}
 	if entry != nil {
+		identity.Diagnostics = append(identity.Diagnostics, "user_entry_found")
 		if strings.TrimSpace(entry.DN) != "" {
 			identity.UserDN = entry.DN
 		}
@@ -122,6 +125,8 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
 		); v != "" {
 			identity.Username = v
 		}
+	} else {
+		identity.Diagnostics = append(identity.Diagnostics, "user_entry_not_found")
 	}
 
 	groupSet := make(map[string]struct{})
@@ -156,9 +161,15 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
 				groupSet[dn] = struct{}{}
 			}
 		}
+		if len(groupEntries.Entries) == 0 {
+			identity.Diagnostics = append(identity.Diagnostics, "group_search_returned_no_entries")
+		}
+	} else {
+		identity.Diagnostics = append(identity.Diagnostics, fmt.Sprintf("group_search_failed:%v", err))
 	}
 
 	identity.Groups = mapKeysSorted(groupSet)
+	identity.Diagnostics = compactTrimmedStrings(identity.Diagnostics)
 	return identity, nil
 }
 
diff --git a/plan.md b/plan.md
index c55cd3d..deb2041 100644
--- a/plan.md
+++ b/plan.md
@@ -304,8 +304,11 @@ The target architecture is:
 ### 3. Phase 3: Postgres-Ready Scale-Up
 - [x] Validate/add canonical `vm_hourly_stats` indexes for snapshot time, vCenter+time, VM identity+time, and trace lookup.
 - [x] Add PostgreSQL monthly partitioning for `vm_hourly_stats` behind migration controls.
-- [ ] Benchmark Go vs SQL on canonical Postgres tables using representative production-scale data.
-  - Benchmark harness implemented via `-benchmark-aggregations` and `-benchmark-runs`; production-scale Postgres run pending.
+- [x] Benchmark Go vs SQL on canonical Postgres tables using representative production-scale data.
+  - Production-scale Postgres run completed on 2026-04-21 via one-shot canonical benchmark (`-benchmark-aggregations` with `runs_per_mode=1`, `driver=postgres`).
+  - Daily window `2026-04-20T00:00:00Z` to `2026-04-21T00:00:00Z`: Go `4.000602432s` (`14881` rows) vs SQL `1h17m19.039092561s` (`14920` rows), with Go ~`1159.59x` faster on this run.
+  - Monthly window `2026-04-01T00:00:00Z` to `2026-05-01T00:00:00Z`: Go `3.529410947s` (`15871` rows) vs SQL `3.313037973s` (`15873` rows), near parity with SQL slightly faster (~`0.216s`, `6.1%`).
+  - Decision remains unchanged: keep Go as scheduled default and treat SQL as fallback/backfill until SQL shows a clear, repeatable runtime win across canonical workloads.
 - [x] Keep Go as scheduled default unless SQL shows clear and repeatable runtime wins.
 - [x] If SQL wins, roll out behind a controlled flag before any default switch.
 
diff --git a/server/handler/auth.go b/server/handler/auth.go
index 9545b4f..130d372 100644
--- a/server/handler/auth.go
+++ b/server/handler/auth.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"net/http"
+	"sort"
 	"strings"
 	"time"
 	"vctp/internal/auth"
@@ -15,6 +16,7 @@ import (
 const (
 	authLoginFailureMessage = "invalid username or password"
 	authLoginRequestTimeout = 30 * time.Second
+	maxDebugLogListItems    = 25 // cap passed to limitStrings for list-valued fields in auth log events
 )
 
 type ldapAuthenticator interface {
@@ -78,6 +80,17 @@ func (h *Handler) AuthLogin(w http.ResponseWriter, r *http.Request) {
 		writeJSONError(w, http.StatusBadRequest, "username and password are required")
 		return
 	}
+	audit.LogAuthEvent(h.Logger, r, "login", "observe",
+		"reason", "ldap_authentication_start",
+		"username", username,
+		"ldap_bind_address", cfg.LDAPBindAddress,
+		"ldap_base_dn", cfg.LDAPBaseDN,
+		"ldap_group_requirements", limitStrings(cfg.LDAPGroups, maxDebugLogListItems),
+		"auth_group_role_mapping_keys", limitStrings(sortedStringMapKeys(cfg.AuthGroupRoleMappings), maxDebugLogListItems),
+		"ldap_insecure", cfg.LDAPInsecure,
+		"ldap_disable_validation", cfg.LDAPDisableValidation,
+		"ldap_trust_cert_configured", strings.TrimSpace(cfg.LDAPTrustCertFile) != "",
+	)
 
 	ldapAuth, err := newLDAPAuthenticator(auth.LDAPConfig{
 		BindAddress:       cfg.LDAPBindAddress,
@@ -99,23 +112,70 @@ func (h *Handler) AuthLogin(w http.ResponseWriter, r *http.Request) {
 	identity, err := ldapAuth.AuthenticateAndFetchGroups(ctx, username, password)
 	if err != nil {
 		if errors.Is(err, auth.ErrLDAPInvalidCredentials) {
-			audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "invalid_credentials", "username", username)
+			audit.LogAuthEvent(h.Logger, r, "login", "deny",
+				"reason", "invalid_credentials",
+				"username", username,
+				"ldap_bind_address", cfg.LDAPBindAddress,
+				"ldap_base_dn", cfg.LDAPBaseDN,
+				"error", err,
+			)
 			writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
 			return
 		}
 		if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
-			audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "ldap_timeout", "username", username, "error", err)
+			audit.LogAuthEvent(h.Logger, r, "login", "deny",
+				"reason", "ldap_timeout",
+				"username", username,
+				"ldap_bind_address", cfg.LDAPBindAddress,
+				"ldap_base_dn", cfg.LDAPBaseDN,
+				"timeout_seconds", authLoginRequestTimeout.Seconds(),
+				"error", err,
+			)
 			writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
 			return
 		}
-		audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "ldap_authentication_failed", "username", username, "error", err)
+		audit.LogAuthEvent(h.Logger, r, "login", "deny",
+			"reason", "ldap_authentication_failed",
+			"username", username,
+			"ldap_bind_address", cfg.LDAPBindAddress,
+			"ldap_base_dn", cfg.LDAPBaseDN,
+			"error", err,
+		)
 		writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
 		return
 	}
+	audit.LogAuthEvent(h.Logger, r, "login", "observe",
+		"reason", "ldap_authentication_succeeded",
+		"username", username,
+		"ldap_identity_username", identity.Username,
+		"ldap_user_dn", identity.UserDN,
+		"ldap_group_count", len(identity.Groups),
+		"ldap_groups", limitStrings(identity.Groups, maxDebugLogListItems),
+		"ldap_diagnostics", limitStrings(identity.Diagnostics, maxDebugLogListItems),
+	)
 
 	roles := auth.ResolveRoles(identity.Groups, cfg.AuthGroupRoleMappings)
-	if !auth.HasAnyGroup(identity.Groups, cfg.LDAPGroups) || len(roles) == 0 {
-		audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "group_or_role_denied", "username", username, "group_count", len(identity.Groups), "resolved_roles", roles)
+	hasRequiredGroup := auth.HasAnyGroup(identity.Groups, cfg.LDAPGroups)
+	audit.LogAuthEvent(h.Logger, r, "login", "observe",
+		"reason", "authorization_evaluation",
+		"username", username,
+		"has_required_group", hasRequiredGroup,
+		"required_groups", limitStrings(cfg.LDAPGroups, maxDebugLogListItems),
+		"user_groups", limitStrings(identity.Groups, maxDebugLogListItems),
+		"resolved_roles", roles,
+		"auth_group_role_mapping_keys", limitStrings(sortedStringMapKeys(cfg.AuthGroupRoleMappings), maxDebugLogListItems),
+	)
+	if !hasRequiredGroup || len(roles) == 0 {
+		audit.LogAuthEvent(h.Logger, r, "login", "deny",
+			"reason", "group_or_role_denied",
+			"username", username,
+			"group_count", len(identity.Groups),
+			"has_required_group", hasRequiredGroup,
+			"required_groups", limitStrings(cfg.LDAPGroups, maxDebugLogListItems),
+			"user_groups", limitStrings(identity.Groups, maxDebugLogListItems),
+			"resolved_roles", roles,
+			"ldap_diagnostics", limitStrings(identity.Diagnostics, maxDebugLogListItems),
+		)
 		writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
 		return
 	}
@@ -191,3 +251,49 @@ func (h *Handler) AuthMe(w http.ResponseWriter, r *http.Request) {
 		TokenID:   claims.ID,
 	})
 }
+
+func sortedStringMapKeys(values map[string]string) []string { // trimmed, non-blank keys of values in ascending order; map values are never read
+	if len(values) == 0 {
+		return nil // nil or empty map: nothing to report
+	}
+	keys := make([]string, 0, len(values))
+	for key := range values {
+		key = strings.TrimSpace(key)
+		if key == "" {
+			continue // whitespace-only keys are dropped
+		}
+		keys = append(keys, key) // no de-duplication: keys that differ only by surrounding whitespace both appear
+	}
+	if len(keys) == 0 {
+		return nil // every key was blank; return nil like the empty-map case
+	}
+	sort.Strings(keys) // map iteration order is random, so sort for stable output
+	return keys
+}
+
+func limitStrings(values []string, maxItems int) []string { // trimmed, non-blank copy of values, cut to the first maxItems entries plus a "..." marker when longer
+	if len(values) == 0 {
+		return nil
+	}
+	if maxItems <= 0 || len(values) <= maxItems { // no cap requested (maxItems <= 0) or input already within the cap
+		out := make([]string, 0, len(values))
+		for _, value := range values {
+			value = strings.TrimSpace(value)
+			if value == "" {
+				continue
+			}
+			out = append(out, value)
+		}
+		return out // may be empty but non-nil when every entry was blank
+	}
+	out := make([]string, 0, maxItems+1) // +1 leaves room for the truncation marker
+	for _, value := range values[:maxItems] { // the cap is applied before blank entries are dropped
+		value = strings.TrimSpace(value)
+		if value == "" {
+			continue
+		}
+		out = append(out, value)
+	}
+	out = append(out, "...") // signals that entries beyond maxItems were omitted
+	return out
+}