From 361ba7719bc096feae4a835acdc70e8ddc8df6af Mon Sep 17 00:00:00 2001 From: Nathan Coad Date: Tue, 21 Apr 2026 10:35:10 +1000 Subject: [PATCH] more auth logging --- README.md | 74 ++++++++++++++++++ components/views/snapshots_templ.go | 2 +- components/views/vm_trace_templ.go | 2 +- internal/auth/ldap.go | 13 +++- plan.md | 7 +- server/handler/auth.go | 116 ++++++++++++++++++++++++++-- 6 files changed, 204 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2d2854b..f32aed4 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,80 @@ Validate connectivity before starting vCTP: psql "postgres://vctp_user:change-this-password@db-hostname:5432/vctp?sslmode=disable" ``` +### PostgreSQL tuning baseline (20 vCPU / 64 GB host) +If your PostgreSQL instance is still running near-default settings, use this as a practical starting profile for vCTP workloads (hourly ingest + daily/monthly aggregation). + +Choose one profile: +- Dedicated DB host (PostgreSQL is the primary service on this machine): use the `dedicated` values. +- Shared host (vCTP app + PostgreSQL on same machine): use the `shared` values. 
+ +Recommended `postgresql.conf` starting points: + +```conf +# Memory +shared_buffers = 16GB # dedicated +# shared_buffers = 12GB # shared +effective_cache_size = 48GB # dedicated +# effective_cache_size = 36GB # shared +work_mem = 32MB # dedicated +# work_mem = 16MB # shared +maintenance_work_mem = 2GB # dedicated +# maintenance_work_mem = 1GB # shared + +# WAL / checkpoints +wal_compression = on +checkpoint_timeout = 15min +checkpoint_completion_target = 0.9 +max_wal_size = 16GB +min_wal_size = 2GB + +# Parallelism and connections +max_connections = 120 +max_worker_processes = 20 +max_parallel_workers = 20 +max_parallel_workers_per_gather = 4 +max_parallel_maintenance_workers = 4 + +# Planner / IO (SSD/NVMe) +random_page_cost = 1.1 +effective_io_concurrency = 200 +default_statistics_target = 200 + +# Autovacuum for high-write canonical tables +autovacuum_max_workers = 6 +autovacuum_naptime = 30s +autovacuum_vacuum_scale_factor = 0.02 +autovacuum_analyze_scale_factor = 0.01 +autovacuum_vacuum_cost_limit = 2000 + +# Useful diagnostics +track_io_timing = on +log_temp_files = 32MB +``` + +Apply and validate: +- Reload config (`SELECT pg_reload_conf();`) for reloadable settings; restart PostgreSQL for settings that only take effect at server start (for example `shared_buffers`, `max_connections`, and `max_worker_processes`). +- Confirm active values with: + +```sql +SHOW shared_buffers; +SHOW effective_cache_size; +SHOW work_mem; +SHOW maintenance_work_mem; +SHOW max_wal_size; +SHOW autovacuum_vacuum_scale_factor; +``` + +After tuning, rerun the canonical benchmark and compare against your pre-tuning snapshot: + +```shell +vctp -settings /path/to/vctp.yml -benchmark-aggregations -benchmark-runs 3 +``` + +Notes: +- `work_mem` is per sort/hash operation, not per session; avoid setting it too high globally. +- Keep `settings.scheduled_aggregation_engine: go` as default unless repeated production-scale benchmarks show SQL is consistently faster on your canonical Postgres data. + PostgreSQL migrations live in `db/migrations_postgres`, while SQLite migrations remain in `db/migrations`. 
diff --git a/components/views/snapshots_templ.go b/components/views/snapshots_templ.go index 0ee34b5..25fee17 100644 --- a/components/views/snapshots_templ.go +++ b/components/views/snapshots_templ.go @@ -473,7 +473,7 @@ func VcenterTotalsPage(vcenter string, entries []VcenterTotalsEntry, chart Vcent if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\">
") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\">
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } diff --git a/components/views/vm_trace_templ.go b/components/views/vm_trace_templ.go index 7004abe..a49dbd3 100644 --- a/components/views/vm_trace_templ.go +++ b/components/views/vm_trace_templ.go @@ -194,7 +194,7 @@ func VmTracePage(query string, display_query string, vm_id string, vm_uuid strin if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } - templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\">
") + templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\">
") if templ_7745c5c3_Err != nil { return templ_7745c5c3_Err } diff --git a/internal/auth/ldap.go b/internal/auth/ldap.go index 09b5aaf..3a13935 100644 --- a/internal/auth/ldap.go +++ b/internal/auth/ldap.go @@ -35,6 +35,8 @@ type LDAPIdentity struct { Username string UserDN string Groups []string + // Diagnostics contains non-sensitive LDAP processing notes useful for debugging auth decisions. + Diagnostics []string } type LDAPAuthenticator struct { @@ -93,7 +95,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user if err := conn.Bind(username, password); err != nil { if ldap.IsErrorWithCode(err, ldap.LDAPResultInvalidCredentials) { - return LDAPIdentity{}, ErrLDAPInvalidCredentials + return LDAPIdentity{}, fmt.Errorf("%w: ldap bind rejected credentials", ErrLDAPInvalidCredentials) } return LDAPIdentity{}, fmt.Errorf("%w: bind failed: %v", ErrLDAPOperationFailed, err) } @@ -111,6 +113,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user return LDAPIdentity{}, err } if entry != nil { + identity.Diagnostics = append(identity.Diagnostics, "user_entry_found") if strings.TrimSpace(entry.DN) != "" { identity.UserDN = entry.DN } @@ -122,6 +125,8 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user ); v != "" { identity.Username = v } + } else { + identity.Diagnostics = append(identity.Diagnostics, "user_entry_not_found") } groupSet := make(map[string]struct{}) @@ -156,9 +161,15 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user groupSet[dn] = struct{}{} } } + if len(groupEntries.Entries) == 0 { + identity.Diagnostics = append(identity.Diagnostics, "group_search_returned_no_entries") + } + } else { + identity.Diagnostics = append(identity.Diagnostics, fmt.Sprintf("group_search_failed:%v", err)) } identity.Groups = mapKeysSorted(groupSet) + identity.Diagnostics = compactTrimmedStrings(identity.Diagnostics) return identity, nil } diff --git 
a/plan.md b/plan.md index c55cd3d..deb2041 100644 --- a/plan.md +++ b/plan.md @@ -304,8 +304,11 @@ The target architecture is: ### 3. Phase 3: Postgres-Ready Scale-Up - [x] Validate/add canonical `vm_hourly_stats` indexes for snapshot time, vCenter+time, VM identity+time, and trace lookup. - [x] Add PostgreSQL monthly partitioning for `vm_hourly_stats` behind migration controls. -- [ ] Benchmark Go vs SQL on canonical Postgres tables using representative production-scale data. - - Benchmark harness implemented via `-benchmark-aggregations` and `-benchmark-runs`; production-scale Postgres run pending. +- [x] Benchmark Go vs SQL on canonical Postgres tables using representative production-scale data. + - Production-scale Postgres run completed on 2026-04-21 via one-shot canonical benchmark (`-benchmark-aggregations` with `runs_per_mode=1`, `driver=postgres`). + - Daily window `2026-04-20T00:00:00Z` to `2026-04-21T00:00:00Z`: Go `4.000602432s` (`14881` rows) vs SQL `1h17m19.039092561s` (`14920` rows), with Go ~`1159.59x` faster on this run. + - Monthly window `2026-04-01T00:00:00Z` to `2026-05-01T00:00:00Z`: Go `3.529410947s` (`15871` rows) vs SQL `3.313037973s` (`15873` rows), near parity with SQL slightly faster (~`0.216s`, `6.1%`). + - Decision remains unchanged: keep Go as scheduled default and treat SQL as fallback/backfill until SQL shows a clear, repeatable runtime win across canonical workloads. - [x] Keep Go as scheduled default unless SQL shows clear and repeatable runtime wins. - [x] If SQL wins, roll out behind a controlled flag before any default switch. 
diff --git a/server/handler/auth.go b/server/handler/auth.go index 9545b4f..130d372 100644 --- a/server/handler/auth.go +++ b/server/handler/auth.go @@ -4,6 +4,7 @@ import ( "context" "errors" "net/http" + "sort" "strings" "time" "vctp/internal/auth" @@ -15,6 +16,7 @@ import ( const ( authLoginFailureMessage = "invalid username or password" authLoginRequestTimeout = 30 * time.Second + maxDebugLogListItems = 25 ) type ldapAuthenticator interface { @@ -78,6 +80,17 @@ func (h *Handler) AuthLogin(w http.ResponseWriter, r *http.Request) { writeJSONError(w, http.StatusBadRequest, "username and password are required") return } + audit.LogAuthEvent(h.Logger, r, "login", "observe", + "reason", "ldap_authentication_start", + "username", username, + "ldap_bind_address", cfg.LDAPBindAddress, + "ldap_base_dn", cfg.LDAPBaseDN, + "ldap_group_requirements", limitStrings(cfg.LDAPGroups, maxDebugLogListItems), + "auth_group_role_mapping_keys", limitStrings(sortedStringMapKeys(cfg.AuthGroupRoleMappings), maxDebugLogListItems), + "ldap_insecure", cfg.LDAPInsecure, + "ldap_disable_validation", cfg.LDAPDisableValidation, + "ldap_trust_cert_configured", strings.TrimSpace(cfg.LDAPTrustCertFile) != "", + ) ldapAuth, err := newLDAPAuthenticator(auth.LDAPConfig{ BindAddress: cfg.LDAPBindAddress, @@ -99,23 +112,70 @@ func (h *Handler) AuthLogin(w http.ResponseWriter, r *http.Request) { identity, err := ldapAuth.AuthenticateAndFetchGroups(ctx, username, password) if err != nil { if errors.Is(err, auth.ErrLDAPInvalidCredentials) { - audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "invalid_credentials", "username", username) + audit.LogAuthEvent(h.Logger, r, "login", "deny", + "reason", "invalid_credentials", + "username", username, + "ldap_bind_address", cfg.LDAPBindAddress, + "ldap_base_dn", cfg.LDAPBaseDN, + "error", err, + ) writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage) return } if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { 
- audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "ldap_timeout", "username", username, "error", err) + audit.LogAuthEvent(h.Logger, r, "login", "deny", + "reason", "ldap_timeout", + "username", username, + "ldap_bind_address", cfg.LDAPBindAddress, + "ldap_base_dn", cfg.LDAPBaseDN, + "timeout_seconds", authLoginRequestTimeout.Seconds(), + "error", err, + ) writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage) return } - audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "ldap_authentication_failed", "username", username, "error", err) + audit.LogAuthEvent(h.Logger, r, "login", "deny", + "reason", "ldap_authentication_failed", + "username", username, + "ldap_bind_address", cfg.LDAPBindAddress, + "ldap_base_dn", cfg.LDAPBaseDN, + "error", err, + ) writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage) return } + audit.LogAuthEvent(h.Logger, r, "login", "observe", + "reason", "ldap_authentication_succeeded", + "username", username, + "ldap_identity_username", identity.Username, + "ldap_user_dn", identity.UserDN, + "ldap_group_count", len(identity.Groups), + "ldap_groups", limitStrings(identity.Groups, maxDebugLogListItems), + "ldap_diagnostics", limitStrings(identity.Diagnostics, maxDebugLogListItems), + ) roles := auth.ResolveRoles(identity.Groups, cfg.AuthGroupRoleMappings) - if !auth.HasAnyGroup(identity.Groups, cfg.LDAPGroups) || len(roles) == 0 { - audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "group_or_role_denied", "username", username, "group_count", len(identity.Groups), "resolved_roles", roles) + hasRequiredGroup := auth.HasAnyGroup(identity.Groups, cfg.LDAPGroups) + audit.LogAuthEvent(h.Logger, r, "login", "observe", + "reason", "authorization_evaluation", + "username", username, + "has_required_group", hasRequiredGroup, + "required_groups", limitStrings(cfg.LDAPGroups, maxDebugLogListItems), + "user_groups", limitStrings(identity.Groups, maxDebugLogListItems), + "resolved_roles", roles, + 
"auth_group_role_mapping_keys", limitStrings(sortedStringMapKeys(cfg.AuthGroupRoleMappings), maxDebugLogListItems), + ) + if !hasRequiredGroup || len(roles) == 0 { + audit.LogAuthEvent(h.Logger, r, "login", "deny", + "reason", "group_or_role_denied", + "username", username, + "group_count", len(identity.Groups), + "has_required_group", hasRequiredGroup, + "required_groups", limitStrings(cfg.LDAPGroups, maxDebugLogListItems), + "user_groups", limitStrings(identity.Groups, maxDebugLogListItems), + "resolved_roles", roles, + "ldap_diagnostics", limitStrings(identity.Diagnostics, maxDebugLogListItems), + ) writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage) return } @@ -191,3 +251,49 @@ func (h *Handler) AuthMe(w http.ResponseWriter, r *http.Request) { TokenID: claims.ID, }) } + +func sortedStringMapKeys(values map[string]string) []string { + if len(values) == 0 { + return nil + } + keys := make([]string, 0, len(values)) + for key := range values { + key = strings.TrimSpace(key) + if key == "" { + continue + } + keys = append(keys, key) + } + if len(keys) == 0 { + return nil + } + sort.Strings(keys) + return keys +} + +func limitStrings(values []string, maxItems int) []string { + if len(values) == 0 { + return nil + } + if maxItems <= 0 || len(values) <= maxItems { + out := make([]string, 0, len(values)) + for _, value := range values { + value = strings.TrimSpace(value) + if value == "" { + continue + } + out = append(out, value) + } + return out + } + out := make([]string, 0, maxItems+1) + for _, value := range values[:maxItems] { + value = strings.TrimSpace(value) + if value == "" { + continue + } + out = append(out, value) + } + out = append(out, "...") + return out +}