more auth logging
continuous-integration/drone/push Build is passing

This commit is contained in:
Nathan Coad
2026-04-21 10:35:10 +10:00
parent 2c3167a1a0
commit 361ba7719b
6 changed files with 204 additions and 10 deletions
+74
View File
@@ -211,6 +211,80 @@ Validate connectivity before starting vCTP:
psql "postgres://vctp_user:change-this-password@db-hostname:5432/vctp?sslmode=disable" psql "postgres://vctp_user:change-this-password@db-hostname:5432/vctp?sslmode=disable"
``` ```
### PostgreSQL tuning baseline (20 vCPU / 64 GB host)
If your PostgreSQL instance is still running near-default settings, use this as a practical starting profile for vCTP workloads (hourly ingest + daily/monthly aggregation).
Choose one profile:
- Dedicated DB host (PostgreSQL is the primary service on this machine): use the `dedicated` values.
- Shared host (vCTP app + PostgreSQL on same machine): use the `shared` values.
Recommended `postgresql.conf` starting points:
```conf
# Memory
shared_buffers = 16GB # dedicated
# shared_buffers = 12GB # shared
effective_cache_size = 48GB # dedicated
# effective_cache_size = 36GB # shared
work_mem = 32MB # dedicated
# work_mem = 16MB # shared
maintenance_work_mem = 2GB # dedicated
# maintenance_work_mem = 1GB # shared
# WAL / checkpoints
wal_compression = on
checkpoint_timeout = 15min
checkpoint_completion_target = 0.9
max_wal_size = 16GB
min_wal_size = 2GB
# Parallelism and connections
max_connections = 120
max_worker_processes = 20
max_parallel_workers = 20
max_parallel_workers_per_gather = 4
max_parallel_maintenance_workers = 4
# Planner / IO (SSD/NVMe)
random_page_cost = 1.1
effective_io_concurrency = 200
default_statistics_target = 200
# Autovacuum for high-write canonical tables
autovacuum_max_workers = 6
autovacuum_naptime = 30s
autovacuum_vacuum_scale_factor = 0.02
autovacuum_analyze_scale_factor = 0.01
autovacuum_vacuum_cost_limit = 2000
# Useful diagnostics
track_io_timing = on
log_temp_files = 32MB
```
Apply and validate:
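- Restart PostgreSQL to apply the full profile: `shared_buffers`, `max_connections`, and `max_worker_processes` only take effect after a server restart. Most of the remaining settings (for example `work_mem`, `max_wal_size`, `checkpoint_timeout`, and the autovacuum thresholds) can be applied without a restart via `SELECT pg_reload_conf();`.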
- Confirm active values with:
```sql
SHOW shared_buffers;
SHOW effective_cache_size;
SHOW work_mem;
SHOW maintenance_work_mem;
SHOW max_wal_size;
SHOW autovacuum_vacuum_scale_factor;
```
After tuning, rerun the canonical benchmark and compare against your pre-tuning snapshot:
```shell
vctp -settings /path/to/vctp.yml -benchmark-aggregations -benchmark-runs 3
```
Notes:
- `work_mem` is per sort/hash operation, not per session; avoid setting it too high globally.
- Keep `settings.scheduled_aggregation_engine: go` as default unless repeated production-scale benchmarks show SQL is consistently faster on your canonical Postgres data.
PostgreSQL migrations live in `db/migrations_postgres`, while SQLite migrations remain in PostgreSQL migrations live in `db/migrations_postgres`, while SQLite migrations remain in
`db/migrations`. `db/migrations`.
+1 -1
View File
@@ -473,7 +473,7 @@ func VcenterTotalsPage(vcenter string, entries []VcenterTotalsEntry, chart Vcent
if templ_7745c5c3_Err != nil { if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err return templ_7745c5c3_Err
} }
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\"></canvas><div id=\"vcenter-totals-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\n\t\t\t\t\t\t\t\t\tcanvasId: \"vcenter-totals-chart\",\n\t\t\t\t\t\t\t\t\ttooltipId: \"vcenter-totals-tooltip\",\n\t\t\t\t\t\t\t\t})\n\t\t\t\t\t\t\t</script></div>") templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\"></canvas><div id=\"vcenter-totals-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\r\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\r\n\t\t\t\t\t\t\t\t\tcanvasId: \"vcenter-totals-chart\",\r\n\t\t\t\t\t\t\t\t\ttooltipId: \"vcenter-totals-tooltip\",\r\n\t\t\t\t\t\t\t\t})\r\n\t\t\t\t\t\t\t</script></div>")
if templ_7745c5c3_Err != nil { if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err return templ_7745c5c3_Err
} }
+1 -1
View File
@@ -194,7 +194,7 @@ func VmTracePage(query string, display_query string, vm_id string, vm_uuid strin
if templ_7745c5c3_Err != nil { if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err return templ_7745c5c3_Err
} }
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\"></canvas><div id=\"vm-trace-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\n\t\t\t\t\t\t\t\t\tcanvasId: \"vm-trace-chart\",\n\t\t\t\t\t\t\t\t\ttooltipId: \"vm-trace-tooltip\",\n\t\t\t\t\t\t\t\t})\n\t\t\t\t\t\t\t</script></div>") templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\"></canvas><div id=\"vm-trace-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\r\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\r\n\t\t\t\t\t\t\t\t\tcanvasId: \"vm-trace-chart\",\r\n\t\t\t\t\t\t\t\t\ttooltipId: \"vm-trace-tooltip\",\r\n\t\t\t\t\t\t\t\t})\r\n\t\t\t\t\t\t\t</script></div>")
if templ_7745c5c3_Err != nil { if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err return templ_7745c5c3_Err
} }
+12 -1
View File
@@ -35,6 +35,8 @@ type LDAPIdentity struct {
Username string Username string
UserDN string UserDN string
Groups []string Groups []string
// Diagnostics contains non-sensitive LDAP processing notes useful for debugging auth decisions.
Diagnostics []string
} }
type LDAPAuthenticator struct { type LDAPAuthenticator struct {
@@ -93,7 +95,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
if err := conn.Bind(username, password); err != nil { if err := conn.Bind(username, password); err != nil {
if ldap.IsErrorWithCode(err, ldap.LDAPResultInvalidCredentials) { if ldap.IsErrorWithCode(err, ldap.LDAPResultInvalidCredentials) {
return LDAPIdentity{}, ErrLDAPInvalidCredentials return LDAPIdentity{}, fmt.Errorf("%w: ldap bind rejected credentials", ErrLDAPInvalidCredentials)
} }
return LDAPIdentity{}, fmt.Errorf("%w: bind failed: %v", ErrLDAPOperationFailed, err) return LDAPIdentity{}, fmt.Errorf("%w: bind failed: %v", ErrLDAPOperationFailed, err)
} }
@@ -111,6 +113,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
return LDAPIdentity{}, err return LDAPIdentity{}, err
} }
if entry != nil { if entry != nil {
identity.Diagnostics = append(identity.Diagnostics, "user_entry_found")
if strings.TrimSpace(entry.DN) != "" { if strings.TrimSpace(entry.DN) != "" {
identity.UserDN = entry.DN identity.UserDN = entry.DN
} }
@@ -122,6 +125,8 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
); v != "" { ); v != "" {
identity.Username = v identity.Username = v
} }
} else {
identity.Diagnostics = append(identity.Diagnostics, "user_entry_not_found")
} }
groupSet := make(map[string]struct{}) groupSet := make(map[string]struct{})
@@ -156,9 +161,15 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
groupSet[dn] = struct{}{} groupSet[dn] = struct{}{}
} }
} }
if len(groupEntries.Entries) == 0 {
identity.Diagnostics = append(identity.Diagnostics, "group_search_returned_no_entries")
}
} else {
identity.Diagnostics = append(identity.Diagnostics, fmt.Sprintf("group_search_failed:%v", err))
} }
identity.Groups = mapKeysSorted(groupSet) identity.Groups = mapKeysSorted(groupSet)
identity.Diagnostics = compactTrimmedStrings(identity.Diagnostics)
return identity, nil return identity, nil
} }
+5 -2
View File
@@ -304,8 +304,11 @@ The target architecture is:
### 3. Phase 3: Postgres-Ready Scale-Up ### 3. Phase 3: Postgres-Ready Scale-Up
- [x] Validate/add canonical `vm_hourly_stats` indexes for snapshot time, vCenter+time, VM identity+time, and trace lookup. - [x] Validate/add canonical `vm_hourly_stats` indexes for snapshot time, vCenter+time, VM identity+time, and trace lookup.
- [x] Add PostgreSQL monthly partitioning for `vm_hourly_stats` behind migration controls. - [x] Add PostgreSQL monthly partitioning for `vm_hourly_stats` behind migration controls.
- [ ] Benchmark Go vs SQL on canonical Postgres tables using representative production-scale data. - [x] Benchmark Go vs SQL on canonical Postgres tables using representative production-scale data.
- Benchmark harness implemented via `-benchmark-aggregations` and `-benchmark-runs`; production-scale Postgres run pending. - Production-scale Postgres run completed on 2026-04-21 via one-shot canonical benchmark (`-benchmark-aggregations` with `runs_per_mode=1`, `driver=postgres`).
- Daily window `2026-04-20T00:00:00Z` to `2026-04-21T00:00:00Z`: Go `4.000602432s` (`14881` rows) vs SQL `1h17m19.039092561s` (`14920` rows), with Go ~`1159.59x` faster on this run.
- Monthly window `2026-04-01T00:00:00Z` to `2026-05-01T00:00:00Z`: Go `3.529410947s` (`15871` rows) vs SQL `3.313037973s` (`15873` rows), near parity with SQL slightly faster (~`0.216s`, `6.1%`).
- Decision remains unchanged: keep Go as scheduled default and treat SQL as fallback/backfill until SQL shows a clear, repeatable runtime win across canonical workloads.
- [x] Keep Go as scheduled default unless SQL shows clear and repeatable runtime wins. - [x] Keep Go as scheduled default unless SQL shows clear and repeatable runtime wins.
- [x] If SQL wins, roll out behind a controlled flag before any default switch. - [x] If SQL wins, roll out behind a controlled flag before any default switch.
+111 -5
View File
@@ -4,6 +4,7 @@ import (
"context" "context"
"errors" "errors"
"net/http" "net/http"
"sort"
"strings" "strings"
"time" "time"
"vctp/internal/auth" "vctp/internal/auth"
@@ -15,6 +16,7 @@ import (
const ( const (
authLoginFailureMessage = "invalid username or password" authLoginFailureMessage = "invalid username or password"
authLoginRequestTimeout = 30 * time.Second authLoginRequestTimeout = 30 * time.Second
maxDebugLogListItems = 25
) )
type ldapAuthenticator interface { type ldapAuthenticator interface {
@@ -78,6 +80,17 @@ func (h *Handler) AuthLogin(w http.ResponseWriter, r *http.Request) {
writeJSONError(w, http.StatusBadRequest, "username and password are required") writeJSONError(w, http.StatusBadRequest, "username and password are required")
return return
} }
audit.LogAuthEvent(h.Logger, r, "login", "observe",
"reason", "ldap_authentication_start",
"username", username,
"ldap_bind_address", cfg.LDAPBindAddress,
"ldap_base_dn", cfg.LDAPBaseDN,
"ldap_group_requirements", limitStrings(cfg.LDAPGroups, maxDebugLogListItems),
"auth_group_role_mapping_keys", limitStrings(sortedStringMapKeys(cfg.AuthGroupRoleMappings), maxDebugLogListItems),
"ldap_insecure", cfg.LDAPInsecure,
"ldap_disable_validation", cfg.LDAPDisableValidation,
"ldap_trust_cert_configured", strings.TrimSpace(cfg.LDAPTrustCertFile) != "",
)
ldapAuth, err := newLDAPAuthenticator(auth.LDAPConfig{ ldapAuth, err := newLDAPAuthenticator(auth.LDAPConfig{
BindAddress: cfg.LDAPBindAddress, BindAddress: cfg.LDAPBindAddress,
@@ -99,23 +112,70 @@ func (h *Handler) AuthLogin(w http.ResponseWriter, r *http.Request) {
identity, err := ldapAuth.AuthenticateAndFetchGroups(ctx, username, password) identity, err := ldapAuth.AuthenticateAndFetchGroups(ctx, username, password)
if err != nil { if err != nil {
if errors.Is(err, auth.ErrLDAPInvalidCredentials) { if errors.Is(err, auth.ErrLDAPInvalidCredentials) {
audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "invalid_credentials", "username", username) audit.LogAuthEvent(h.Logger, r, "login", "deny",
"reason", "invalid_credentials",
"username", username,
"ldap_bind_address", cfg.LDAPBindAddress,
"ldap_base_dn", cfg.LDAPBaseDN,
"error", err,
)
writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage) writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
return return
} }
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "ldap_timeout", "username", username, "error", err) audit.LogAuthEvent(h.Logger, r, "login", "deny",
"reason", "ldap_timeout",
"username", username,
"ldap_bind_address", cfg.LDAPBindAddress,
"ldap_base_dn", cfg.LDAPBaseDN,
"timeout_seconds", authLoginRequestTimeout.Seconds(),
"error", err,
)
writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage) writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
return return
} }
audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "ldap_authentication_failed", "username", username, "error", err) audit.LogAuthEvent(h.Logger, r, "login", "deny",
"reason", "ldap_authentication_failed",
"username", username,
"ldap_bind_address", cfg.LDAPBindAddress,
"ldap_base_dn", cfg.LDAPBaseDN,
"error", err,
)
writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage) writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
return return
} }
audit.LogAuthEvent(h.Logger, r, "login", "observe",
"reason", "ldap_authentication_succeeded",
"username", username,
"ldap_identity_username", identity.Username,
"ldap_user_dn", identity.UserDN,
"ldap_group_count", len(identity.Groups),
"ldap_groups", limitStrings(identity.Groups, maxDebugLogListItems),
"ldap_diagnostics", limitStrings(identity.Diagnostics, maxDebugLogListItems),
)
roles := auth.ResolveRoles(identity.Groups, cfg.AuthGroupRoleMappings) roles := auth.ResolveRoles(identity.Groups, cfg.AuthGroupRoleMappings)
if !auth.HasAnyGroup(identity.Groups, cfg.LDAPGroups) || len(roles) == 0 { hasRequiredGroup := auth.HasAnyGroup(identity.Groups, cfg.LDAPGroups)
audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "group_or_role_denied", "username", username, "group_count", len(identity.Groups), "resolved_roles", roles) audit.LogAuthEvent(h.Logger, r, "login", "observe",
"reason", "authorization_evaluation",
"username", username,
"has_required_group", hasRequiredGroup,
"required_groups", limitStrings(cfg.LDAPGroups, maxDebugLogListItems),
"user_groups", limitStrings(identity.Groups, maxDebugLogListItems),
"resolved_roles", roles,
"auth_group_role_mapping_keys", limitStrings(sortedStringMapKeys(cfg.AuthGroupRoleMappings), maxDebugLogListItems),
)
if !hasRequiredGroup || len(roles) == 0 {
audit.LogAuthEvent(h.Logger, r, "login", "deny",
"reason", "group_or_role_denied",
"username", username,
"group_count", len(identity.Groups),
"has_required_group", hasRequiredGroup,
"required_groups", limitStrings(cfg.LDAPGroups, maxDebugLogListItems),
"user_groups", limitStrings(identity.Groups, maxDebugLogListItems),
"resolved_roles", roles,
"ldap_diagnostics", limitStrings(identity.Diagnostics, maxDebugLogListItems),
)
writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage) writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
return return
} }
@@ -191,3 +251,49 @@ func (h *Handler) AuthMe(w http.ResponseWriter, r *http.Request) {
TokenID: claims.ID, TokenID: claims.ID,
}) })
} }
// sortedStringMapKeys returns the keys of values with surrounding whitespace
// trimmed, blank keys dropped, and the remainder sorted in ascending order.
// It returns nil when the map is empty or when no key survives trimming.
func sortedStringMapKeys(values map[string]string) []string {
	names := make([]string, 0, len(values))
	for raw := range values {
		if trimmed := strings.TrimSpace(raw); trimmed != "" {
			names = append(names, trimmed)
		}
	}
	// Covers both the empty-map case and the all-blank-keys case.
	if len(names) == 0 {
		return nil
	}
	// Map iteration order is random; sort so callers get stable output.
	sort.Strings(names)
	return names
}
// limitStrings returns a trimmed copy of values with blank entries removed,
// capped for logging purposes.
//
// When maxItems is positive and values holds more than maxItems entries, only
// the first maxItems entries are considered (blank entries among them are
// still dropped) and a trailing "..." marker is appended to signal truncation.
// Otherwise every entry is considered and no marker is added.
//
// It returns nil for an empty input; for a non-empty input the result is
// always non-nil, even if every entry was blank.
func limitStrings(values []string, maxItems int) []string {
	if len(values) == 0 {
		return nil
	}

	truncated := maxItems > 0 && len(values) > maxItems
	window, capacity := values, len(values)
	if truncated {
		// Reserve one extra slot for the "..." marker.
		window, capacity = values[:maxItems], maxItems+1
	}

	kept := make([]string, 0, capacity)
	for i := range window {
		if trimmed := strings.TrimSpace(window[i]); trimmed != "" {
			kept = append(kept, trimmed)
		}
	}
	if truncated {
		kept = append(kept, "...")
	}
	return kept
}