more auth logging
continuous-integration/drone/push Build is passing

This commit is contained in:
Nathan Coad
2026-04-21 10:35:10 +10:00
parent 2c3167a1a0
commit 361ba7719b
6 changed files with 204 additions and 10 deletions
+74
View File
@@ -211,6 +211,80 @@ Validate connectivity before starting vCTP:
psql "postgres://vctp_user:change-this-password@db-hostname:5432/vctp?sslmode=disable"
```
### PostgreSQL tuning baseline (20 vCPU / 64 GB host)
If your PostgreSQL instance is still running near-default settings, use this as a practical starting profile for vCTP workloads (hourly ingest + daily/monthly aggregation).
Choose one profile:
- Dedicated DB host (PostgreSQL is the primary service on this machine): use the `dedicated` values.
- Shared host (vCTP app + PostgreSQL on same machine): use the `shared` values.
Recommended `postgresql.conf` starting points:
```conf
# Memory
shared_buffers = 16GB # dedicated
# shared_buffers = 12GB # shared
effective_cache_size = 48GB # dedicated
# effective_cache_size = 36GB # shared
work_mem = 32MB # dedicated
# work_mem = 16MB # shared
maintenance_work_mem = 2GB # dedicated
# maintenance_work_mem = 1GB # shared
# WAL / checkpoints
wal_compression = on
checkpoint_timeout = 15min
checkpoint_completion_target = 0.9
max_wal_size = 16GB
min_wal_size = 2GB
# Parallelism and connections
max_connections = 120
max_worker_processes = 20
max_parallel_workers = 20
max_parallel_workers_per_gather = 4
max_parallel_maintenance_workers = 4
# Planner / IO (SSD/NVMe)
random_page_cost = 1.1
effective_io_concurrency = 200
default_statistics_target = 200
# Autovacuum for high-write canonical tables
autovacuum_max_workers = 6
autovacuum_naptime = 30s
autovacuum_vacuum_scale_factor = 0.02
autovacuum_analyze_scale_factor = 0.01
autovacuum_vacuum_cost_limit = 2000
# Useful diagnostics
track_io_timing = on
log_temp_files = 32MB
```
Apply and validate:
- Reload config (`SELECT pg_reload_conf();`) or restart PostgreSQL if required by your platform.
- Confirm active values with:
```sql
SHOW shared_buffers;
SHOW effective_cache_size;
SHOW work_mem;
SHOW maintenance_work_mem;
SHOW max_wal_size;
SHOW autovacuum_vacuum_scale_factor;
```
After tuning, rerun the canonical benchmark and compare against your pre-tuning snapshot:
```shell
vctp -settings /path/to/vctp.yml -benchmark-aggregations -benchmark-runs 3
```
Notes:
- `work_mem` is per sort/hash operation, not per session; avoid setting it too high globally.
- Keep `settings.scheduled_aggregation_engine: go` as default unless repeated production-scale benchmarks show SQL is consistently faster on your canonical Postgres data.
PostgreSQL migrations live in `db/migrations_postgres`, while SQLite migrations remain in
`db/migrations`.
+1 -1
View File
@@ -473,7 +473,7 @@ func VcenterTotalsPage(vcenter string, entries []VcenterTotalsEntry, chart Vcent
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\"></canvas><div id=\"vcenter-totals-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\n\t\t\t\t\t\t\t\t\tcanvasId: \"vcenter-totals-chart\",\n\t\t\t\t\t\t\t\t\ttooltipId: \"vcenter-totals-tooltip\",\n\t\t\t\t\t\t\t\t})\n\t\t\t\t\t\t\t</script></div>")
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 26, "\"></canvas><div id=\"vcenter-totals-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\r\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\r\n\t\t\t\t\t\t\t\t\tcanvasId: \"vcenter-totals-chart\",\r\n\t\t\t\t\t\t\t\t\ttooltipId: \"vcenter-totals-tooltip\",\r\n\t\t\t\t\t\t\t\t})\r\n\t\t\t\t\t\t\t</script></div>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
+1 -1
View File
@@ -194,7 +194,7 @@ func VmTracePage(query string, display_query string, vm_id string, vm_uuid strin
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\"></canvas><div id=\"vm-trace-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\n\t\t\t\t\t\t\t\t\tcanvasId: \"vm-trace-chart\",\n\t\t\t\t\t\t\t\t\ttooltipId: \"vm-trace-tooltip\",\n\t\t\t\t\t\t\t\t})\n\t\t\t\t\t\t\t</script></div>")
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 10, "\"></canvas><div id=\"vm-trace-tooltip\" class=\"web3-chart-tooltip\" aria-hidden=\"true\"></div></div><script>\r\n\t\t\t\t\t\t\t\twindow.Web3Charts.renderFromDataset({\r\n\t\t\t\t\t\t\t\t\tcanvasId: \"vm-trace-chart\",\r\n\t\t\t\t\t\t\t\t\ttooltipId: \"vm-trace-tooltip\",\r\n\t\t\t\t\t\t\t\t})\r\n\t\t\t\t\t\t\t</script></div>")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
+12 -1
View File
@@ -35,6 +35,8 @@ type LDAPIdentity struct {
Username string
UserDN string
Groups []string
// Diagnostics contains non-sensitive LDAP processing notes useful for debugging auth decisions.
Diagnostics []string
}
type LDAPAuthenticator struct {
@@ -93,7 +95,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
if err := conn.Bind(username, password); err != nil {
if ldap.IsErrorWithCode(err, ldap.LDAPResultInvalidCredentials) {
return LDAPIdentity{}, ErrLDAPInvalidCredentials
return LDAPIdentity{}, fmt.Errorf("%w: ldap bind rejected credentials", ErrLDAPInvalidCredentials)
}
return LDAPIdentity{}, fmt.Errorf("%w: bind failed: %v", ErrLDAPOperationFailed, err)
}
@@ -111,6 +113,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
return LDAPIdentity{}, err
}
if entry != nil {
identity.Diagnostics = append(identity.Diagnostics, "user_entry_found")
if strings.TrimSpace(entry.DN) != "" {
identity.UserDN = entry.DN
}
@@ -122,6 +125,8 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
); v != "" {
identity.Username = v
}
} else {
identity.Diagnostics = append(identity.Diagnostics, "user_entry_not_found")
}
groupSet := make(map[string]struct{})
@@ -156,9 +161,15 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
groupSet[dn] = struct{}{}
}
}
if len(groupEntries.Entries) == 0 {
identity.Diagnostics = append(identity.Diagnostics, "group_search_returned_no_entries")
}
} else {
identity.Diagnostics = append(identity.Diagnostics, fmt.Sprintf("group_search_failed:%v", err))
}
identity.Groups = mapKeysSorted(groupSet)
identity.Diagnostics = compactTrimmedStrings(identity.Diagnostics)
return identity, nil
}
+5 -2
View File
@@ -304,8 +304,11 @@ The target architecture is:
### 3. Phase 3: Postgres-Ready Scale-Up
- [x] Validate/add canonical `vm_hourly_stats` indexes for snapshot time, vCenter+time, VM identity+time, and trace lookup.
- [x] Add PostgreSQL monthly partitioning for `vm_hourly_stats` behind migration controls.
- [ ] Benchmark Go vs SQL on canonical Postgres tables using representative production-scale data.
- Benchmark harness implemented via `-benchmark-aggregations` and `-benchmark-runs`; production-scale Postgres run pending.
- [x] Benchmark Go vs SQL on canonical Postgres tables using representative production-scale data.
- Production-scale Postgres run completed on 2026-04-21 via one-shot canonical benchmark (`-benchmark-aggregations` with `runs_per_mode=1`, `driver=postgres`).
- Daily window `2026-04-20T00:00:00Z` to `2026-04-21T00:00:00Z`: Go `4.000602432s` (`14881` rows) vs SQL `1h17m19.039092561s` (`14920` rows), with Go ~`1159.59x` faster on this run.
- Monthly window `2026-04-01T00:00:00Z` to `2026-05-01T00:00:00Z`: Go `3.529410947s` (`15871` rows) vs SQL `3.313037973s` (`15873` rows), near parity with SQL slightly faster (~`0.216s`, `6.1%`).
- Decision remains unchanged: keep Go as scheduled default and treat SQL as fallback/backfill until SQL shows a clear, repeatable runtime win across canonical workloads.
- [x] Keep Go as scheduled default unless SQL shows clear and repeatable runtime wins.
- [x] If SQL wins, roll out behind a controlled flag before any default switch.
+111 -5
View File
@@ -4,6 +4,7 @@ import (
"context"
"errors"
"net/http"
"sort"
"strings"
"time"
"vctp/internal/auth"
@@ -15,6 +16,7 @@ import (
const (
authLoginFailureMessage = "invalid username or password"
authLoginRequestTimeout = 30 * time.Second
maxDebugLogListItems = 25
)
type ldapAuthenticator interface {
@@ -78,6 +80,17 @@ func (h *Handler) AuthLogin(w http.ResponseWriter, r *http.Request) {
writeJSONError(w, http.StatusBadRequest, "username and password are required")
return
}
audit.LogAuthEvent(h.Logger, r, "login", "observe",
"reason", "ldap_authentication_start",
"username", username,
"ldap_bind_address", cfg.LDAPBindAddress,
"ldap_base_dn", cfg.LDAPBaseDN,
"ldap_group_requirements", limitStrings(cfg.LDAPGroups, maxDebugLogListItems),
"auth_group_role_mapping_keys", limitStrings(sortedStringMapKeys(cfg.AuthGroupRoleMappings), maxDebugLogListItems),
"ldap_insecure", cfg.LDAPInsecure,
"ldap_disable_validation", cfg.LDAPDisableValidation,
"ldap_trust_cert_configured", strings.TrimSpace(cfg.LDAPTrustCertFile) != "",
)
ldapAuth, err := newLDAPAuthenticator(auth.LDAPConfig{
BindAddress: cfg.LDAPBindAddress,
@@ -99,23 +112,70 @@ func (h *Handler) AuthLogin(w http.ResponseWriter, r *http.Request) {
identity, err := ldapAuth.AuthenticateAndFetchGroups(ctx, username, password)
if err != nil {
if errors.Is(err, auth.ErrLDAPInvalidCredentials) {
audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "invalid_credentials", "username", username)
audit.LogAuthEvent(h.Logger, r, "login", "deny",
"reason", "invalid_credentials",
"username", username,
"ldap_bind_address", cfg.LDAPBindAddress,
"ldap_base_dn", cfg.LDAPBaseDN,
"error", err,
)
writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
return
}
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "ldap_timeout", "username", username, "error", err)
audit.LogAuthEvent(h.Logger, r, "login", "deny",
"reason", "ldap_timeout",
"username", username,
"ldap_bind_address", cfg.LDAPBindAddress,
"ldap_base_dn", cfg.LDAPBaseDN,
"timeout_seconds", authLoginRequestTimeout.Seconds(),
"error", err,
)
writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
return
}
audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "ldap_authentication_failed", "username", username, "error", err)
audit.LogAuthEvent(h.Logger, r, "login", "deny",
"reason", "ldap_authentication_failed",
"username", username,
"ldap_bind_address", cfg.LDAPBindAddress,
"ldap_base_dn", cfg.LDAPBaseDN,
"error", err,
)
writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
return
}
audit.LogAuthEvent(h.Logger, r, "login", "observe",
"reason", "ldap_authentication_succeeded",
"username", username,
"ldap_identity_username", identity.Username,
"ldap_user_dn", identity.UserDN,
"ldap_group_count", len(identity.Groups),
"ldap_groups", limitStrings(identity.Groups, maxDebugLogListItems),
"ldap_diagnostics", limitStrings(identity.Diagnostics, maxDebugLogListItems),
)
roles := auth.ResolveRoles(identity.Groups, cfg.AuthGroupRoleMappings)
if !auth.HasAnyGroup(identity.Groups, cfg.LDAPGroups) || len(roles) == 0 {
audit.LogAuthEvent(h.Logger, r, "login", "deny", "reason", "group_or_role_denied", "username", username, "group_count", len(identity.Groups), "resolved_roles", roles)
hasRequiredGroup := auth.HasAnyGroup(identity.Groups, cfg.LDAPGroups)
audit.LogAuthEvent(h.Logger, r, "login", "observe",
"reason", "authorization_evaluation",
"username", username,
"has_required_group", hasRequiredGroup,
"required_groups", limitStrings(cfg.LDAPGroups, maxDebugLogListItems),
"user_groups", limitStrings(identity.Groups, maxDebugLogListItems),
"resolved_roles", roles,
"auth_group_role_mapping_keys", limitStrings(sortedStringMapKeys(cfg.AuthGroupRoleMappings), maxDebugLogListItems),
)
if !hasRequiredGroup || len(roles) == 0 {
audit.LogAuthEvent(h.Logger, r, "login", "deny",
"reason", "group_or_role_denied",
"username", username,
"group_count", len(identity.Groups),
"has_required_group", hasRequiredGroup,
"required_groups", limitStrings(cfg.LDAPGroups, maxDebugLogListItems),
"user_groups", limitStrings(identity.Groups, maxDebugLogListItems),
"resolved_roles", roles,
"ldap_diagnostics", limitStrings(identity.Diagnostics, maxDebugLogListItems),
)
writeJSONError(w, http.StatusUnauthorized, authLoginFailureMessage)
return
}
@@ -191,3 +251,49 @@ func (h *Handler) AuthMe(w http.ResponseWriter, r *http.Request) {
TokenID: claims.ID,
})
}
func sortedStringMapKeys(values map[string]string) []string {
if len(values) == 0 {
return nil
}
keys := make([]string, 0, len(values))
for key := range values {
key = strings.TrimSpace(key)
if key == "" {
continue
}
keys = append(keys, key)
}
if len(keys) == 0 {
return nil
}
sort.Strings(keys)
return keys
}
func limitStrings(values []string, maxItems int) []string {
if len(values) == 0 {
return nil
}
if maxItems <= 0 || len(values) <= maxItems {
out := make([]string, 0, len(values))
for _, value := range values {
value = strings.TrimSpace(value)
if value == "" {
continue
}
out = append(out, value)
}
return out
}
out := make([]string, 0, maxItems+1)
for _, value := range values[:maxItems] {
value = strings.TrimSpace(value)
if value == "" {
continue
}
out = append(out, value)
}
out = append(out, "...")
return out
}