From 4b1b9858628aa3a111c98ea0d6f1cd388a9bd10b Mon Sep 17 00:00:00 2001
From: Nathan Coad <nathan.coad@dell.com>
Date: Tue, 21 Apr 2026 11:00:40 +1000
Subject: [PATCH] auth/ldap: resolve bound DN via WhoAmI and widen principal matching for user and group lookup

---
 README.md                  |   3 +
 internal/auth/ldap.go      | 148 ++++++++++++++++++++++++++++---------
 internal/auth/ldap_test.go |  49 ++++++++++++
 plan.md                    |  13 +++-
 4 files changed, 174 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index f32aed4..e02bda4 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,9 @@ The benchmark command:
 - Run the benchmark on the target environment and database profile before deciding defaults:
   - `vctp -settings /path/to/vctp.yml -benchmark-aggregations -benchmark-runs 3`
 - Current local comparison snapshot (2026-04-20) is recorded in `phase-metrics-2026-04-20.md`.
+- Latest tuned Postgres snapshot (2026-04-21, `runs=3`) showed:
+  - Daily window (`2026-04-21` to `2026-04-22` UTC): Go avg `2.261369712s` vs SQL avg `1m31.738727387s` (Go ~`40.57x` faster).
+  - Monthly window (`2026-04-01` to `2026-05-01` UTC): Go avg `3.705308832s` vs SQL avg `3.065612298s` (SQL ~`1.21x` faster).
 - Default-path decision remains `settings.scheduled_aggregation_engine: go`.
 - Promote SQL only when representative production-scale **Postgres** runs show clear, repeatable wins.
 
diff --git a/internal/auth/ldap.go b/internal/auth/ldap.go
index 3a13935..4a16a7a 100644
--- a/internal/auth/ldap.go
+++ b/internal/auth/ldap.go
@@ -107,13 +107,24 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
 		Username: username,
 		UserDN:   username,
 	}
+	if whoami, err := conn.WhoAmI(nil); err != nil {
+		identity.Diagnostics = append(identity.Diagnostics, fmt.Sprintf("whoami_failed:%v", err))
+	} else if boundDN := strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(whoami.AuthzID), "dn:")); boundDN != "" {
+		identity.UserDN = boundDN
+		identity.Diagnostics = append(identity.Diagnostics, "whoami_dn_resolved")
+	} else {
+		identity.Diagnostics = append(identity.Diagnostics, "whoami_dn_empty")
+	}
 
-	entry, err := a.lookupUserEntry(conn, username)
+	entry, lookupStrategy, err := a.lookupUserEntry(conn, username, identity.UserDN)
 	if err != nil {
 		return LDAPIdentity{}, err
 	}
 	if entry != nil {
-		identity.Diagnostics = append(identity.Diagnostics, "user_entry_found")
+		if lookupStrategy == "" {
+			lookupStrategy = "unknown"
+		}
+		identity.Diagnostics = append(identity.Diagnostics, "user_entry_found:"+lookupStrategy)
 		if strings.TrimSpace(entry.DN) != "" {
 			identity.UserDN = entry.DN
 		}
@@ -140,6 +151,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
 		}
 	}
 
+	groupFilter := buildGroupMembershipFilter(identity.UserDN, principalCandidates(username))
 	groupEntries, err := conn.Search(ldap.NewSearchRequest(
 		a.baseDN,
 		ldap.ScopeWholeSubtree,
@@ -147,11 +159,7 @@ func (a *LDAPAuthenticator) AuthenticateAndFetchGroups(ctx context.Context, user
 		0,
 		0,
 		false,
-		fmt.Sprintf("(|(member=%s)(uniqueMember=%s)(memberUid=%s))",
-			ldap.EscapeFilter(identity.UserDN),
-			ldap.EscapeFilter(identity.UserDN),
-			ldap.EscapeFilter(username),
-		),
+		groupFilter,
 		[]string{"dn"},
 		nil,
 	))
@@ -272,10 +280,24 @@ func (a *LDAPAuthenticator) buildTLSConfig() (*tls.Config, error) {
 	return tlsConfig, nil
 }
 
-func (a *LDAPAuthenticator) lookupUserEntry(conn *ldap.Conn, username string) (*ldap.Entry, error) {
+func (a *LDAPAuthenticator) lookupUserEntry(conn *ldap.Conn, username string, userDNHint string) (*ldap.Entry, string, error) {
+	dnCandidates := compactTrimmedStrings([]string{userDNHint})
 	if looksLikeDN(username) {
+		dnCandidates = append(dnCandidates, strings.TrimSpace(username))
+	}
+	seenDN := make(map[string]struct{}, len(dnCandidates))
+	for _, dn := range dnCandidates {
+		key := normalizeDN(dn)
+		if key == "" {
+			continue
+		}
+		if _, ok := seenDN[key]; ok {
+			continue
+		}
+		seenDN[key] = struct{}{}
+
 		searchRes, err := conn.Search(ldap.NewSearchRequest(
-			username,
+			dn,
 			ldap.ScopeBaseObject,
 			ldap.NeverDerefAliases,
 			1,
@@ -286,37 +308,41 @@ func (a *LDAPAuthenticator) lookupUserEntry(conn *ldap.Conn, username string) (*
 			nil,
 		))
 		if err != nil {
-			return nil, fmt.Errorf("%w: unable to load user entry: %v", ErrLDAPOperationFailed, err)
+			if ldap.IsErrorWithCode(err, ldap.LDAPResultNoSuchObject) {
+				continue
+			}
+			return nil, "", fmt.Errorf("%w: unable to load user entry by dn: %v", ErrLDAPOperationFailed, err)
 		}
-		if len(searchRes.Entries) == 0 {
-			return nil, nil
+		if len(searchRes.Entries) > 0 {
+			return searchRes.Entries[0], "dn", nil
 		}
-		return searchRes.Entries[0], nil
 	}
 
-	searchRes, err := conn.Search(ldap.NewSearchRequest(
-		a.baseDN,
-		ldap.ScopeWholeSubtree,
-		ldap.NeverDerefAliases,
-		2,
-		0,
-		false,
-		fmt.Sprintf("(|(uid=%s)(cn=%s)(sAMAccountName=%s)(userPrincipalName=%s))",
-			ldap.EscapeFilter(username),
-			ldap.EscapeFilter(username),
-			ldap.EscapeFilter(username),
-			ldap.EscapeFilter(username),
-		),
-		[]string{"uid", "sAMAccountName", "userPrincipalName", "cn", "memberOf"},
-		nil,
-	))
-	if err != nil {
-		return nil, fmt.Errorf("%w: user lookup failed: %v", ErrLDAPOperationFailed, err)
+	for _, principal := range principalCandidates(username) {
+		searchRes, err := conn.Search(ldap.NewSearchRequest(
+			a.baseDN,
+			ldap.ScopeWholeSubtree,
+			ldap.NeverDerefAliases,
+			2,
+			0,
+			false,
+			fmt.Sprintf("(|(uid=%s)(cn=%s)(sAMAccountName=%s)(userPrincipalName=%s))",
+				ldap.EscapeFilter(principal),
+				ldap.EscapeFilter(principal),
+				ldap.EscapeFilter(principal),
+				ldap.EscapeFilter(principal),
+			),
+			[]string{"uid", "sAMAccountName", "userPrincipalName", "cn", "memberOf"},
+			nil,
+		))
+		if err != nil {
+			return nil, "", fmt.Errorf("%w: user lookup failed: %v", ErrLDAPOperationFailed, err)
+		}
+		if len(searchRes.Entries) > 0 {
+			return searchRes.Entries[0], "principal", nil
+		}
 	}
-	if len(searchRes.Entries) == 0 {
-		return nil, nil
-	}
-	return searchRes.Entries[0], nil
+	return nil, "", nil
 }
 
 func normalizeDN(value string) string {
@@ -352,6 +378,58 @@ func looksLikeDN(value string) bool {
 	return strings.Contains(value, "=") && strings.Contains(value, ",")
 }
 
+// principalCandidates returns the trimmed username followed by the short
+// forms derived from it: the part after the last backslash (DOMAIN\user)
+// and the part before the first "@" (user@realm). Blank values and
+// case-insensitive duplicates are dropped; a blank username yields nil.
+func principalCandidates(username string) []string {
+	trimmed := strings.TrimSpace(username)
+	if trimmed == "" {
+		return nil
+	}
+	variants := []string{trimmed}
+	if slash := strings.LastIndex(trimmed, `\`); slash >= 0 && slash < len(trimmed)-1 {
+		variants = append(variants, trimmed[slash+1:])
+	}
+	if at := strings.Index(trimmed, "@"); at > 0 {
+		variants = append(variants, trimmed[:at])
+	}
+
+	// Filter in order so the full username always stays first in the result.
+	result := make([]string, 0, 4)
+	known := make(map[string]struct{}, 4)
+	for _, variant := range variants {
+		name := strings.TrimSpace(variant)
+		folded := strings.ToLower(name)
+		if _, dup := known[folded]; dup || name == "" {
+			continue
+		}
+		known[folded] = struct{}{}
+		result = append(result, name)
+	}
+	return result
+}
+
+// buildGroupMembershipFilter ORs member/uniqueMember=userDN with one memberUid
+// clause per principal, escaping every value. With no usable input it returns
+// a match-nothing filter so a blank identity can never be granted every group.
+func buildGroupMembershipFilter(userDN string, principals []string) string {
+	clauses := make([]string, 0, 2+len(principals))
+	if userDN = strings.TrimSpace(userDN); userDN != "" {
+		escapedDN := ldap.EscapeFilter(userDN)
+		clauses = append(clauses, "(member="+escapedDN+")", "(uniqueMember="+escapedDN+")")
+	}
+	for _, principal := range principals {
+		if principal = strings.TrimSpace(principal); principal != "" {
+			clauses = append(clauses, "(memberUid="+ldap.EscapeFilter(principal)+")")
+		}
+	}
+	if len(clauses) == 0 {
+		return "(!(objectClass=*))"
+	}
+	return "(|" + strings.Join(clauses, "") + ")"
+}
+
 func ctxErr(ctx context.Context) error {
 	if ctx == nil {
 		return nil
diff --git a/internal/auth/ldap_test.go b/internal/auth/ldap_test.go
index 586c75b..039840d 100644
--- a/internal/auth/ldap_test.go
+++ b/internal/auth/ldap_test.go
@@ -37,3 +37,52 @@ func TestHasAnyGroup(t *testing.T) {
 		t.Fatal("expected empty required groups to allow")
 	}
 }
+
+// TestPrincipalCandidates verifies that UPN and DOMAIN\user logins expand to
+// the full login followed by the short account name, and that a plain
+// username yields a single candidate.
+func TestPrincipalCandidates(t *testing.T) {
+	// All three login forms below refer to the same short account name.
+	const (
+		short     = "L075239"
+		upn       = "L075239@corpau.wbcau.westpac.com.au"
+		downLevel = `CORPAU\L075239`
+	)
+	type scenario struct {
+		label string
+		login string
+		want  []string
+	}
+
+	scenarios := []scenario{
+		{label: "upn adds local part", login: upn, want: []string{upn, short}},
+		{label: "domain slash user adds sam", login: downLevel, want: []string{downLevel, short}},
+		{label: "plain username unchanged", login: short, want: []string{short}},
+	}
+
+	for _, sc := range scenarios {
+		t.Run(sc.label, func(t *testing.T) {
+			got := principalCandidates(sc.login)
+			if len(got) != len(sc.want) {
+				t.Fatalf("unexpected candidate count: got=%d want=%d (%#v)", len(got), len(sc.want), got)
+			}
+			// Lengths are equal here, so indexing got by i cannot go out of range.
+			for i, want := range sc.want {
+				if got[i] != want {
+					t.Fatalf("unexpected candidate at %d: got=%q want=%q", i, got[i], want)
+				}
+			}
+		})
+	}
+}
+
+// TestBuildGroupMembershipFilter pins the OR filter built from one DN and two principals.
+func TestBuildGroupMembershipFilter(t *testing.T) {
+	const userDN = "CN=User,OU=Users,DC=corpau,DC=wbcau,DC=westpac,DC=com,DC=au"
+	principals := []string{"L075239@corpau.wbcau.westpac.com.au", "L075239"}
+	want := "(|(member=" + userDN + ")(uniqueMember=" + userDN + ")" +
+		"(memberUid=" + principals[0] + ")(memberUid=" + principals[1] + "))"
+	if got := buildGroupMembershipFilter(userDN, principals); got != want {
+		t.Fatalf("unexpected group filter:\n got: %s\nwant: %s", got, want)
+	}
+}
diff --git a/plan.md b/plan.md
index deb2041..c262d21 100644
--- a/plan.md
+++ b/plan.md
@@ -305,10 +305,15 @@ The target architecture is:
 - [x] Validate/add canonical `vm_hourly_stats` indexes for snapshot time, vCenter+time, VM identity+time, and trace lookup.
 - [x] Add PostgreSQL monthly partitioning for `vm_hourly_stats` behind migration controls.
 - [x] Benchmark Go vs SQL on canonical Postgres tables using representative production-scale data.
-  - Production-scale Postgres run completed on 2026-04-21 via one-shot canonical benchmark (`-benchmark-aggregations` with `runs_per_mode=1`, `driver=postgres`).
-  - Daily window `2026-04-20T00:00:00Z` to `2026-04-21T00:00:00Z`: Go `4.000602432s` (`14881` rows) vs SQL `1h17m19.039092561s` (`14920` rows), with Go ~`1159.59x` faster on this run.
-  - Monthly window `2026-04-01T00:00:00Z` to `2026-05-01T00:00:00Z`: Go `3.529410947s` (`15871` rows) vs SQL `3.313037973s` (`15873` rows), near parity with SQL slightly faster (~`0.216s`, `6.1%`).
-  - Decision remains unchanged: keep Go as scheduled default and treat SQL as fallback/backfill until SQL shows a clear, repeatable runtime win across canonical workloads.
+  - Production-scale Postgres benchmark runs completed on 2026-04-21 via one-shot canonical benchmark (`-benchmark-aggregations`, `driver=postgres`, with `runs_per_mode=1` and `runs_per_mode=3`).
+  - Run A (pre-tuning), daily window `2026-04-20T00:00:00Z` to `2026-04-21T00:00:00Z`: Go `4.000602432s` (`14881` rows) vs SQL `1h17m19.039092561s` (`14920` rows), with Go ~`1159.59x` faster.
+  - Run A (pre-tuning), monthly window `2026-04-01T00:00:00Z` to `2026-05-01T00:00:00Z`: Go `3.529410947s` (`15871` rows) vs SQL `3.313037973s` (`15873` rows), near parity with SQL slightly faster (~`0.216s`, `6.1%`).
+  - Run B (after PostgreSQL tuning), daily window `2026-04-21T00:00:00Z` to `2026-04-22T00:00:00Z`: Go `2.277889486s` (`14831` rows) vs SQL `1m31.273491543s` (`14839` rows), with Go still ~`40.07x` faster.
+  - Run B (after PostgreSQL tuning), monthly window `2026-04-01T00:00:00Z` to `2026-05-01T00:00:00Z`: Go `3.947474215s` (`15871` rows) vs SQL `2.758716002s` (`15873` rows), with SQL ~`1.43x` faster.
+  - Run C (after PostgreSQL tuning, `runs=3`), daily window `2026-04-21T00:00:00Z` to `2026-04-22T00:00:00Z`: Go avg `2.261369712s` (min `2.169537168s`, median `2.191474445s`, max `2.423097524s`, rows `14831`) vs SQL avg `1m31.738727387s` (min `1m29.960115863s`, median `1m32.068576507s`, max `1m33.187489791s`, rows `14839`), with Go ~`40.57x` faster by average.
+  - Run C (after PostgreSQL tuning, `runs=3`), monthly window `2026-04-01T00:00:00Z` to `2026-05-01T00:00:00Z`: Go avg `3.705308832s` (min `3.696553751s`, median `3.70776704s`, max `3.711605706s`, rows `15871`) vs SQL avg `3.065612298s` (min `2.873749798s`, median `3.022090149s`, max `3.300996948s`, rows `15873`), with SQL ~`1.21x` faster by average (~`17.26%` faster than Go).
+  - Tuning impact between Run A and Run B: daily SQL improved ~`50.83x`, daily Go improved ~`1.76x`, monthly SQL improved ~`1.20x`, and monthly Go regressed (~`0.89x` of prior speed).
+  - Decision remains unchanged: keep Go as scheduled default and treat SQL as fallback/backfill until SQL shows a clear, repeatable runtime win across canonical workloads, especially on daily windows (where Go remains consistently dominant across runs).
 - [x] Keep Go as scheduled default unless SQL shows clear and repeatable runtime wins.
 - [x] If SQL wins, roll out behind a controlled flag before any default switch.