From 0206070abf6deb7bb7491168c051a67982538ba8 Mon Sep 17 00:00:00 2001
From: Miek Gieben <miek@miek.nl>
Date: Wed, 26 Aug 2015 14:13:14 +0100
Subject: [PATCH] Address commments from Alex

---
 sanitize.go      | 101 +++++++++--------------------------------------
 sanitize_test.go |  58 +++++----------------------
 2 files changed, 28 insertions(+), 131 deletions(-)
diff --git a/sanitize.go b/sanitize.go
index cbe8c291c..d10d6edb3 100644
--- a/sanitize.go
+++ b/sanitize.go
@@ -1,26 +1,13 @@
 package dns
 
-import "strings"
-
 // Dedup removes identical RRs from rrs. It preserves the original ordering.
 // The lowest TTL of any duplicates is used in the remaining one.
-//
-// TODO(miek): This function will be extended to also look for CNAMEs and DNAMEs.
-// if found, it will prune rrs from the "other data" that can exist. Example:
-// if it finds a: a.miek.nl. CNAME foo, all other RRs with the ownername a.miek.nl.
-// will be removed. When a DNAME is found all RRs with an ownername below that of
-// the DNAME will be removed.
 func Dedup(rrs []RR) []RR {
 	m := make(map[string]RR)
 	keys := make([]string, 0, len(rrs))
 
-	// Save possible cname and dname domainnames. Currently a slice, don't
-	// expect millions here..
-	cname := []string{}
-	dname := []string{}
-
 	for _, r := range rrs {
-		key, end := normalizedString(r)
+		key := normalizedString(r)
 		keys = append(keys, key)
 		if _, ok := m[key]; ok {
 			// Shortest TTL wins.
@@ -30,37 +17,18 @@ func Dedup(rrs []RR) []RR {
 			continue
 		}
 
-		if r.Header().Rrtype == TypeCNAME {
-			// we do end+3 here, so we capture the full domain name *and*
-			// the class field which mnemonic is always two chars.
-			cname = append(cname, key[:end+3])
-
-		}
-		if r.Header().Rrtype == TypeDNAME {
-			dname = append(dname, key[:end+3])
-		}
-
 		m[key] = r
 	}
 	// If the length of the result map equals the amount of RRs we got,
 	// it means they were all different. We can then just return the original rrset.
-	// We can only do this when we haven't found a CNAME or DNAME.
-	if len(m) == len(rrs) && len(cname) == 0 && len(dname) == 0 {
+	if len(m) == len(rrs) {
 		return rrs
 	}
 
 	ret := make([]RR, 0, len(rrs))
 	for i, r := range rrs {
-		// If keys[i] lives in the map, we should copy it and remove
-		// it from the map.
+		// If keys[i] lives in the map, we should copy and remove it.
 		if _, ok := m[keys[i]]; ok {
-			if needsDeletion(r, keys[i], cname, dname) {
-				// It the RR is masked by an CNAME or DNAME, we only
-				// delete it from the map, but don't copy it.
-				delete(m, keys[i])
-				continue
-			}
-
 			delete(m, keys[i])
 			ret = append(ret, r)
 		}
@@ -74,70 +42,37 @@ func Dedup(rrs []RR) []RR {
 }
 
 // normalizedString returns a normalized string from r. The TTL
-// is removed and the domain name is lowercased. The returned integer
-// is the index where the domain name ends + 1.
-func normalizedString(r RR) (string, int) {
+// is removed and the domain name is lowercased. We go from this:
+// DomainName<TAB>TTL<TAB>CLASS<TAB>TYPE<TAB>RDATA to:
+// lowercasename<TAB>CLASS<TAB>TYPE...
+func normalizedString(r RR) string {
 	// A string Go DNS makes has: domainname<TAB>TTL<TAB>...
 	b := []byte(r.String())
 
-	// find the first non-escaped tab, then another, so we capture
-	// where the TTL lives.
+	// find the first non-escaped tab, then another, so we capture where the TTL lives.
 	esc := false
 	ttlStart, ttlEnd := 0, 0
-	for i, c := range b {
-		if c == '\\' {
-			esc = true
-			continue
-		}
-		if esc {
-			esc = false
-			continue
-		}
-		if c == '\t' {
+	for i := 0; i < len(b) && ttlEnd == 0; i++ {
+		switch {
+		case b[i] == '\\':
+			esc = !esc
+		case b[i] == '\t' && !esc:
 			if ttlStart == 0 {
 				ttlStart = i
 				continue
 			}
 			if ttlEnd == 0 {
 				ttlEnd = i
-				break
 			}
-		}
-		if c >= 'A' && c <= 'Z' {
-			b[i] = c + 32
+		case b[i] >= 'A' && b[i] <= 'Z' && !esc:
+			b[i] += 32
+		default:
+			esc = false
 		}
 	}
 
 	// remove TTL.
 	copy(b[ttlStart:], b[ttlEnd:])
 	cut := ttlEnd - ttlStart
-	// ttlStart + 3 puts us on the start of the rdata
-	return string(b[:len(b)-cut]), ttlStart
-}
-
-// needsDeletion checks if the RR is masked by either a CNAME or a DNAME.
-// If so it return true.
-func needsDeletion(r RR, s string, cname, dname []string) bool {
-	// For CNAME we can do strings.HasPrefix with s.
-	// For DNAME we can do strings.Contains with s.
-	// Either signals a removal of this RR.
-	for _, c := range cname {
-		if strings.HasPrefix(s, c) {
-			if r.Header().Rrtype == TypeCNAME {
-				// don't delete yourself
-				continue
-			}
-			return true
-		}
-	}
-	for _, d := range dname {
-		if strings.Contains(s, d) {
-			if r.Header().Rrtype == TypeDNAME && strings.HasPrefix(s, d) {
-				// don't delete yourself
-				continue
-			}
-			return true
-		}
-	}
-	return false
+	return string(b[:len(b)-cut])
 }
diff --git a/sanitize_test.go b/sanitize_test.go
index d5ff83cea..eb0248a84 100644
--- a/sanitize_test.go
+++ b/sanitize_test.go
@@ -49,55 +49,17 @@ func TestDedup(t *testing.T) {
 	}
 }
 
-func TestDedupWithCNAMEDNAME(t *testing.T) {
-	testcases := map[[4]RR][]string{
-		[...]RR{
-			newRR(t, "miek.Nl. CNAME a."),
-			newRR(t, "miEk.nl. IN A 127.0.0.1"),
-			newRR(t, "miek.Nl. IN A 127.0.0.1"),
-			newRR(t, "miek.de. IN A 127.0.0.1"),
-		}: []string{"miek.Nl.\t3600\tIN\tCNAME\ta.",
-			"miek.de.\t3600\tIN\tA\t127.0.0.1"},
-		[...]RR{
-			newRR(t, "Miek.nl. CNAME a."),
-			newRR(t, "mIek.nl. CNAME a."),
-			newRR(t, "miEk.nl. CNAME a."),
-			newRR(t, "mieK.nl. CNAME a."),
-		}: []string{"Miek.nl.\t3600\tIN\tCNAME\ta."},
-		[...]RR{
-			newRR(t, "miek.nl. CNAME a."),
-			newRR(t, "a.miek.nl. CNAME a."),
-			newRR(t, "a.miek.nl. CNAME a."),
-			newRR(t, "a.miek.nl. CNAME a."),
-		}: []string{"miek.nl.\t3600\tIN\tCNAME\ta.",
-			"a.miek.nl.\t3600\tIN\tCNAME\ta."},
-		[...]RR{
-			newRR(t, "miek.nl. DNAME a."),
-			newRR(t, "a.miek.nl. CNAME a."),
-			newRR(t, "b.miek.nl. IN A 127.0.0.1"),
-			newRR(t, "a.miek.de. IN A 127.0.0.1"),
-		}: []string{"miek.nl.\t3600\tIN\tDNAME\ta.",
-			"a.miek.de.\t3600\tIN\tA\t127.0.0.1"},
-		[...]RR{
-			newRR(t, "miek.nl. DNAME a."),
-			newRR(t, "a.miek.nl. DNAME a."),
-			newRR(t, "b.miek.nl. DNAME b."),
-			newRR(t, "a.b.miek.nl. DNAME a.b"),
-		}: []string{"miek.nl.\t3600\tIN\tDNAME\ta."},
+func BenchmarkDedup(b *testing.B) {
+	rrs := []RR{
+		newRR(nil, "miEk.nl. 2000 IN A 127.0.0.1"),
+		newRR(nil, "mieK.Nl. 1000 IN A 127.0.0.1"),
+		newRR(nil, "Miek.nL. 500 IN A 127.0.0.1"),
 	}
-
-	for rr, expected := range testcases {
-		out := Dedup([]RR{rr[0], rr[1], rr[2], rr[3]})
-		for i, o := range out {
-			if o.String() != expected[i] {
-				t.Fatalf("expected %v, got %v", expected[i], o.String())
-			}
-		}
+	for i := 0; i < b.N; i++ {
+		Dedup(rrs)
 	}
 }
 
-// BenchMark test as well TODO(miek)
-
 func TestNormalizedString(t *testing.T) {
 	tests := map[RR]string{
 		newRR(t, "mIEk.Nl. 3600 IN A 127.0.0.1"):     "miek.nl.\tIN\tA\t127.0.0.1",
@@ -105,9 +67,9 @@ func TestNormalizedString(t *testing.T) {
 		newRR(t, "m\\\tIeK.nl. 3600 in A 127.0.0.1"): "m\\tiek.nl.\tIN\tA\t127.0.0.1",
 	}
 	for tc, expected := range tests {
-		a1, _ := normalizedString(tc)
-		if a1 != expected {
-			t.Logf("expected %s, got %s", expected, a1)
+		n := normalizedString(tc)
+		if n != expected {
+			t.Logf("expected %s, got %s", expected, n)
 			t.Fail()
 		}
 	}