ipfs · lidel · May 25, 2020 · May 25, 2020 · May 25, 2020 · Stebalien
@@ -41,6 +41,9 @@ var defaultKnownGateways = map[string]config.GatewaySpec{
 	"dweb.link":       subdomainGatewaySpec,
 }
 
+// dnsLabelMaxLength is the maximum length of a single DNS label (https://tools.ietf.org/html/rfc1034#page-7)
+const dnsLabelMaxLength int = 63
+
 // HostnameOption rewrites an incoming request based on the Host header.
 func HostnameOption() ServeOption {
 	return func(n *core.IpfsNode, _ net.Listener, mux *http.ServeMux) (*http.ServeMux, error) {
@@ -151,16 +154,29 @@ func HostnameOption() ServeOption {
 					return
 				}
 
-				// Do we need to fix multicodec in PeerID represented as CIDv1?
-				if isPeerIDNamespace(ns) {
-					keyCid, err := cid.Decode(rootID)
-					if err == nil && keyCid.Type() != cid.Libp2pKey {
+				// Check if rootID is a valid CID
+				if rootCID, err := cid.Decode(rootID); err == nil {
+					// Do we need to redirect CID to a canonical DNS representation?
+					hostPrefix := toDNSPrefix(rootID)
+					if !strings.HasPrefix(r.Host, hostPrefix) {
 						if newURL, ok := toSubdomainURL(hostname, pathPrefix+r.URL.Path, r); ok {
-							// Redirect to CID fixed inside of toSubdomainURL()
+							// Redirect to CID split at deterministic places
+							// to ensure CID always gets the same Origin on the web
 							http.Redirect(w, r, newURL, http.StatusMovedPermanently)
 							return
 						}
 					}
+
+					// Do we need to fix multicodec in PeerID represented as CIDv1?
+					if isPeerIDNamespace(ns) {
+						if rootCID.Type() != cid.Libp2pKey {
+							if newURL, ok := toSubdomainURL(hostname, pathPrefix+r.URL.Path, r); ok {
+								// Redirect to CID fixed inside of toSubdomainURL()
+								http.Redirect(w, r, newURL, http.StatusMovedPermanently)
+								return
+							}
+						}
+					}
 				}
 
 				// Rewrite the path to not use subdomains
@@ -226,8 +242,16 @@ func knownSubdomainDetails(hostname string, knownGateways map[string]config.Gate
 			break
 		}
 
-		// Merge remaining labels (could be a FQDN with DNSLink)
-		rootID := strings.Join(labels[:i-1], ".")
+		idLabels := labels[:i-1]
+		// Merge remaining DNS labels and see if it is a CID or something else
+		// (DNS-friendly text representation splits CID to fit each chunk in 63 characters)
+		rootID := strings.Join(idLabels, "")
+		if _, err := cid.Decode(rootID); err != nil {
+			// Not a CID:
+			// Return rootID in original form, separated with '.'
+			// (mostly used by FQDNs with DNSLink)
+			rootID = strings.Join(idLabels, ".")
+		}
 		return gw, fqdn, ns, rootID, true
 	}
 	// not a known subdomain gateway
@@ -266,6 +290,37 @@ func isPeerIDNamespace(ns string) bool {
 	}
 }
 
+// toDNSPrefix strips every '.' from id and, when the result is longer than dnsLabelMaxLength, re-splits it into '.'-joined labels of at most dnsLabelMaxLength characters
+func toDNSPrefix(id string) (prefix string) {
+	s := strings.Replace(id, ".", "", -1) // drop any existing '.' separators so the split below is independent of how the input was split
+
+	// Fits in a single DNS label once the dots are gone: return it unsplit
+	if len(s) <= dnsLabelMaxLength {
+		return s
+	}
+
+	parts := make(
+		[]string,
+		// number of labels needed: same as ceil( len(s) / dnsLabelMaxLength )
+		(len(s)+dnsLabelMaxLength-1)/dnsLabelMaxLength,
+	)
+
+	firstPartLen := len(s) % dnsLabelMaxLength
+
+	// length is an exact multiple of dnsLabelMaxLength: the first label is a full-length one as well
+	if firstPartLen == 0 {
+		firstPartLen = dnsLabelMaxLength
+	}
+
+	// Fill labels from right to left: every label after the first is exactly dnsLabelMaxLength long, the first one takes the remainder
+	for i := len(parts) - 1; i > 0; i-- {
+		parts[i] = s[(i-1)*dnsLabelMaxLength+firstPartLen : i*dnsLabelMaxLength+firstPartLen]
+	}
+	parts[0] = s[:firstPartLen]
+
+	return strings.Join(parts, ".")
+}
+
 // Converts a hostname/path to a subdomain-based URL, if applicable.
 func toSubdomainURL(hostname, path string, r *http.Request) (redirURL string, ok bool) {
 	var scheme, ns, rootID, rest string
@@ -340,6 +395,7 @@ func toSubdomainURL(hostname, path string, r *http.Request) (redirURL string, ok
 			// produce a subdomain URL
 			return "", false
 		}
+		rootID = toDNSPrefix(rootID)
 	}
 
 	return safeRedirectURL(fmt.Sprintf(

@@ -23,11 +23,13 @@ func TestToSubdomainURL(t *testing.T) {
 		{"localhost:8080", "/ipns/dnslink.io", "http://dnslink.io.ipns.localhost:8080/", true},
 		// CIDv0 → CIDv1base32
 		{"localhost", "/ipfs/QmbCMUZw6JFeZ7Wp9jkzbye3Fzp2GGcPgC3nmeUjfVF87n", "http://bafybeif7a7gdklt6hodwdrmwmxnhksctcuav6lfxlcyfz4khzl3qfmvcgu.ipfs.localhost/", true},
+		// CIDv1 with long sha512 (requires DNS label length workaround)
+		{"localhost", "/ipfs/bafkrgqe3ohjcjplc6n4f3fwunlj6upltggn7xqujbsvnvyw764srszz4u4rshq6ztos4chl4plgg4ffyyxnayrtdi5oc4xb2332g645433aeg", "http://bafkrgqe3ohjcjplc6n4f3fwunlj6upltggn7xqujbsvnvy.w764srszz4u4rshq6ztos4chl4plgg4ffyyxnayrtdi5oc4xb2332g645433aeg.ipfs.localhost/", true},
 		// PeerID as CIDv1 needs to have libp2p-key multicodec
 		{"localhost", "/ipns/QmY3hE8xgFCjGcz6PHgnvJz5HZi1BaKRfPkn1ghZUcYMjD", "http://bafzbeieqhtl2l3mrszjnhv6hf2iloiitsx7mexiolcnywnbcrzkqxwslja.ipns.localhost/", true},
 		{"localhost", "/ipns/bafybeickencdqw37dpz3ha36ewrh4undfjt2do52chtcky4rxkj447qhdm", "http://bafzbeickencdqw37dpz3ha36ewrh4undfjt2do52chtcky4rxkj447qhdm.ipns.localhost/", true},
 		// PeerID: ed25519+identity multihash
-		{"localhost", "/ipns/12D3KooWFB51PRY9BxcXSH6khFXw1BZeszeLDy7C8GciskqCTZn5", "http://bafzaajaiaejcat4yhiwnr2qz73mtu6vrnj2krxlpfoa3wo2pllfi37quorgwh2jw.ipns.localhost/", true},
+		{"localhost", "/ipns/12D3KooWFB51PRY9BxcXSH6khFXw1BZeszeLDy7C8GciskqCTZn5", "http://ba.fzaajaiaejcat4yhiwnr2qz73mtu6vrnj2krxlpfoa3wo2pllfi37quorgwh2jw.ipns.localhost/", true},
 	} {
 		url, ok := toSubdomainURL(test.hostname, test.path, r)
 		if ok != test.ok || url != test.url {
@@ -75,6 +77,28 @@ func TestPortStripping(t *testing.T) {
 
 }
 
+func TestDNSPrefix(t *testing.T) {
+	for _, test := range []struct {
+		in  string
+		out string
+	}{
+		// <= 63 characters: returned as a single label, with any dots present in the input removed
+		{"bafybeickencdqw37dpz3ha36ewrh4undfjt2do52chtcky4rxkj447qhdm", "bafybeickencdqw37dpz3ha36ewrh4undfjt2do52chtcky4rxkj447qhdm"},
+		{"bafy.beickencdqw37dpz3ha36ewrh4undfjt2do52chtcky4rxkj447qhdm", "bafybeickencdqw37dpz3ha36ewrh4undfjt2do52chtcky4rxkj447qhdm"},
+		// > 63 characters: re-split so the left-most label holds the remainder, no matter where the input had dots
+		{"bafzaajaiaejca4syrpdu6gdx4wsdnokxkprgzxf4wrstuc34gxw5k5jrag2so5gk", "ba.fzaajaiaejca4syrpdu6gdx4wsdnokxkprgzxf4wrstuc34gxw5k5jrag2so5gk"},
+		{"bafzaajaiaejca4syrpdu6g.dx4wsdnokxkprgzxf4wrs.tuc34gxw5k5jrag2so5gk", "ba.fzaajaiaejca4syrpdu6gdx4wsdnokxkprgzxf4wrstuc34gxw5k5jrag2so5gk"},
+		{"bafkrgqe3ohjcjplc6n4f3fwunlj6upltggn7xqujbsvnvyw764srszz4u4rshq6ztos4chl4plgg4ffyyxnayrtdi5oc4xb2332g645433aeg", "bafkrgqe3ohjcjplc6n4f3fwunlj6upltggn7xqujbsvnvy.w764srszz4u4rshq6ztos4chl4plgg4ffyyxnayrtdi5oc4xb2332g645433aeg"},
+		{"bafkrgqe3ohjcjplc6n4f3fw.unlj6upltggn7xqujbsvnvyw764srszz4u4rshq6ztos4chl4plgg4.ffyyxnayrtdi5oc4xb2332g645433aeg", "bafkrgqe3ohjcjplc6n4f3fwunlj6upltggn7xqujbsvnvy.w764srszz4u4rshq6ztos4chl4plgg4ffyyxnayrtdi5oc4xb2332g645433aeg"},
+	} {
+		out := toDNSPrefix(test.in)
+		if out != test.out {
+			t.Errorf("(%s): returned '%s', expected '%s'", test.in, out, test.out)
+		}
+	}
+
+}
+
 func TestKnownSubdomainDetails(t *testing.T) {
 	gwSpec := config.GatewaySpec{
 		UseSubdomains: true,
@@ -127,6 +151,17 @@ func TestKnownSubdomainDetails(t *testing.T) {
 		{"foo.dweb.ipfs.pvt.k12.ma.us", "", "", "", false},
 		{"bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am.ipfs.dweb.ipfs.pvt.k12.ma.us", "dweb.ipfs.pvt.k12.ma.us", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
 		{"bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju.ipns.dweb.ipfs.pvt.k12.ma.us", "dweb.ipfs.pvt.k12.ma.us", "ipns", "bafzbeihe35nmjqar22thmxsnlsgxppd66pseq6tscs4mo25y55juhh6bju", true},
+		// edge case check: understand split CIDs (workaround for 63 character limit of a single DNS label https://github.com/ipfs/go-ipfs/issues/7318)
+		// Note: canonical split is at 63, but we support arbitrary splits for improved UX
+		// Short CID (eg. unnecessarily split by user)
+		{"baf.kreicysg23kiwv34eg2d7.qweipxwosdo2py4ldv4.2nbauguluen5v6am.ipfs.dweb.link", "dweb.link", "ipfs", "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am", true},
+		// ED25519 libp2p-key
+		{"ba.fzaajaiaejca4syrpdu6gdx4wsdnokxkprgzxf4wrstuc34gxw5k5jrag2so5gk.ipfs.dweb.link", "dweb.link", "ipfs", "bafzaajaiaejca4syrpdu6gdx4wsdnokxkprgzxf4wrstuc34gxw5k5jrag2so5gk", true},
+		{"bafzaajaiaejca4syrpdu6gdx4wsdnok.xkprgzxf4wrstuc34gxw5k5jrag2so5gk.ipfs.dweb.link", "dweb.link", "ipfs", "bafzaajaiaejca4syrpdu6gdx4wsdnokxkprgzxf4wrstuc34gxw5k5jrag2so5gk", true},
+		{"bafzaajaiaejca4sy.rpdu6gdx4wsdnok.xkprgzxf4wrstuc34g.xw5k5jrag2so5gk.ipfs.dweb.link", "dweb.link", "ipfs", "bafzaajaiaejca4syrpdu6gdx4wsdnokxkprgzxf4wrstuc34gxw5k5jrag2so5gk", true},
+		// CID created with --hash sha2-512
+		{"bafkrgqe3ohjcjplc6n4f3fwunlj6upltggn7xqujbsvnvyw764srszz4u4rshq.6ztos4chl4plgg4ffyyxnayrtdi5oc4xb2332g645433aeg.ipfs.dweb.link", "dweb.link", "ipfs", "bafkrgqe3ohjcjplc6n4f3fwunlj6upltggn7xqujbsvnvyw764srszz4u4rshq6ztos4chl4plgg4ffyyxnayrtdi5oc4xb2332g645433aeg", true},
+		{"bafkrgqe3ohjcjplc6n4f3fwunlj6upltg.gn7xqujbsvnvyw764srszz4u4rshq6ztos4chl4plgg4f.fyyxnayrtdi5oc4xb2332g645433aeg.ipfs.dweb.link", "dweb.link", "ipfs", "bafkrgqe3ohjcjplc6n4f3fwunlj6upltggn7xqujbsvnvyw764srszz4u4rshq6ztos4chl4plgg4ffyyxnayrtdi5oc4xb2332g645433aeg", true},
 		// other namespaces
 		{"api.localhost", "", "", "", false},
 		{"peerid.p2p.localhost", "localhost", "p2p", "peerid", true},

@@ -92,6 +92,10 @@ test_launch_ipfs_daemon --offline
 test_expect_success "Add test text file" '
   CID_VAL="hello"
   CIDv1=$(echo $CID_VAL | ipfs add --cid-version 1 -Q)
+  CIDv1_LONG=$(echo $CID_VAL | ipfs add --cid-version 1 --hash sha2-512 -Q)
+  CID_DNS_SPLIT_CANONICAL="bafkrgqhhyivzstcz3hhswshfjgy6ertgmnqeleynhwt4dl.fsthi4hn7zgh4uvlsb5xncykzapi3ocd4lzogukir6ksdy6wzrnz6ohnv4aglcs"
+  CID_DNS_SPLIT_CUSTOM="baf.krgqhhyivzstcz3hhswshfjgy6ertgmnqeleynhwt4dl.fsthi4hn7zgh4uvlsb5xncykzapi3ocd4lzogukir.6ksdy6wzrnz6ohnv4aglcs"
+  CID_DNS_SPLIT_CUSTOM2=$(echo $CIDv1 | sed -r -e "s/^./&./")
   CIDv0=$(echo $CID_VAL | ipfs add --cid-version 0 -Q)
   CIDv0to1=$(echo "$CIDv0" | ipfs cid base32)
 '
@@ -119,7 +123,6 @@ test_expect_success "Publish test text file to IPNS" '
   test_cmp expected2 output
 '
 
-
 # ensure we start with empty Gateway.PublicGateways
 test_expect_success 'start daemon with empty config for Gateway.PublicGateways' '
   test_kill_ipfs_daemon &&
@@ -262,6 +265,7 @@ test_expect_success "request for deep path resource at {cid}.ipfs.localhost/sub/
   test_should_contain "subdir2-bar" list_response
 '
 
+
 # *.ipns.localhost
 
 # <libp2p-key>.ipns.localhost
@@ -501,6 +505,58 @@ test_hostname_gateway_response_should_contain \
   "http://127.0.0.1:$GWAY_PORT" \
   "404 Not Found"
 
+## ============================================================================
+## Special handling of CIDs that do not fit in a single DNS Label (>63chars)
+## https://github.com/ipfs/go-ipfs/issues/7318
+## ============================================================================
+
+
+# local: *.localhost
+test_localhost_gateway_response_should_contain \
+  "request for a long CID at localhost/ipfs/{CIDv1} returns Location HTTP header for DNS-safe subdomain redirect in browsers" \
+  "http://localhost:$GWAY_PORT/ipfs/$CIDv1_LONG" \
+  "Location: http://${CID_DNS_SPLIT_CANONICAL}.ipfs.localhost:$GWAY_PORT/"
+
+test_localhost_gateway_response_should_contain \
+  "request for {long.CID}.ipfs.localhost should return expected payload" \
+  "http://${CID_DNS_SPLIT_CANONICAL}.ipfs.localhost:$GWAY_PORT" \
+  "$CID_VAL"
+
+test_localhost_gateway_response_should_contain \
+  "request for {custom.split.of.long.CID}.ipfs.localhost should return redirect to a canonical Origin" \
+  "http://${CID_DNS_SPLIT_CUSTOM}.ipfs.localhost:$GWAY_PORT/ipfs/$CIDv1" \
+  "Location: http://${CID_DNS_SPLIT_CANONICAL}.ipfs.localhost:$GWAY_PORT/"
+
+test_localhost_gateway_response_should_contain \
+  "request for {unnecessary.split.of.short.CID}.ipfs.localhost should return redirect to a canonical Origin" \
+  "http://${CID_DNS_SPLIT_CUSTOM2}.ipfs.localhost:$GWAY_PORT/ipfs/$CIDv1" \
+  "Location: http://${CIDv1}.ipfs.localhost:$GWAY_PORT/"
+
+# public gateway: *.example.com
+
+test_hostname_gateway_response_should_contain \
+  "request for a long CID at example.com/ipfs/{CIDv1} returns Location HTTP header for DNS-safe subdomain redirect in browsers" \
+  "example.com" \
+  "http://127.0.0.1:$GWAY_PORT/ipfs/$CIDv1_LONG" \
+  "Location: http://${CID_DNS_SPLIT_CANONICAL}.ipfs.example.com"
+
+test_hostname_gateway_response_should_contain \
+  "request for {long.CID}.ipfs.example.com should return expected payload" \
+  "${CID_DNS_SPLIT_CANONICAL}.ipfs.example.com" \
+  "http://127.0.0.1:$GWAY_PORT" \
+  "$CID_VAL"
+
+test_hostname_gateway_response_should_contain \
+  "request for {custom.split.of.long.CID}.ipfs.example.com should return redirect to a canonical Origin" \
+  "${CID_DNS_SPLIT_CUSTOM}.ipfs.example.com" \
+  "http://127.0.0.1:$GWAY_PORT" \
+  "Location: http://${CID_DNS_SPLIT_CANONICAL}.ipfs.example.com"
+
+test_hostname_gateway_response_should_contain \
+  "request for {unnecessary.split.of.short.CID}.ipfs.example.com should return redirect to a canonical Origin" \
+  "${CID_DNS_SPLIT_CUSTOM2}.ipfs.example.com" \
+  "http://127.0.0.1:$GWAY_PORT" \
+  "Location: http://${CIDv1}.ipfs.example.com"
 
 ## ============================================================================
 ## Test path-based requests with a custom hostname config