@@ -349,18 +349,19 @@ def _encode(t):
349349 split = (scheme,) + split
350350 self.checkRoundtrips(url, parsed, split)
351351
352- def checkJoin(self, base, relurl, expected):
352+ def checkJoin(self, base, relurl, expected, *, relroundtrip=True ):
353353 with self.subTest(base=base, relurl=relurl):
354354 self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
355355 baseb = base.encode('ascii')
356356 relurlb = relurl.encode('ascii')
357357 expectedb = expected.encode('ascii')
358358 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
359359
360- relurl = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
361- self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
362- relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
363- self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
360+ if relroundtrip:
361+ relurl = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
362+ self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
363+ relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
364+ self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
364365
365366 def test_unparse_parse(self):
366367 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
@@ -526,8 +527,6 @@ def test_RFC3986(self):
526527
527528 def test_urljoins(self):
528529 self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
529- self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
530- self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
531530 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
532531 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
533532 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
@@ -548,8 +547,6 @@ def test_urljoins(self):
548547 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
549548 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
550549 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
551- self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
552- self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
553550 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
554551 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
555552 self.checkJoin('http:///', '..','http:///')
@@ -579,6 +576,53 @@ def test_urljoins(self):
579576 # issue 23703: don't duplicate filename
580577 self.checkJoin('a', 'b', 'b')
581578
579+ # Test with empty (but defined) components.
580+ self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
581+ self.checkJoin(RFC1808_BASE, '#', 'http://a/b/c/d;p?q#', relroundtrip=False)
582+ self.checkJoin(RFC1808_BASE, '#z', 'http://a/b/c/d;p?q#z')
583+ self.checkJoin(RFC1808_BASE, '?', 'http://a/b/c/d;p?', relroundtrip=False)
584+ self.checkJoin(RFC1808_BASE, '?#z', 'http://a/b/c/d;p?#z', relroundtrip=False)
585+ self.checkJoin(RFC1808_BASE, '?y', 'http://a/b/c/d;p?y')
586+ self.checkJoin(RFC1808_BASE, ';', 'http://a/b/c/;')
587+ self.checkJoin(RFC1808_BASE, ';?y', 'http://a/b/c/;?y')
588+ self.checkJoin(RFC1808_BASE, ';#z', 'http://a/b/c/;#z')
589+ self.checkJoin(RFC1808_BASE, ';x', 'http://a/b/c/;x')
590+ self.checkJoin(RFC1808_BASE, '/w', 'http://a/w')
591+ self.checkJoin(RFC1808_BASE, '//', 'http://a/b/c/d;p?q#f')
592+ self.checkJoin(RFC1808_BASE, '//#z', 'http://a/b/c/d;p?q#z')
593+ self.checkJoin(RFC1808_BASE, '//?y', 'http://a/b/c/d;p?y')
594+ self.checkJoin(RFC1808_BASE, '//;x', 'http://;x')
595+ self.checkJoin(RFC1808_BASE, '///w', 'http://a/w')
596+ self.checkJoin(RFC1808_BASE, '//v', 'http://v')
597+ # For backward compatibility with RFC1630, the scheme name is allowed
598+ # to be present in a relative reference if it is the same as the base
599+ # URI scheme.
600+ self.checkJoin(RFC1808_BASE, 'http:', 'http://a/b/c/d;p?q#f')
601+ self.checkJoin(RFC1808_BASE, 'http:#', 'http://a/b/c/d;p?q#', relroundtrip=False)
602+ self.checkJoin(RFC1808_BASE, 'http:#z', 'http://a/b/c/d;p?q#z')
603+ self.checkJoin(RFC1808_BASE, 'http:?', 'http://a/b/c/d;p?', relroundtrip=False)
604+ self.checkJoin(RFC1808_BASE, 'http:?#z', 'http://a/b/c/d;p?#z', relroundtrip=False)
605+ self.checkJoin(RFC1808_BASE, 'http:?y', 'http://a/b/c/d;p?y')
606+ self.checkJoin(RFC1808_BASE, 'http:;', 'http://a/b/c/;')
607+ self.checkJoin(RFC1808_BASE, 'http:;?y', 'http://a/b/c/;?y')
608+ self.checkJoin(RFC1808_BASE, 'http:;#z', 'http://a/b/c/;#z')
609+ self.checkJoin(RFC1808_BASE, 'http:;x', 'http://a/b/c/;x')
610+ self.checkJoin(RFC1808_BASE, 'http:/w', 'http://a/w')
611+ self.checkJoin(RFC1808_BASE, 'http://', 'http://a/b/c/d;p?q#f')
612+ self.checkJoin(RFC1808_BASE, 'http://#z', 'http://a/b/c/d;p?q#z')
613+ self.checkJoin(RFC1808_BASE, 'http://?y', 'http://a/b/c/d;p?y')
614+ self.checkJoin(RFC1808_BASE, 'http://;x', 'http://;x')
615+ self.checkJoin(RFC1808_BASE, 'http:///w', 'http://a/w')
616+ self.checkJoin(RFC1808_BASE, 'http://v', 'http://v')
617+ # Different scheme is not ignored.
618+ self.checkJoin(RFC1808_BASE, 'https:', 'https:', relroundtrip=False)
619+ self.checkJoin(RFC1808_BASE, 'https:#', 'https:#', relroundtrip=False)
620+ self.checkJoin(RFC1808_BASE, 'https:#z', 'https:#z', relroundtrip=False)
621+ self.checkJoin(RFC1808_BASE, 'https:?', 'https:?', relroundtrip=False)
622+ self.checkJoin(RFC1808_BASE, 'https:?y', 'https:?y', relroundtrip=False)
623+ self.checkJoin(RFC1808_BASE, 'https:;', 'https:;')
624+ self.checkJoin(RFC1808_BASE, 'https:;x', 'https:;x')
625+
582626 def test_RFC2732(self):
583627 str_cases = [
584628 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
@@ -641,16 +685,31 @@ def test_urldefrag(self):
641685 ('http://python.org/p?q', 'http://python.org/p?q', ''),
642686 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
643687 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
688+ ('http://a/b/c;p?q#f', 'http://a/b/c;p?q', 'f'),
689+ ('http://a/b/c;p?q#', 'http://a/b/c;p?q', ''),
690+ ('http://a/b/c;p?q', 'http://a/b/c;p?q', ''),
691+ ('http://a/b/c;p?#f', 'http://a/b/c;p?', 'f'),
692+ ('http://a/b/c;p#f', 'http://a/b/c;p', 'f'),
693+ ('http://a/b/c;?q#f', 'http://a/b/c;?q', 'f'),
694+ ('http://a/b/c?q#f', 'http://a/b/c?q', 'f'),
695+ ('http:///b/c;p?q#f', 'http:///b/c;p?q', 'f'),
696+ ('http:b/c;p?q#f', 'http:b/c;p?q', 'f'),
697+ ('http:;?q#f', 'http:;?q', 'f'),
698+ ('http:?q#f', 'http:?q', 'f'),
699+ ('//a/b/c;p?q#f', '//a/b/c;p?q', 'f'),
700+ ('://a/b/c;p?q#f', '://a/b/c;p?q', 'f'),
644701 ]
645702 def _encode(t):
646703 return type(t)(x.encode('ascii') for x in t)
647704 bytes_cases = [_encode(x) for x in str_cases]
648705 for url, defrag, frag in str_cases + bytes_cases:
649- result = urllib.parse.urldefrag(url)
650- self.assertEqual(result.geturl(), url)
651- self.assertEqual(result, (defrag, frag))
652- self.assertEqual(result.url, defrag)
653- self.assertEqual(result.fragment, frag)
706+ with self.subTest(url):
707+ result = urllib.parse.urldefrag(url)
708+ hash = '#' if isinstance(url, str) else b'#'
709+ self.assertEqual(result.geturl(), url.rstrip(hash))
710+ self.assertEqual(result, (defrag, frag))
711+ self.assertEqual(result.url, defrag)
712+ self.assertEqual(result.fragment, frag)
654713
655714 def test_urlsplit_scoped_IPv6(self):
656715 p = urllib.parse.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
0 commit comments