@@ -349,18 +349,19 @@ def _encode(t):
349
349
split = (scheme ,) + split
350
350
self .checkRoundtrips (url , parsed , split )
351
351
352
def checkJoin(self, base, relurl, expected, *, relroundtrip=True):
    """Assert that joining *relurl* onto *base* yields *expected*.

    The join is checked for the ``str`` arguments and again for their
    ASCII-encoded ``bytes`` counterparts.

    When *relroundtrip* is true (the default), *relurl* is additionally
    passed through ``urlsplit()``/``urlunsplit()`` and both joins are
    repeated with the re-assembled reference, which must still produce
    *expected*.  Callers in this file pass ``relroundtrip=False`` for
    references such as ``'#'`` or ``'?'``.
    """
    with self.subTest(base=base, relurl=relurl):
        # Plain join, text flavour.
        self.assertEqual(urllib.parse.urljoin(base, relurl), expected)

        # Same join, bytes flavour.
        baseb = base.encode('ascii')
        relurlb = relurl.encode('ascii')
        expectedb = expected.encode('ascii')
        self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)

        if relroundtrip:
            # Re-assemble the relative reference from its split form and
            # make sure the join result is unaffected.
            relurl = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
            self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
            relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
            self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
364
365
365
366
def test_unparse_parse (self ):
366
367
str_cases = ['Python' , './Python' ,'x-newscheme://foo.com/stuff' ,'x://y' ,'x:/y' ,'x:/' ,'/' ,]
@@ -526,8 +527,6 @@ def test_RFC3986(self):
526
527
527
528
def test_urljoins (self ):
528
529
self .checkJoin (SIMPLE_BASE , 'g:h' ,'g:h' )
529
- self .checkJoin (SIMPLE_BASE , 'http:g' ,'http://a/b/c/g' )
530
- self .checkJoin (SIMPLE_BASE , 'http:' ,'http://a/b/c/d' )
531
530
self .checkJoin (SIMPLE_BASE , 'g' ,'http://a/b/c/g' )
532
531
self .checkJoin (SIMPLE_BASE , './g' ,'http://a/b/c/g' )
533
532
self .checkJoin (SIMPLE_BASE , 'g/' ,'http://a/b/c/g/' )
@@ -548,8 +547,6 @@ def test_urljoins(self):
548
547
self .checkJoin (SIMPLE_BASE , 'g/./h' ,'http://a/b/c/g/h' )
549
548
self .checkJoin (SIMPLE_BASE , 'g/../h' ,'http://a/b/c/h' )
550
549
self .checkJoin (SIMPLE_BASE , 'http:g' ,'http://a/b/c/g' )
551
- self .checkJoin (SIMPLE_BASE , 'http:' ,'http://a/b/c/d' )
552
- self .checkJoin (SIMPLE_BASE , 'http:?y' ,'http://a/b/c/d?y' )
553
550
self .checkJoin (SIMPLE_BASE , 'http:g?y' ,'http://a/b/c/g?y' )
554
551
self .checkJoin (SIMPLE_BASE , 'http:g?y/./x' ,'http://a/b/c/g?y/./x' )
555
552
self .checkJoin ('http:///' , '..' ,'http:///' )
@@ -579,6 +576,53 @@ def test_urljoins(self):
579
576
# issue 23703: don't duplicate filename
580
577
self .checkJoin ('a' , 'b' , 'b' )
581
578
579
+ # Test with empty (but defined) components.
580
+ self .checkJoin (RFC1808_BASE , '' , 'http://a/b/c/d;p?q#f' )
581
+ self .checkJoin (RFC1808_BASE , '#' , 'http://a/b/c/d;p?q#' , relroundtrip = False )
582
+ self .checkJoin (RFC1808_BASE , '#z' , 'http://a/b/c/d;p?q#z' )
583
+ self .checkJoin (RFC1808_BASE , '?' , 'http://a/b/c/d;p?' , relroundtrip = False )
584
+ self .checkJoin (RFC1808_BASE , '?#z' , 'http://a/b/c/d;p?#z' , relroundtrip = False )
585
+ self .checkJoin (RFC1808_BASE , '?y' , 'http://a/b/c/d;p?y' )
586
+ self .checkJoin (RFC1808_BASE , ';' , 'http://a/b/c/;' )
587
+ self .checkJoin (RFC1808_BASE , ';?y' , 'http://a/b/c/;?y' )
588
+ self .checkJoin (RFC1808_BASE , ';#z' , 'http://a/b/c/;#z' )
589
+ self .checkJoin (RFC1808_BASE , ';x' , 'http://a/b/c/;x' )
590
+ self .checkJoin (RFC1808_BASE , '/w' , 'http://a/w' )
591
+ self .checkJoin (RFC1808_BASE , '//' , 'http://a/b/c/d;p?q#f' )
592
+ self .checkJoin (RFC1808_BASE , '//#z' , 'http://a/b/c/d;p?q#z' )
593
+ self .checkJoin (RFC1808_BASE , '//?y' , 'http://a/b/c/d;p?y' )
594
+ self .checkJoin (RFC1808_BASE , '//;x' , 'http://;x' )
595
+ self .checkJoin (RFC1808_BASE , '///w' , 'http://a/w' )
596
+ self .checkJoin (RFC1808_BASE , '//v' , 'http://v' )
597
+ # For backward compatibility with RFC1630, the scheme name is allowed
598
+ # to be present in a relative reference if it is the same as the base
599
+ # URI scheme.
600
+ self .checkJoin (RFC1808_BASE , 'http:' , 'http://a/b/c/d;p?q#f' )
601
+ self .checkJoin (RFC1808_BASE , 'http:#' , 'http://a/b/c/d;p?q#' , relroundtrip = False )
602
+ self .checkJoin (RFC1808_BASE , 'http:#z' , 'http://a/b/c/d;p?q#z' )
603
+ self .checkJoin (RFC1808_BASE , 'http:?' , 'http://a/b/c/d;p?' , relroundtrip = False )
604
+ self .checkJoin (RFC1808_BASE , 'http:?#z' , 'http://a/b/c/d;p?#z' , relroundtrip = False )
605
+ self .checkJoin (RFC1808_BASE , 'http:?y' , 'http://a/b/c/d;p?y' )
606
+ self .checkJoin (RFC1808_BASE , 'http:;' , 'http://a/b/c/;' )
607
+ self .checkJoin (RFC1808_BASE , 'http:;?y' , 'http://a/b/c/;?y' )
608
+ self .checkJoin (RFC1808_BASE , 'http:;#z' , 'http://a/b/c/;#z' )
609
+ self .checkJoin (RFC1808_BASE , 'http:;x' , 'http://a/b/c/;x' )
610
+ self .checkJoin (RFC1808_BASE , 'http:/w' , 'http://a/w' )
611
+ self .checkJoin (RFC1808_BASE , 'http://' , 'http://a/b/c/d;p?q#f' )
612
+ self .checkJoin (RFC1808_BASE , 'http://#z' , 'http://a/b/c/d;p?q#z' )
613
+ self .checkJoin (RFC1808_BASE , 'http://?y' , 'http://a/b/c/d;p?y' )
614
+ self .checkJoin (RFC1808_BASE , 'http://;x' , 'http://;x' )
615
+ self .checkJoin (RFC1808_BASE , 'http:///w' , 'http://a/w' )
616
+ self .checkJoin (RFC1808_BASE , 'http://v' , 'http://v' )
617
+ # Different scheme is not ignored.
618
+ self .checkJoin (RFC1808_BASE , 'https:' , 'https:' , relroundtrip = False )
619
+ self .checkJoin (RFC1808_BASE , 'https:#' , 'https:#' , relroundtrip = False )
620
+ self .checkJoin (RFC1808_BASE , 'https:#z' , 'https:#z' , relroundtrip = False )
621
+ self .checkJoin (RFC1808_BASE , 'https:?' , 'https:?' , relroundtrip = False )
622
+ self .checkJoin (RFC1808_BASE , 'https:?y' , 'https:?y' , relroundtrip = False )
623
+ self .checkJoin (RFC1808_BASE , 'https:;' , 'https:;' )
624
+ self .checkJoin (RFC1808_BASE , 'https:;x' , 'https:;x' )
625
+
582
626
def test_RFC2732 (self ):
583
627
str_cases = [
584
628
('http://Test.python.org:5432/foo/' , 'test.python.org' , 5432 ),
@@ -641,16 +685,31 @@ def test_urldefrag(self):
641
685
('http://python.org/p?q' , 'http://python.org/p?q' , '' ),
642
686
(RFC1808_BASE , 'http://a/b/c/d;p?q' , 'f' ),
643
687
(RFC2396_BASE , 'http://a/b/c/d;p?q' , '' ),
688
+ ('http://a/b/c;p?q#f' , 'http://a/b/c;p?q' , 'f' ),
689
+ ('http://a/b/c;p?q#' , 'http://a/b/c;p?q' , '' ),
690
+ ('http://a/b/c;p?q' , 'http://a/b/c;p?q' , '' ),
691
+ ('http://a/b/c;p?#f' , 'http://a/b/c;p?' , 'f' ),
692
+ ('http://a/b/c;p#f' , 'http://a/b/c;p' , 'f' ),
693
+ ('http://a/b/c;?q#f' , 'http://a/b/c;?q' , 'f' ),
694
+ ('http://a/b/c?q#f' , 'http://a/b/c?q' , 'f' ),
695
+ ('http:///b/c;p?q#f' , 'http:///b/c;p?q' , 'f' ),
696
+ ('http:b/c;p?q#f' , 'http:b/c;p?q' , 'f' ),
697
+ ('http:;?q#f' , 'http:;?q' , 'f' ),
698
+ ('http:?q#f' , 'http:?q' , 'f' ),
699
+ ('//a/b/c;p?q#f' , '//a/b/c;p?q' , 'f' ),
700
+ ('://a/b/c;p?q#f' , '://a/b/c;p?q' , 'f' ),
644
701
]
645
702
def _encode (t ):
646
703
return type (t )(x .encode ('ascii' ) for x in t )
647
704
bytes_cases = [_encode (x ) for x in str_cases ]
648
705
for url , defrag , frag in str_cases + bytes_cases :
649
- result = urllib .parse .urldefrag (url )
650
- self .assertEqual (result .geturl (), url )
651
- self .assertEqual (result , (defrag , frag ))
652
- self .assertEqual (result .url , defrag )
653
- self .assertEqual (result .fragment , frag )
706
+ with self .subTest (url ):
707
+ result = urllib .parse .urldefrag (url )
708
+ hash = '#' if isinstance (url , str ) else b'#'
709
+ self .assertEqual (result .geturl (), url .rstrip (hash ))
710
+ self .assertEqual (result , (defrag , frag ))
711
+ self .assertEqual (result .url , defrag )
712
+ self .assertEqual (result .fragment , frag )
654
713
655
714
def test_urlsplit_scoped_IPv6 (self ):
656
715
p = urllib .parse .urlsplit ('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234' )
0 commit comments