Skip to content

Commit ad1af7e

Browse files
Locale independent URL parsing (#386)
Co-Authored-By: Alexey Shokov <alexey@shockov.com> Co-authored-by: Alexey Shokov <alexey@shockov.com>
1 parent d38b618 commit ad1af7e

File tree

2 files changed

+46
-3
lines changed

2 files changed

+46
-3
lines changed

src/Uri.php

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,48 @@ public function __construct($uri = '')
6767
{
6868
// weak type check to also accept null until we can add scalar type hints
6969
if ($uri != '') {
70-
$parts = parse_url($uri);
70+
$parts = self::parse($uri);
7171
if ($parts === false) {
7272
throw new \InvalidArgumentException("Unable to parse URI: $uri");
7373
}
7474
$this->applyParts($parts);
7575
}
7676
}
7777

78+
/**
 * Locale-independent, UTF-8 aware replacement for \parse_url().
 *
 * The internal function produces broken output for non-ASCII domain names
 * (IDN) when used with locales other than "C". To avoid that, every run of
 * non-delimiter bytes is percent-encoded before \parse_url() is called and
 * the resulting components are decoded again afterwards.
 *
 * On the other hand, cURL understands IDN correctly only when a UTF-8 locale
 * is configured ("C.UTF-8", "en_US.UTF-8", etc.).
 *
 * Square brackets around an IP-literal host (e.g. "[::1]") are restored
 * after encoding, because \parse_url() only recognises them unescaped.
 *
 * @see https://bugs.php.net/bug.php?id=52923
 * @see https://www.php.net/manual/en/function.parse-url.php#114817
 * @see https://curl.haxx.se/libcurl/c/CURLOPT_URL.html#ENCODING
 *
 * @param string $url       URL to split into components.
 * @param int    $component Optional PHP_URL_* constant; -1 returns all
 *                          components as an associative array.
 *
 * @return array|string|int|null|false Same shape as \parse_url(): an array
 *                                     of components, a single component
 *                                     (string, int port, or null when the
 *                                     component is absent), or false when
 *                                     the URL cannot be parsed.
 */
private static function parse($url, $component = -1)
{
    // The pattern is deliberately byte-oriented (no "u" modifier): all
    // delimiters are ASCII, so valid UTF-8 is split into exactly the same
    // runs, while input that is not valid UTF-8 no longer makes PCRE return
    // null (which \parse_url() would silently treat as an empty URL).
    $encodedUrl = preg_replace_callback(
        '%[^:/@?&=#]+%',
        static function ($matches) {
            return urlencode($matches[0]);
        },
        $url
    );

    if (!is_string($encodedUrl)) {
        // PCRE failure or a non-string subject: report "unparseable".
        return false;
    }

    // urlencode() also escaped the brackets of an IP-literal host; put them
    // back so \parse_url() can tell the address's colons from the port
    // separator. A literal "%5B" typed by the caller is "%255B" at this
    // point, so only genuine brackets at the start of the authority match.
    $withIpLiteral = preg_replace(
        '~^((?:[^:/?#]+:)?//(?:[^/?#@]*@)?)%5B([0-9A-Fa-f:.]+)%5D(?=[:/?#]|$)~D',
        '${1}[${2}]',
        $encodedUrl
    );

    if (is_string($withIpLiteral)) {
        $encodedUrl = $withIpLiteral;
    }

    $result = parse_url($encodedUrl, $component);

    if ($result === false) {
        return false;
    }

    // Only string components were encoded; leave the integer port and a
    // null (absent component) exactly as \parse_url() returned them.
    $decode = static function ($value) {
        return is_string($value) ? urldecode($value) : $value;
    };

    return is_array($result) ? array_map($decode, $result) : $decode($result);
}
111+
78112
public function __toString()
79113
{
80114
return self::composeComponents(
@@ -582,7 +616,7 @@ private function filterScheme($scheme)
582616
throw new \InvalidArgumentException('Scheme must be a string');
583617
}
584618

585-
return strtolower($scheme);
619+
return \strtr($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
586620
}
587621

588622
/**
@@ -618,7 +652,7 @@ private function filterHost($host)
618652
throw new \InvalidArgumentException('Host must be a string');
619653
}
620654

621-
return strtolower($host);
655+
return \strtr($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
622656
}
623657

624658
/**

tests/UriTest.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,15 @@ public function testSpecialCharsOfUserInfo()
683683
$uri = (new Uri)->withUserInfo('foo%40bar.com', 'pass%23word');
684684
self::assertSame('foo%40bar.com:pass%23word', $uri->getUserInfo());
685685
}
686+
687+
public function testInternationalizedDomainName()
{
    // Maps each input URI to the host getHost() is expected to report.
    // The first host must come back unchanged; in the second, the single
    // upper-case "A" is expected to be lower-cased while every other
    // character stays as written.
    $expectedHosts = [
        'https://яндекс.рф' => 'яндекс.рф',
        'https://яндекAс.рф' => 'яндекaс.рф',
    ];

    foreach ($expectedHosts as $input => $expectedHost) {
        self::assertSame($expectedHost, (new Uri($input))->getHost());
    }
}
686695
}
687696

688697
class ExtendedUriTest extends Uri

0 commit comments

Comments
 (0)