From c7ea0a74e0ea29a36ab7157f382d2c92d5cb11e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Wed, 14 May 2025 11:53:12 +0200 Subject: [PATCH 1/3] [WebLink] Add class to parse Link headers from HTTP responses --- CHANGELOG.md | 6 ++ HttpHeaderParser.php | 87 +++++++++++++++++++++++++ HttpHeaderSerializer.php | 2 +- Link.php | 15 ++++- Tests/HttpHeaderParserTest.php | 112 +++++++++++++++++++++++++++++++++ Tests/LinkTest.php | 8 +-- 6 files changed, 224 insertions(+), 6 deletions(-) create mode 100644 HttpHeaderParser.php create mode 100644 Tests/HttpHeaderParserTest.php diff --git a/CHANGELOG.md b/CHANGELOG.md index 28dad5a..6da8115 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ CHANGELOG ========= +7.4 +--- + + * Add `HttpHeaderParser` to read `Link` headers from HTTP responses + * Make `HttpHeaderSerializer` non-final + 4.4.0 ----- diff --git a/HttpHeaderParser.php b/HttpHeaderParser.php new file mode 100644 index 0000000..15fc91c --- /dev/null +++ b/HttpHeaderParser.php @@ -0,0 +1,87 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\WebLink; + +use Psr\Link\EvolvableLinkProviderInterface; + +/** + * Parse a list of HTTP Link headers into a list of Link instances. + * + * @see https://tools.ietf.org/html/rfc5988 + * + * @author Jérôme Tamarelle + */ +class HttpHeaderParser +{ + // Regex to match each link entry: <...>; param1=...; param2=... + private const LINK_PATTERN = '/<([^>]*)>\s*((?:\s*;\s*[a-zA-Z0-9\-_]+(?:\s*=\s*(?:"(?:[^"\\\\]|\\\\.)*"|[^";,\s]+))?)*)/'; + + // Regex to match parameters: ; key[=value] + private const PARAM_PATTERN = '/;\s*([a-zA-Z0-9\-_]+)(?:\s*=\s*(?:"((?:[^"\\\\]|\\\\.)*)"|([^";,\s]+)))?/'; + + /** + * @param string|string[] $headers Value of the "Link" HTTP header + */ + public function parse(string|array $headers): EvolvableLinkProviderInterface + { + if (is_array($headers)) { + $headers = implode(', ', $headers); + } + $links = new GenericLinkProvider(); + + if (!preg_match_all(self::LINK_PATTERN, $headers, $matches, \PREG_SET_ORDER)) { + return $links; + } + + foreach ($matches as $match) { + $href = $match[1]; + $attributesString = $match[2]; + + $attributes = []; + if (preg_match_all(self::PARAM_PATTERN, $attributesString, $attributeMatches, \PREG_SET_ORDER)) { + $rels = null; + foreach ($attributeMatches as $pm) { + $key = $pm[1]; + $value = match (true) { + // Quoted value, unescape quotes + ($pm[2] ?? '') !== '' => stripcslashes($pm[2]), + ($pm[3] ?? '') !== '' => $pm[3], + // No value + default => true, + }; + + if ($key === 'rel') { + // Only the first occurrence of the "rel" attribute is read + $rels ??= $value === true ? [] : preg_split('/\s+/', $value, 0, \PREG_SPLIT_NO_EMPTY); + } elseif (is_array($attributes[$key] ?? null)) { + $attributes[$key][] = $value; + } elseif (isset($attributes[$key])) { + $attributes[$key] = [$attributes[$key], $value]; + } else { + $attributes[$key] = $value; + } + } + } + + $link = new Link(null, $href); + foreach ($rels ?? [] as $rel) { + $link = $link->withRel($rel); + } + foreach ($attributes as $k => $v) { + $link = $link->withAttribute($k, $v); + } + $links = $links->withLink($link); + } + + return $links; + } +} diff --git a/HttpHeaderSerializer.php b/HttpHeaderSerializer.php index 4d537c9..d3b686a 100644 --- a/HttpHeaderSerializer.php +++ b/HttpHeaderSerializer.php @@ -20,7 +20,7 @@ * * @author Kévin Dunglas */ -final class HttpHeaderSerializer +class HttpHeaderSerializer { /** * Builds the value of the "Link" HTTP header. diff --git a/Link.php b/Link.php index 1f5fbbd..519194c 100644 --- a/Link.php +++ b/Link.php @@ -153,7 +153,7 @@ class Link implements EvolvableLinkInterface private array $rel = []; /** - * @var array + * @var array> */ private array $attributes = []; @@ -181,6 +181,11 @@ public function getRels(): array return array_values($this->rel); } + /** + * Returns a list of attributes that describe the target URI. + * + * @return array> + */ public function getAttributes(): array { return $this->attributes; @@ -210,6 +215,14 @@ public function withoutRel(string $rel): static return $that; } + /** + * Returns an instance with the specified attribute added. + * + * If the specified attribute is already present, it will be overwritten + * with the new value. + * + * @param scalar|\Stringable|list $value + */ public function withAttribute(string $attribute, string|\Stringable|int|float|bool|array $value): static { $that = clone $this; diff --git a/Tests/HttpHeaderParserTest.php b/Tests/HttpHeaderParserTest.php new file mode 100644 index 0000000..b2ccc3e --- /dev/null +++ b/Tests/HttpHeaderParserTest.php @@ -0,0 +1,112 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace Symfony\Component\WebLink\Tests; + +use PHPUnit\Framework\Attributes\DataProvider; +use PHPUnit\Framework\TestCase; +use Symfony\Component\WebLink\HttpHeaderParser; + +class HttpHeaderParserTest extends TestCase +{ + public function testParse() + { + $parser = new HttpHeaderParser(); + + $header = [ + '; rel="prerender",; rel="dns-prefetch"; pr="0.7",; rel="preload"; as="script"', + '; rel="preload"; as="image"; nopush,; rel="alternate next"; hreflang="fr"; hreflang="de"; title="Hello"' + ]; + $provider = $parser->parse($header); + $links = $provider->getLinks(); + + self::assertCount(5, $links); + + self::assertSame(['prerender'], $links[0]->getRels()); + self::assertSame('/1', $links[0]->getHref()); + self::assertSame([], $links[0]->getAttributes()); + + self::assertSame(['dns-prefetch'], $links[1]->getRels()); + self::assertSame('/2', $links[1]->getHref()); + self::assertSame(['pr' => '0.7'], $links[1]->getAttributes()); + + self::assertSame(['preload'], $links[2]->getRels()); + self::assertSame('/3', $links[2]->getHref()); + self::assertSame(['as' => 'script'], $links[2]->getAttributes()); + + self::assertSame(['preload'], $links[3]->getRels()); + self::assertSame('/4', $links[3]->getHref()); + self::assertSame(['as' => 'image', 'nopush' => true], $links[3]->getAttributes()); + + self::assertSame(['alternate', 'next'], $links[4]->getRels()); + self::assertSame('/5', $links[4]->getHref()); + self::assertSame(['hreflang' => ['fr', 'de'], 'title' => 'Hello'], $links[4]->getAttributes()); + } + + public function testParseEmpty() + { + $parser = new HttpHeaderParser(); + $provider = $parser->parse(''); + self::assertCount(0, $provider->getLinks()); + } + + /** @dataProvider provideHeaderParsingCases */ + #[DataProvider('provideHeaderParsingCases')] + public function testParseVariousAttributes(string $header, array $expectedRels, array $expectedAttributes) + { + $parser = new HttpHeaderParser(); + $links = $parser->parse($header)->getLinks(); + + self::assertCount(1, $links); + self::assertSame('/foo', $links[0]->getHref()); + self::assertSame($expectedRels, $links[0]->getRels()); + self::assertSame($expectedAttributes, $links[0]->getAttributes()); + } + + public static function provideHeaderParsingCases() + { + yield 'double_quotes_in_attribute_value' => [ + '; rel="alternate"; title="\"escape me\" \"already escaped\" \"\"\""', + ['alternate'], + ['title' => '"escape me" "already escaped" """'], + ]; + + yield 'unquoted_attribute_value' => [ + '; rel=alternate; type=text/html', + ['alternate'], + ['type' => 'text/html'], + ]; + + yield 'attribute_with_punctuation' => [ + '; rel="alternate"; title=">; hello, world; test:case"', + ['alternate'], + ['title' => '>; hello, world; test:case'], + ]; + + yield 'no_rel' => [ + '; type=text/html', + [], + ['type' => 'text/html'], + ]; + + yield 'empty_rel' => [ + '; rel', + [], + [], + ]; + + yield 'multiple_rel_attributes_get_first' => [ + '; rel="alternate" rel="next"', + ['alternate'], + [], + ]; + } +} diff --git a/Tests/LinkTest.php b/Tests/LinkTest.php index 226bc3a..07946af 100644 --- a/Tests/LinkTest.php +++ b/Tests/LinkTest.php @@ -27,10 +27,10 @@ public function testCanSetAndRetrieveValues() ->withAttribute('me', 'you') ; - $this->assertEquals('http://www.google.com', $link->getHref()); + $this->assertSame('http://www.google.com', $link->getHref()); $this->assertContains('next', $link->getRels()); $this->assertArrayHasKey('me', $link->getAttributes()); - $this->assertEquals('you', $link->getAttributes()['me']); + $this->assertSame('you', $link->getAttributes()['me']); } public function testCanRemoveValues() @@ -44,7 +44,7 @@ public function testCanRemoveValues() $link = $link->withoutAttribute('me') ->withoutRel('next'); - $this->assertEquals('http://www.google.com', $link->getHref()); + $this->assertSame('http://www.google.com', $link->getHref()); $this->assertFalse(\in_array('next', $link->getRels(), true)); $this->assertArrayNotHasKey('me', $link->getAttributes()); } @@ -65,7 +65,7 @@ public function testConstructor() { $link = new Link('next', 'http://www.google.com'); - $this->assertEquals('http://www.google.com', $link->getHref()); + $this->assertSame('http://www.google.com', $link->getHref()); $this->assertContains('next', $link->getRels()); } From e5f155b51204b2658ca094fa30581e3a926de8a6 Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Mon, 2 Jun 2025 16:08:14 +0200 Subject: [PATCH 2/3] Allow Symfony ^8.0 --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 3203f6f..0d7ca78 100644 --- a/composer.json +++ b/composer.json @@ -23,7 +23,7 @@ "psr/link": "^1.1|^2.0" }, "require-dev": { - "symfony/http-kernel": "^6.4|^7.0" + "symfony/http-kernel": "^6.4|^7.0|^8.0" }, "conflict": { "symfony/http-kernel": "<6.4" From 9d9080d91c57bf04835561431a2a89425df6a299 Mon Sep 17 00:00:00 2001 From: Nicolas Grekas Date: Tue, 8 Jul 2025 11:08:29 +0200 Subject: [PATCH 3/3] Various CS fixes --- HttpHeaderParser.php | 8 ++++---- Tests/HttpHeaderParserTest.php | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/HttpHeaderParser.php b/HttpHeaderParser.php index 15fc91c..fbb2a60 100644 --- a/HttpHeaderParser.php +++ b/HttpHeaderParser.php @@ -33,7 +33,7 @@ class HttpHeaderParser */ public function parse(string|array $headers): EvolvableLinkProviderInterface { - if (is_array($headers)) { + if (\is_array($headers)) { $headers = implode(', ', $headers); } $links = new GenericLinkProvider(); @@ -59,10 +59,10 @@ public function parse(string|array $headers): EvolvableLinkProviderInterface default => true, }; - if ($key === 'rel') { + if ('rel' === $key) { // Only the first occurrence of the "rel" attribute is read - $rels ??= $value === true ? [] : preg_split('/\s+/', $value, 0, \PREG_SPLIT_NO_EMPTY); - } elseif (is_array($attributes[$key] ?? null)) { + $rels ??= true === $value ? [] : preg_split('/\s+/', $value, 0, \PREG_SPLIT_NO_EMPTY); + } elseif (\is_array($attributes[$key] ?? null)) { $attributes[$key][] = $value; } elseif (isset($attributes[$key])) { $attributes[$key] = [$attributes[$key], $value]; diff --git a/Tests/HttpHeaderParserTest.php b/Tests/HttpHeaderParserTest.php index b2ccc3e..04b464b 100644 --- a/Tests/HttpHeaderParserTest.php +++ b/Tests/HttpHeaderParserTest.php @@ -23,7 +23,7 @@ public function testParse() $header = [ '; rel="prerender",; rel="dns-prefetch"; pr="0.7",; rel="preload"; as="script"', - '; rel="preload"; as="image"; nopush,; rel="alternate next"; hreflang="fr"; hreflang="de"; title="Hello"' + '; rel="preload"; as="image"; nopush,; rel="alternate next"; hreflang="fr"; hreflang="de"; title="Hello"', ]; $provider = $parser->parse($header); $links = $provider->getLinks();