From 5ce491fff9bec6baaf8b0591fb092e9cabfa0729 Mon Sep 17 00:00:00 2001
From: Colin O'Dell
Date: Sat, 7 Dec 2024 10:17:04 -0500
Subject: [PATCH] Optimize repeated parsing of links without closing brace

---
 src/Util/LinkParserHelper.php | 41 ++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 8 deletions(-)

diff --git a/src/Util/LinkParserHelper.php b/src/Util/LinkParserHelper.php
index ae722f7a98..3e76c28faa 100644
--- a/src/Util/LinkParserHelper.php
+++ b/src/Util/LinkParserHelper.php
@@ -31,14 +31,7 @@ final class LinkParserHelper
     public static function parseLinkDestination(Cursor $cursor): ?string
     {
         if ($cursor->getCurrentCharacter() === '<') {
-            if ($res = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES)) {
-                // Chop off surrounding <..>:
-                return UrlEncoder::unescapeAndEncode(
-                    RegexHelper::unescape(\substr($res, 1, -1))
-                );
-            }
-
-            return null;
+            return self::parseDestinationBraces($cursor);
         }
 
         $destination = self::manuallyParseLinkDestination($cursor);
@@ -137,4 +130,36 @@ private static function manuallyParseLinkDestination(Cursor $cursor): ?string
 
         return $destination;
     }
+
+    /** @var \WeakReference|null */
+    private static ?\WeakReference $lastCursor = null;
+    private static bool $lastCursorLacksClosingBrace = false;
+
+    private static function parseDestinationBraces(Cursor $cursor): ?string
+    {
+        // Optimization: If we've previously parsed this cursor and returned `null`, we know
+        // that no closing brace exists, so we can skip the regex entirely. This helps avoid
+        // certain pathological cases where the regex engine can take a very long time to
+        // determine that no match exists.
+        if (self::$lastCursor !== null && self::$lastCursor->get() === $cursor) {
+            if (self::$lastCursorLacksClosingBrace) {
+                return null;
+            }
+        } else {
+            self::$lastCursor = \WeakReference::create($cursor);
+        }
+
+        if ($res = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES)) {
+            self::$lastCursorLacksClosingBrace = false;
+
+            // Chop off surrounding <..>:
+            return UrlEncoder::unescapeAndEncode(
+                RegexHelper::unescape(\substr($res, 1, -1))
+            );
+        }
+
+        self::$lastCursorLacksClosingBrace = true;
+
+        return null;
+    }
 }