Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CLEANUP] Move CSS parsing to a separate class #1014

Merged
merged 7 commits into from
Apr 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
263 changes: 15 additions & 248 deletions src/CssInliner.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor;
use Pelago\Emogrifier\Utilities\CssConcatenator;
use Pelago\Emogrifier\Utilities\CssDocument;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\CssSelector\Exception\ParseException;

Expand All @@ -29,18 +30,6 @@ class CssInliner extends AbstractHtmlProcessor
*/
private const CACHE_KEY_COMBINED_STYLES = 2;

/**
* This regular expression pattern will match any uninlinable at-rule with nested statements, along with any
* whitespace immediately following. Currently, any at-rule apart from `@media` is considered uninlinable. The
* first capturing group matches the at sign and identifier (e.g. `@font-face`). The second capturing group matches
* the nested statements along with their enclosing curly brackets (i.e. `{...}`), and via `(?2)` will match deeper
* nested blocks recursively.
*
* @var string
*/
private const UNINLINABLE_AT_RULE_MATCHER
= '/(@(?!media\\b)[\\w\\-]++)[^\\{]*+(\\{[^\\{\\}]*+(?:(?2)[^\\{\\}]*+)*+\\})\\s*+/i';

/**
* Regular expression component matching a static pseudo class in a selector, without the preceding ":",
* for which the applicable elements can be determined (by converting the selector to an XPath expression).
Expand Down Expand Up @@ -148,7 +137,7 @@ class CssInliner extends AbstractHtmlProcessor

/**
* array of data describing CSS rules which apply to the document but cannot be inlined, in the format returned by
* `parseCssRules`
* {@see collateCssRules}
oliverklee marked this conversation as resolved.
Show resolved Hide resolved
*
* @var array<array-key, array{
* media: string,
Expand Down Expand Up @@ -190,17 +179,10 @@ public function inlineCss(string $css = ''): self
if ($this->isStyleBlocksParsingEnabled) {
$combinedCss .= $this->getCssFromAllStyleNodes();
}

$cssWithoutComments = $this->removeCssComments($combinedCss);
[$cssWithoutCommentsCharsetOrImport, $cssImportRules]
= $this->extractImportAndCharsetRules($cssWithoutComments);
[$cssWithoutCommentsOrUninlinableAtRules, $cssAtRules]
= $this->extractUninlinableCssAtRules($cssWithoutCommentsCharsetOrImport);

$uninlinableCss = $cssImportRules . $cssAtRules;
$parsedCss = new CssDocument($combinedCss);

$excludedNodes = $this->getNodesToExclude();
$cssRules = $this->parseCssRules($cssWithoutCommentsOrUninlinableAtRules);
$cssRules = $this->collateCssRules($parsedCss);
$cssSelectorConverter = $this->getCssSelectorConverter();
foreach ($cssRules['inlinable'] as $cssRule) {
try {
Expand Down Expand Up @@ -228,7 +210,7 @@ public function inlineCss(string $css = ''): self
$this->removeImportantAnnotationFromAllInlineStyles();

$this->determineMatchingUninlinableCssRules($cssRules['uninlinable']);
$this->copyUninlinableCssToStyleNode($uninlinableCss);
$this->copyUninlinableCssToStyleNode($parsedCss);

return $this;
}
Expand Down Expand Up @@ -521,112 +503,6 @@ private function getCssFromAllStyleNodes(): string
return $css;
}

/**
 * Strips all comments (`/* ... *&#47;` sequences) from a string of CSS.
 *
 * @param string $css the CSS to strip the comments from
 *
 * @return string the supplied CSS with every comment replaced by an empty string
 */
private function removeCssComments(string $css): string
{
    // Matches from an opening `/*` up to the first closing `*/`, allowing lone asterisks inside the comment.
    $commentPattern = '%/\\*[^*]*+(?:\\*(?!/)[^*]*+)*+\\*/%';
    $cssWithoutComments = \preg_replace($commentPattern, '', $css);

    return $cssWithoutComments;
}

/**
 * Pulls the leading `@import` and `@charset` rules off the front of the supplied CSS.
 *
 * Only rules at the very start of the CSS (optionally preceded by whitespace or by other `@import`/`@charset`
 * rules) are recognised; as soon as anything else is encountered, extraction stops. (From the CSS 2.1
 * specification: "CSS 2.1 user agents must ignore any '@import' rule that occurs inside a block or after any
 * non-ignored statement other than an @charset or an @import rule.") `@import` is matched case-insensitively,
 * whereas `@charset` is matched case-sensitively.
 *
 * @param string $css CSS with comments removed
 *
 * @return array{0: string, 1: string}
 *         Element 0 is the CSS that remains after the leading `@import` and `@charset` rules have been removed.
 *         Element 1 is the concatenation of the removed `@import` rules, each together with the whitespace that
 *         followed it in the original CSS (so minified or unminified formatting is kept); it is an empty string
 *         if no `@import` rule was found. Removed `@charset` rules are discarded.
 */
private function extractImportAndCharsetRules(string $css): array
{
    $leadingAtRulePattern = '/^\\s*+(@((?i)import(?-i)|charset)\\s[^;]++;\\s*+)/';
    $remainingCss = $css;
    $collectedImports = '';

    while (\preg_match($leadingAtRulePattern, $remainingCss, $matches) === 1) {
        // Group 0 additionally contains any whitespace preceding the rule; group 1 starts at the at sign.
        $consumedText = $matches[0];
        $ruleWithTrailingWhitespace = $matches[1];
        $ruleName = $matches[2];

        // Chop the matched rule off the front, so that the next iteration inspects the following rule.
        $remainingCss = \substr($remainingCss, \strlen($consumedText));

        if (\strtolower($ruleName) !== 'import') {
            // `@charset` rules are dropped rather than collected.
            continue;
        }

        $collectedImports .= $ruleWithTrailingWhitespace;
    }

    return [$remainingCss, $collectedImports];
}

/**
 * Removes the at-rules matched by `self::UNINLINABLE_AT_RULE_MATCHER` from the supplied CSS and collects those
 * that pass `isValidAtRule()`.
 *
 * Matching is repeated until the pattern no longer matches the remaining CSS. Each matched rule is removed from
 * the CSS whether or not it is valid; only valid ones are collected. Note that the removal deletes every
 * occurrence of the exact matched text, so a rule that appears several times verbatim is collected only once.
 *
 * @param string $css CSS with comments, import and charset removed
 *
 * @return array{0: string, 1: string}
 *         Element 0 is the CSS with the matched at-rules removed. Element 1 is the concatenation of the valid
 *         at-rules exactly as matched (i.e. including whatever the matcher captured after the closing bracket);
 *         it is an empty string if there were none.
 */
private function extractUninlinableCssAtRules(string $css): array
{
    $remainingCss = $css;
    $collectedAtRules = '';

    while (\preg_match(self::UNINLINABLE_AT_RULE_MATCHER, $remainingCss, $matches) === 1) {
        /** @var array<int, string> $matches */
        $matchedRule = $matches[0];
        $atIdentifier = $matches[1];

        // Removes all verbatim occurrences of the matched text, not only the one that was just found.
        $remainingCss = \str_replace($matchedRule, '', $remainingCss);

        if (!$this->isValidAtRule($atIdentifier, $matchedRule)) {
            continue;
        }

        $collectedAtRules .= $matchedRule;
    }

    return [$remainingCss, $collectedAtRules];
}

/**
 * Decides whether an at-rule should be treated as valid.
 *
 * Only `@font-face` rules (compared case-insensitively) are actually inspected: they are valid if their text
 * contains both `font-family` and `src` (each searched for case-insensitively). Every other at-rule is accepted
 * without inspection.
 *
 * @param string $atIdentifier name of the at-rule with the preceding at sign
 * @param string $rule full content of the rule, including the at-identifier
 *
 * @return bool
 */
private function isValidAtRule(string $atIdentifier, string $rule): bool
{
    $isFontFaceRule = \strcasecmp($atIdentifier, '@font-face') === 0;
    if (!$isFontFaceRule) {
        return true;
    }

    $hasFontFamily = \stripos($rule, 'font-family') !== false;
    $hasSource = \stripos($rule, 'src') !== false;

    return $hasFontFamily && $hasSource;
}

/**
* Find the nodes that are not to be emogrified.
*
Expand Down Expand Up @@ -674,9 +550,9 @@ private function getCssSelectorConverter(): CssSelectorConverter
}

/**
* Extracts and parses the individual rules from a CSS string.
* Collates the individual rules from a `CssDocument` object.
*
* @param string $css a string of raw CSS code with comments removed
* @param CssDocument $parsedCss
*
* @return array<string, array<array-key, array{
* media: string,
Expand All @@ -697,9 +573,9 @@ private function getCssSelectorConverter(): CssSelectorConverter
* e.g., `color: red; height: 4px;`);
* - "line" (the line number, e.g. 42).
*/
private function parseCssRules(string $css): array
private function collateCssRules(CssDocument $parsedCss): array
{
$matches = $this->getCssRuleMatches($css);
$matches = $parsedCss->getStyleRulesData(\array_keys($this->allowedMediaTypes));

$cssRules = [
'inlinable' => [],
Expand Down Expand Up @@ -835,115 +711,6 @@ private function getCssSelectorPrecedence(string $selector): int
return $precedence;
}

/**
 * Breaks a string of CSS down into its rulesets, in order, recording for each one the media query it belongs to,
 * its selectors and its declarations.
 *
 * The CSS is first divided by `splitCssAndMediaQuery()`; the rulesets are then extracted from each resulting part.
 *
 * @param string $css CSS with comments removed
 *
 * @return array<int, array{media: string, selectors: string, declarations: string}>
 *         Array of string sub-arrays with the following keys:
 *         - "media" (the media query string, e.g. "@media screen and (max-width: 480px)",
 *           or an empty string if not from an `@media` rule);
 *         - "selectors" (the CSS selector(s), e.g., "*" or "h1, h2");
 *         - "declarations" (the semicolon-separated CSS declarations for that/those selector(s),
 *           e.g., "color: red; height: 4px;").
 */
private function getCssRuleMatches(string $css): array
{
    $collectedRules = [];

    foreach ($this->splitCssAndMediaQuery($css) as ['css' => $partCss, 'media' => $mediaQuery]) {
        // Each match is one ruleset: group 1 holds the selectors, group 2 the text between the curly brackets.
        \preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mi', $partCss, $ruleSets, PREG_SET_ORDER);

        foreach ($ruleSets as [, $selectors, $declarations]) {
            $collectedRules[] = [
                'media' => $mediaQuery,
                'selectors' => $selectors,
                'declarations' => $declarations,
            ];
        }
    }

    return $collectedRules;
}

/**
 * Splits input CSS code into an array of parts for different media queries, in order.
 * Each part is an array where:
 *
 * - key "css" will contain clean CSS code (for @media rules this will be the group rule body within "{...}")
 * - key "media" will contain "@media " followed by the media query list, for all allowed media queries,
 *   or an empty string for CSS not within a media query
 *
 * CSS outside the allowed `@media` rules has its `@import`/`@charset` directives and any remaining `@media` rules
 * removed and is trimmed; if nothing is left, no part is created for it.
 *
 * Example (presumably with `tv` not being one of the allowed media types):
 *
 * The CSS code
 *
 *   "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
 *
 * will be parsed into the following array:
 *
 *   0 => [
 *     "css" => "h1 { color:red; }",
 *     "media" => ""
 *   ],
 *   1 => [
 *     "css" => " h1 {}",
 *     "media" => "@media "
 *   ]
 *
 * @param string $css
 *
 * @return array<int, array<string, string>>
 */
private function splitCssAndMediaQuery(string $css): array
{
// Alternation of the allowed media type names (the keys of `$this->allowedMediaTypes`), with a leading `|` so that
// it can be appended to the lookahead alternative in the split pattern below.
// NOTE(review): the keys are inserted into the pattern without `preg_quote()` - presumably they are plain
// identifiers; confirm.
$mediaTypesExpression = '';
if (!empty($this->allowedMediaTypes)) {
$mediaTypesExpression = '|' . \implode('|', \array_keys($this->allowedMediaTypes));
}

// Matches the remainder of an `@media` rule: everything up to the opening curly bracket, the bracketed body
// (which may itself contain nested blocks), and any whitespace after the closing bracket.
$mediaRuleBodyMatcher = '[^{]*+{(?:[^{}]*+{.*})?\\s*+}\\s*+';

// The whole pattern is a single capturing group and `PREG_SPLIT_DELIM_CAPTURE` is set, so the result alternates
// between CSS outside the matched rules (even indexes) and the matched `@media` rules themselves (odd indexes).
// A rule is matched if `@media` (optionally followed by `only`) is followed either directly by `{` or `(`, or by
// one of the allowed media types.
$cssSplitForAllowedMediaTypes = \preg_split(
'#(@media\\s++(?:only\\s++)?+(?:(?=[{(])' . $mediaTypesExpression . ')' . $mediaRuleBodyMatcher
. ')#misU',
$css,
-1,
PREG_SPLIT_DELIM_CAPTURE
);

// filter the CSS outside/between allowed @media rules
$cssCleaningMatchers = [
'import/charset directives' => '/\\s*+@(?:import|charset)\\s[^;]++;/i',
'remaining media enclosures' => '/\\s*+@media\\s' . $mediaRuleBodyMatcher . '/isU',
];

$splitCss = [];
foreach ($cssSplitForAllowedMediaTypes as $index => $cssPart) {
// Odd indexes hold the captured delimiters, i.e. the allowed `@media` rules.
$isMediaRule = $index % 2 !== 0;
if ($isMediaRule) {
// Group 1 is everything before the first opening curly bracket (the `@media ...` prefix); group 2 is the
// content between that bracket and the last closing curly bracket.
\preg_match('/^([^{]*+){(.*)}[^}]*+$/s', $cssPart, $matches);
/** @var array<int, string> $matches */
$splitCss[] = [
'css' => $matches[2],
'media' => $matches[1],
];
} else {
// CSS outside the allowed `@media` rules: strip what the cleaning matchers cover, and only keep the part
// if anything other than whitespace remains.
$cleanedCss = \trim(\preg_replace($cssCleaningMatchers, '', $cssPart));
if ($cleanedCss !== '') {
$splitCss[] = [
'css' => $cleanedCss,
'media' => '',
];
}
}
}
return $splitCss;
}

/**
* Copies $cssRule into the style attribute of $node.
*
Expand Down Expand Up @@ -1127,7 +894,7 @@ private function generateStyleStringFromSingleDeclarationsArray(array $styleDecl
* declarationsBlock: string,
* line: int
* }> $cssRules
* the "uninlinable" array of CSS rules returned by `parseCssRules`
* the "uninlinable" array of CSS rules returned by `collateCssRules`
*/
private function determineMatchingUninlinableCssRules(array $cssRules): void
{
Expand Down Expand Up @@ -1305,15 +1072,15 @@ private function removeUnsupportedOfTypePseudoClasses(string $selectorPart): str
* Applies `$this->matchingUninlinableCssRules` to `$this->domDocument` by placing them as CSS in a `<style>`
* element.
*
* @param string $uninlinableCss
* @param CssDocument $parsedCss
* This may contain any `@import` or `@font-face` rules that should precede the CSS placed in the `<style>`
* element. If there are no unlinlinable CSS rules to copy there, a `<style>` element will be created
* containing just `$uninlinableCss`. `$uninlinableCss` may be an empty string; if it is, and there are no
* element. If there are no uninlinable CSS rules to copy there, a `<style>` element will be created
* containing only the applicable at-rules from `$parsedCss`. If there are none, and there are also no
* uninlinable CSS rules, an empty `<style>` element will not be created.
oliverklee marked this conversation as resolved.
Show resolved Hide resolved
*/
private function copyUninlinableCssToStyleNode(string $uninlinableCss): void
private function copyUninlinableCssToStyleNode(CssDocument $parsedCss): void
{
$css = $uninlinableCss;
$css = $parsedCss->renderNonConditionalAtRules();

// avoid including unneeded class dependency if there are no rules
if ($this->getMatchingUninlinableCssRules() !== []) {
Expand Down
Loading