Skip to content

Improve template minifier #33016

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: 2.4-develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"magento/magento-composer-installer": ">=0.1.11",
"magento/zendframework1": "dev-master as 1.14.6",
"monolog/monolog": "^2.3",
"nikic/php-parser": "~4.4.0",
"pelago/emogrifier": "^5.0.0",
"php-amqplib/php-amqplib": "~3.0.0",
"phpseclib/mcrypt_compat": "2.0.0",
Expand Down
85 changes: 66 additions & 19 deletions lib/internal/Magento/Framework/View/Template/Html/Minifier.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

use Magento\Framework\App\Filesystem\DirectoryList;
use Magento\Framework\Filesystem;
use Magento\Framework\View\Template\Html\Minifier\Php;

class Minifier implements MinifierInterface
{
Expand Down Expand Up @@ -116,49 +117,94 @@ public function minify($file)
$dir = dirname($file);
$fileName = basename($file);
$content = $this->readFactory->create($dir)->readFile($fileName);
//Storing Heredocs
$heredocs = [];
$content = preg_replace_callback(
'/<<<([A-z]+).*?\1;/ims',
function ($match) use (&$heredocs) {
$heredocs[] = $match[0];
$heredocs = null;

return '__MINIFIED_HEREDOC__' .(count($heredocs) - 1);
},
// Safely minify PHP code and remove single-line PHP comments by using a parser.
if (null !== $content) {
$parser = (new \PhpParser\ParserFactory())->create(\PhpParser\ParserFactory::PREFER_PHP7);

/**
* Prevent problems with deeply nested ASTs if Xdebug is enabled.
* @see https://github.com/nikic/PHP-Parser/blob/v4.4.0/doc/2_Usage_of_basic_components.markdown#bootstrapping
*/
$nestingLevelConfigValue = ini_get('xdebug.max_nesting_level');

if (false !== $nestingLevelConfigValue) {
ini_set('xdebug.max_nesting_level', '3000');
}

try {
$ast = $parser->parse($content);

$traverser = new \PhpParser\NodeTraverser();
$traverser->addVisitor(new Php\NodeVisitor());
$ast = $traverser->traverse($ast);

$prettyPrinter = new Php\PrettyPrinter();
$content = $prettyPrinter->prettyPrintFile($ast);
$heredocs = $prettyPrinter->getDelayedHeredocs();
} catch (\Error $error) {
// Some PHP code is seemingly invalid, or too complex.
Copy link

@maaarghk maaarghk Jul 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case doesn't it make more sense to just log a warning and return the unminified code instead of guessing that you could do a better job than the PHP parser library at minifying some mangled script using regex?

} finally {
if (false !== $nestingLevelConfigValue) {
ini_set('xdebug.max_nesting_level', $nestingLevelConfigValue);
}
}
}

// Stash the heredocs now if the template could not be parsed.
if (null === $heredocs) {
$content = preg_replace_callback(
'/<<<([A-z]+).*?\1\s*;/ims',
function ($match) use (&$heredocs) {
$heredocs[] = $match[0];

return '__MINIFIED_HEREDOC__' .(count($heredocs) - 1);
},
($content ?? '')
);
);
}

// Remove insignificant spaces before closing HTML tags
// (preserve one space after ]]>, and all spaces inside <pre> and <textarea> tags).
$content = preg_replace(
'#(?<!]]>)\s+</#',
'#(?<!]]>)\s+</(?!(?>textarea|pre)\b)#',
'</',
// Remove redundant spaces after PHP tags that do not start with a print or condition statement,
// and that do not contain any "?".
preg_replace(
'#((?:<\?php\s+(?!echo|print|if|elseif|else)[^\?]*)\?>)\s+#',
'$1 ',
// Remove single space in empty non-inline tags.
preg_replace(
'#(?<!' . implode('|', $this->inlineHtmlTags) . ')\> \<#',
'><',
// Remove redundant spaces outside of tags in which they are relevant.
preg_replace(
'#(?ix)(?>[^\S ]\s*|\s{2,})(?=(?:(?:[^<]++|<(?!/?(?:textarea|pre|script)\b))*+)'
. '(?:<(?>textarea|pre|script)\b|\z))#',
' ',
// Remove single-line comments in <script> tags, except for <![CDATA[ and ]]>.
// Do nothing if the "//" part is seemingly part of a string / URL / RegExp.
preg_replace(
'#(?<!:|\\\\|\'|"|/)//(?!/)(?!\s*\<\!\[)(?!\s*]]\>)[^\n\r]*#',
'#(?<!:|\\\\|\\\|\'|"|/)//(?!/)(?!\s*\<\!\[)(?!\s*]]\>)[^\n\r]*'
. '(?!(?:(?:[^<]++|<(?!/?(?:script)\b))*+)(?:<(?>script)\b|\z))#',
'',
// Remove commented single-line PHP tags in <script> tags.
// Do nothing if the "//" part is seemingly part of a URL / RegExp.
preg_replace(
'#(?<!:|\'|")//[^\n\r<]*(\?\>)#',
' $1',
preg_replace(
'#(?<!:)//[^\n\r]*(\<\?php)[^\n\r]*(\s\?\>)[^\n\r]*#',
'',
($content ?? '')
)
'#(?<!:|\\\)//[^\n\r]*(\<\?(php|=))[^\n\r]*(\s\?\>)[^\n\r]*'
. '(?!(?:(?:[^<]++|<(?!/?(?:script)\b))*+)(?:<(?>script)\b|\z))#',
'',
($content ?? '')
)
)
)
)
)
);

//Restoring Heredocs
// Restore the stashed heredocs.
$content = preg_replace_callback(
'/__MINIFIED_HEREDOC__(\d+)/ims',
function ($match) use ($heredocs) {
Expand All @@ -184,3 +230,4 @@ private function getRelativeGeneratedPath($sourcePath)
return $this->filesystem->getDirectoryRead(DirectoryList::ROOT)->getRelativePath($sourcePath);
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?php
/**
* Copyright © Magento, Inc. All rights reserved.
* See COPYING.txt for license details.
*/
declare(strict_types=1);

namespace Magento\Framework\View\Template\Html\Minifier\Php;

use PhpParser\Comment;
use PhpParser\Node;
use PhpParser\NodeTraverser;
use PhpParser\NodeVisitorAbstract;

/**
 * AST visitor used while minifying PHP templates.
 *
 * It does two things during a traversal:
 *  - it flags echo statements that sit alone between two inline HTML statements of the same
 *    parent (attributes "isSingleEchoStatement" on the echo and "hasSingleEchoStatementNext"
 *    on the preceding inline HTML), so that a printer can emit them as short echo tags;
 *  - it removes Nop statements that do not carry any doc comment.
 */
class NodeVisitor extends NodeVisitorAbstract
{
    /**
     * Nodes that were entered but not left yet, outermost first.
     *
     * @var Node[]
     */
    private $stack = [];

    /**
     * Last node that was left and kept in the tree, if any.
     *
     * @var ?Node
     */
    private $previous;

    /**
     * Resets the traversal state, so that the same visitor instance can be reused.
     *
     * @param Node[] $nodes
     * @return null
     */
    public function beforeTraverse(array $nodes)
    {
        $this->stack = [];
        $this->previous = null;

        return null;
    }

    /**
     * Tracks the relationship between inline HTML statements and the statements around them.
     *
     * @param Node $node
     * @return null
     */
    public function enterNode(Node $node)
    {
        if ($node instanceof Node\Stmt) {
            // Innermost node currently open, or null for a top-level statement.
            $parent = $this->stack[count($this->stack) - 1] ?? null;

            if ($node instanceof Node\Stmt\InlineHTML) {
                $node->setAttribute('parent', $parent);

                // Mark isolated echo statements, to replace them later with short echo tags:
                // the echo must directly follow an inline HTML statement and directly precede
                // this one, all three sharing the same parent.
                if ($this->previous instanceof Node\Stmt\Echo_) {
                    $previousHtmlStatement = $this->previous->getAttribute('previousHtmlStatement');

                    if (
                        ($previousHtmlStatement instanceof Node\Stmt\InlineHTML)
                        && ($previousHtmlStatement->getAttribute('parent') === $parent)
                    ) {
                        $this->previous->setAttribute('isSingleEchoStatement', true);
                        $previousHtmlStatement->setAttribute('hasSingleEchoStatementNext', true);
                    }
                }
            } elseif (
                ($this->previous instanceof Node\Stmt\InlineHTML)
                && ($this->previous->getAttribute('parent') === $parent)
            ) {
                // Remember which inline HTML statement directly precedes this statement.
                $node->setAttribute('previousHtmlStatement', $this->previous);
            }
        }

        $this->stack[] = $node;

        return null;
    }

    /**
     * Removes Nop statements without doc comments, and records the last node that was kept.
     *
     * @param Node $node
     * @return int|null NodeTraverser::REMOVE_NODE for a superfluous Nop statement, null otherwise
     */
    public function leaveNode(Node $node)
    {
        array_pop($this->stack);

        // Remove nodes that only contain non-doc comments (or no comment at all).
        if (($node instanceof Node\Stmt\Nop) && !$this->hasDocComment($node)) {
            // A removed node is deliberately not recorded as the previous node: it will not be
            // part of the output, so it must not separate an echo statement from the inline
            // HTML statement that follows it.
            return NodeTraverser::REMOVE_NODE;
        }

        $this->previous = $node;

        return null;
    }

    /**
     * Tells whether at least one of the comments attached to the given node is a doc comment.
     *
     * @param Node $node
     * @return bool
     */
    private function hasDocComment(Node $node): bool
    {
        foreach ($node->getComments() as $comment) {
            if ($comment instanceof Comment\Doc) {
                return true;
            }
        }

        return false;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
<?php
/**
* Copyright © Magento, Inc. All rights reserved.
* See COPYING.txt for license details.
*/
declare(strict_types=1);

namespace Magento\Framework\View\Template\Html\Minifier\Php;

use PhpParser\Comment;
use PhpParser\Node;
use PhpParser\PrettyPrinter\Standard;

/**
 * Pretty printer that outputs compact PHP code for minified templates.
 *
 * Indentation is disabled, non-doc comments are dropped, arrays use the short syntax, and
 * heredoc / nowdoc strings are replaced with "__MINIFIED_HEREDOC__<n>" placeholders. The
 * original text of these strings is available through getDelayedHeredocs(), so that the caller
 * can restore it once it has finished post-processing the printed code.
 */
class PrettyPrinter extends Standard
{
    /**
     * Stashed heredoc / nowdoc strings, keyed by the (1-based) index used in their placeholder.
     *
     * @var string[]
     */
    private $delayedHeredocs = [];

    /**
     * Resets the printer state before a new print run.
     *
     * The inherited newline string is set to an empty string here.
     * NOTE(review): presumably so that the base printer does not emit line breaks between
     * statements - confirm against the PhpParser version in use.
     *
     * @return void
     */
    protected function resetState()
    {
        $this->delayedHeredocs = [];
        $this->indentLevel = 0;
        $this->nl = '';
        $this->origTokens = null;
    }

    /**
     * @param int $level
     * @return void
     */
    protected function setIndentLevel(int $level)
    {
        // Ignore indentation.
    }

    /**
     * @return void
     */
    protected function indent()
    {
        // Ignore indentation.
    }

    /**
     * @return void
     */
    protected function outdent()
    {
        // Ignore indentation.
    }

    /**
     * Stashes the given printed heredoc / nowdoc string, and returns the placeholder to print
     * in its place.
     *
     * The inherited handleMagicTokens() is applied to the stashed text right away.
     * NOTE(review): presumably because the stashed text no longer goes through the printer's
     * own final post-processing - confirm.
     *
     * @param string $heredoc
     * @return string
     */
    private function getHeredocPlaceholder(string $heredoc): string
    {
        $index = count($this->delayedHeredocs) + 1;

        $this->delayedHeredocs[$index] = $this->handleMagicTokens($heredoc);

        return '__MINIFIED_HEREDOC__' . $index;
    }

    /**
     * Tells whether the given string node was written with the heredoc or nowdoc syntax.
     *
     * Both syntaxes depend on line breaks (the closing label sits on its own line), so neither
     * of them may be exposed to whitespace minification.
     *
     * @param Node $node
     * @return bool
     */
    private function isDocString(Node $node): bool
    {
        return in_array(
            $node->getAttribute('kind'),
            [Node\Scalar\String_::KIND_HEREDOC, Node\Scalar\String_::KIND_NOWDOC],
            true
        );
    }

    /**
     * Prints a plain string, or a placeholder if it is a heredoc / nowdoc string.
     *
     * @param Node\Scalar\String_ $node
     * @return string
     */
    protected function pScalar_String(Node\Scalar\String_ $node): string
    {
        $result = parent::pScalar_String($node);

        return $this->isDocString($node)
            ? $this->getHeredocPlaceholder($result)
            : $result;
    }

    /**
     * Prints an interpolated string, or a placeholder if it is a heredoc string.
     *
     * @param Node\Scalar\Encapsed $node
     * @return string
     */
    protected function pScalar_Encapsed(Node\Scalar\Encapsed $node): string
    {
        $result = parent::pScalar_Encapsed($node);

        return $this->isDocString($node)
            ? $this->getHeredocPlaceholder($result)
            : $result;
    }

    /**
     * Prints the given nodes separated by commas, without any space after the commas.
     *
     * @param Node[] $nodes
     * @return string
     */
    protected function pCommaSeparated(array $nodes): string
    {
        return $this->pImplode($nodes, ',');
    }

    /**
     * Prints the doc comments among the given comments on a single line, and drops the others.
     *
     * @param Comment[] $comments
     * @return string
     */
    protected function pComments(array $comments): string
    {
        $formattedComments = [];

        foreach ($comments as $comment) {
            // Only preserve doc comments.
            if ($comment instanceof Comment\Doc) {
                $formattedComments[] = str_replace("\n", '', $comment->getReformattedText());
            }
        }

        // Add a space between doc comments to avoid occurrences of "//" that could later be misinterpreted.
        return implode(' ', $formattedComments) . ' ';
    }

    /**
     * Prints an array with the short syntax, whatever syntax was originally used.
     *
     * @param Node\Expr\Array_ $node
     * @return string
     */
    protected function pExpr_Array(Node\Expr\Array_ $node): string
    {
        $node->setAttribute('kind', Node\Expr\Array_::KIND_SHORT);

        return parent::pExpr_Array($node);
    }

    /**
     * Prints an echo statement.
     *
     * When the statement carries the "isSingleEchoStatement" attribute, only its expressions
     * are printed: the inline HTML statement before it is then expected to end with a short
     * echo tag (see pStmt_InlineHTML()).
     *
     * @param Node\Stmt\Echo_ $node
     * @return string
     */
    protected function pStmt_Echo(Node\Stmt\Echo_ $node): string
    {
        $output = $this->pCommaSeparated($node->exprs);

        return $node->getAttribute('isSingleEchoStatement')
            ? $output . ' '
            : 'echo ' . $output . ';';
    }

    /**
     * Prints an inline HTML statement, surrounded by a closing and an opening PHP tag.
     *
     * The opening tag is a short echo tag when the statement carries the
     * "hasSingleEchoStatementNext" attribute, and a regular PHP tag otherwise.
     *
     * @param Node\Stmt\InlineHTML $node
     * @return string
     */
    protected function pStmt_InlineHTML(Node\Stmt\InlineHTML $node): string
    {
        $newline = $node->getAttribute('hasLeadingNewline', true) ? "\n" : '';
        $openingTag = $node->getAttribute('hasSingleEchoStatementNext') ? '<?= ' : '<?php ';

        return '?>' . $newline . $node->value . $openingTag;
    }

    /**
     * Returns the heredoc / nowdoc strings stashed during the last print run, keyed by the
     * index used in their "__MINIFIED_HEREDOC__<index>" placeholder.
     *
     * @return string[]
     */
    public function getDelayedHeredocs(): array
    {
        return $this->delayedHeredocs;
    }
}
Loading