Skip to content

Optimize getDiagnostics for speed #211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions experiments/BenchGetDiagnostics.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<?php
// Autoload required classes
require dirname(__DIR__) . "/vendor/autoload.php";

use Microsoft\PhpParser\{DiagnosticsProvider, Node, Parser, PositionUtilities};


const ITERATIONS = 100;
// Return and print an AST from string contents
$main = function() {
// Instantiate new parser instance
// TODO: Multiple source files to be realistic.
$parser = new Parser();
$t0 = microtime(true);
$astNode = $parser->parseSourceFile(file_get_contents(__DIR__ . '/../src/Parser.php'));
$t1 = microtime(true);
for ($i = 0; $i < ITERATIONS; $i++) {
$diagnostics = DiagnosticsProvider::getDiagnostics($astNode);
}
$t2 = microtime(true);
printf("Average time to generate diagnostics: %.6f\n", ($t2 - $t1) / ITERATIONS);
printf("time to parse: %.6f\n", ($t1 - $t0));
global $__counts;
var_export($__counts);
};
$main();
155 changes: 57 additions & 98 deletions src/DiagnosticsProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,125 +10,79 @@

class DiagnosticsProvider {

/**
* @var string[] maps the token kind to the corresponding name
*/
private static $tokenKindToText;

/**
* @param int $kind (must be a valid token kind)
* @return string
*/
public static function getTextForTokenKind($kind) {
return self::$tokenKindToText[$kind];
}

/**
* This is called when this class is loaded, at the bottom of this file.
* @return void
*/
public static function initTokenKindToText() {
self::$tokenKindToText = \array_flip(\array_merge(
TokenStringMaps::OPERATORS_AND_PUNCTUATORS,
TokenStringMaps::KEYWORDS,
TokenStringMaps::RESERVED_WORDS
));
}

/**
* Returns the diagnostic for $node, or null.
* @param \Microsoft\PhpParser\Node $node
* @param \Microsoft\PhpParser\Node|\Microsoft\PhpParser\Token $node
* @return Diagnostic|null
*/
public static function checkDiagnostics($node) {
if (!isset(self::$tokenKindToText)) {
self::$tokenKindToText = \array_flip(\array_merge(
TokenStringMaps::OPERATORS_AND_PUNCTUATORS,
TokenStringMaps::KEYWORDS,
TokenStringMaps::RESERVED_WORDS
));
if ($node instanceof Token) {
if (\get_class($node) === Token::class) {
return null;
}
return self::checkDiagnosticForUnexpectedToken($node);
}

if ($node instanceof SkippedToken) {
if ($node instanceof Node) {
return $node->getDiagnosticForNode();
}
return null;
}

/**
* @param Token $token
* @return Diagnostic|null
*/
private static function checkDiagnosticForUnexpectedToken($token) {
if ($token instanceof SkippedToken) {
// TODO - consider also attaching parse context information to skipped tokens
// this would allow us to provide more helpful error messages that inform users what to do
// about the problem rather than simply pointing out the mistake.
return new Diagnostic(
DiagnosticKind::Error,
"Unexpected '" .
(self::$tokenKindToText[$node->kind]
?? Token::getTokenKindNameFromValue($node->kind)) .
(self::$tokenKindToText[$token->kind]
?? Token::getTokenKindNameFromValue($token->kind)) .
"'",
$node->start,
$node->getEndPosition() - $node->start
$token->start,
$token->getEndPosition() - $token->start
);
} elseif ($node instanceof MissingToken) {
} elseif ($token instanceof MissingToken) {
return new Diagnostic(
DiagnosticKind::Error,
"'" .
(self::$tokenKindToText[$node->kind]
?? Token::getTokenKindNameFromValue($node->kind)) .
(self::$tokenKindToText[$token->kind]
?? Token::getTokenKindNameFromValue($token->kind)) .
"' expected.",
$node->start,
$node->getEndPosition() - $node->start
$token->start,
$token->getEndPosition() - $token->start
);
}

if ($node === null || $node instanceof Token) {
return null;
}

if ($node instanceof Node) {
if ($node instanceof Node\MethodDeclaration) {
foreach ($node->modifiers as $modifier) {
if ($modifier->kind === TokenKind::VarKeyword) {
return new Diagnostic(
DiagnosticKind::Error,
"Unexpected modifier '" . self::$tokenKindToText[$modifier->kind] . "'",
$modifier->start,
$modifier->length
);
}
}
}
elseif ($node instanceof Node\Statement\NamespaceUseDeclaration) {
if (
$node->useClauses != null
&& \count($node->useClauses->children) > 1
) {
foreach ($node->useClauses->children as $useClause) {
if($useClause instanceof Node\NamespaceUseClause && !is_null($useClause->openBrace)) {
return new Diagnostic(
DiagnosticKind::Error,
"; expected.",
$useClause->getEndPosition(),
1
);
}
}
}
}
else if ($node instanceof Node\Statement\BreakOrContinueStatement) {
if ($node->breakoutLevel === null) {
return null;
}

$breakoutLevel = $node->breakoutLevel;
while ($breakoutLevel instanceof Node\Expression\ParenthesizedExpression) {
$breakoutLevel = $breakoutLevel->expression;
}

if (
$breakoutLevel instanceof Node\NumericLiteral
&& $breakoutLevel->children->kind === TokenKind::IntegerLiteralToken
) {
$literalString = $breakoutLevel->getText();
$firstTwoChars = \substr($literalString, 0, 2);

if ($firstTwoChars === '0b' || $firstTwoChars === '0B') {
if (\bindec(\substr($literalString, 2)) > 0) {
return null;
}
}
else if (\intval($literalString, 0) > 0) {
return null;
}
}

if ($breakoutLevel instanceof Token) {
$start = $breakoutLevel->getStartPosition();
}
else {
$start = $breakoutLevel->getStart();
}
$end = $breakoutLevel->getEndPosition();

return new Diagnostic(
DiagnosticKind::Error,
"Positive integer literal expected.",
$start,
$end - $start
);
}
}
return null;
}

/**
Expand All @@ -139,12 +93,17 @@ public static function checkDiagnostics($node) {
public static function getDiagnostics(Node $n) : array {
$diagnostics = [];

foreach ($n->getDescendantNodesAndTokens() as $node) {
/**
* @param \Microsoft\PhpParser\Node|\Microsoft\PhpParser\Token $node
*/
$n->walkDescendantNodesAndTokens(function($node) use (&$diagnostics) {
if (($diagnostic = self::checkDiagnostics($node)) !== null) {
$diagnostics[] = $diagnostic;
}
}
});

return $diagnostics;
}
}

DiagnosticsProvider::initTokenKindToText();
51 changes: 49 additions & 2 deletions src/Node.php
Original file line number Diff line number Diff line change
Expand Up @@ -158,17 +158,55 @@ public function getDescendantNodesAndTokens(callable $shouldDescendIntoChildrenF
// TODO - write unit tests to prove invariants
// (concatenating all descendant Tokens should produce document, concatenating all Nodes should produce document)
foreach ($this->getChildNodesAndTokens() as $child) {
// Check possible types of $child, most frequent first
if ($child instanceof Node) {
yield $child;
if ($shouldDescendIntoChildrenFn == null || $shouldDescendIntoChildrenFn($child)) {
yield from $child->getDescendantNodesAndTokens($shouldDescendIntoChildrenFn);
if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($child)) {
yield from $child->getDescendantNodesAndTokens($shouldDescendIntoChildrenFn);
}
} elseif ($child instanceof Token) {
yield $child;
}
}
}

/**
* Iterate over all descendant Nodes and Tokens, calling $callback.
* This can often be faster than getDescendantNodesAndTokens
* if you just need to call something and don't need a generator.
*
* @param callable $callback a callback that accepts Node|Token
* @param callable|null $shouldDescendIntoChildrenFn
* @return void
*/
public function walkDescendantNodesAndTokens(callable $callback, callable $shouldDescendIntoChildrenFn = null) {
// TODO - write unit tests to prove invariants
// (concatenating all descendant Tokens should produce document, concatenating all Nodes should produce document)
foreach (static::CHILD_NAMES as $name) {
$child = $this->$name;
// Check possible types of $child, most frequent first
if ($child instanceof Token) {
$callback($child);
} elseif ($child instanceof Node) {
$callback($child);
if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($child)) {
$child->walkDescendantNodesAndTokens($callback, $shouldDescendIntoChildrenFn);
}
} elseif (\is_array($child)) {
foreach ($child as $childElement) {
if ($childElement instanceof Token) {
$callback($childElement);
} elseif ($childElement instanceof Node) {
$callback($childElement);
if ($shouldDescendIntoChildrenFn === null || $shouldDescendIntoChildrenFn($childElement)) {
$childElement->walkDescendantNodesAndTokens($callback, $shouldDescendIntoChildrenFn);
}
}
}
}
}
}

/**
* Gets a generator containing all descendant Nodes.
* @param callable|null $shouldDescendIntoChildrenFn
Expand Down Expand Up @@ -639,4 +677,13 @@ private function addToImportTable($alias, $functionOrConst, $namespaceNameParts,
}
return array($namespaceImportTable, $functionImportTable, $constImportTable);
}

/**
* This is overridden in subclasses
* @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead
* @internal
*/
public function getDiagnosticForNode() {
return null;
}
}
21 changes: 21 additions & 0 deletions src/Node/MethodDeclaration.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

namespace Microsoft\PhpParser\Node;

use Microsoft\PhpParser\Diagnostic;
use Microsoft\PhpParser\DiagnosticKind;
use Microsoft\PhpParser\DiagnosticsProvider;
use Microsoft\PhpParser\FunctionLike;
use Microsoft\PhpParser\Node;
use Microsoft\PhpParser\Token;
Expand Down Expand Up @@ -52,4 +55,22 @@ public function isStatic() : bool {
public function getName() {
return $this->name->getText($this->getFileContents());
}

/**
* @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead
* @internal
* @override
*/
public function getDiagnosticForNode() {
foreach ($this->modifiers as $modifier) {
if ($modifier->kind === TokenKind::VarKeyword) {
return new Diagnostic(
DiagnosticKind::Error,
"Unexpected modifier '" . DiagnosticsProvider::getTextForTokenKind($modifier->kind) . "'",
$modifier->start,
$modifier->length
);
}
}
}
}
52 changes: 52 additions & 0 deletions src/Node/Statement/BreakOrContinueStatement.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@

namespace Microsoft\PhpParser\Node\Statement;

use Microsoft\PhpParser\Diagnostic;
use Microsoft\PhpParser\DiagnosticKind;
use Microsoft\PhpParser\Node;
use Microsoft\PhpParser\Node\Expression;
use Microsoft\PhpParser\Node\StatementNode;
use Microsoft\PhpParser\Token;
use Microsoft\PhpParser\TokenKind;

class BreakOrContinueStatement extends StatementNode {
/** @var Token */
Expand All @@ -23,4 +27,52 @@ class BreakOrContinueStatement extends StatementNode {
'breakoutLevel',
'semicolon'
];

/**
* @return Diagnostic|null - Callers should use DiagnosticsProvider::getDiagnostics instead
* @internal
* @override
*/
public function getDiagnosticForNode() {
if ($this->breakoutLevel === null) {
return null;
}

$breakoutLevel = $this->breakoutLevel;
while ($breakoutLevel instanceof Node\Expression\ParenthesizedExpression) {
$breakoutLevel = $breakoutLevel->expression;
}

if (
$breakoutLevel instanceof Node\NumericLiteral
&& $breakoutLevel->children->kind === TokenKind::IntegerLiteralToken
) {
$literalString = $breakoutLevel->getText();
$firstTwoChars = \substr($literalString, 0, 2);

if ($firstTwoChars === '0b' || $firstTwoChars === '0B') {
if (\bindec(\substr($literalString, 2)) > 0) {
return null;
}
}
else if (\intval($literalString, 0) > 0) {
return null;
}
}

if ($breakoutLevel instanceof Token) {
$start = $breakoutLevel->getStartPosition();
}
else {
$start = $breakoutLevel->getStart();
}
$end = $breakoutLevel->getEndPosition();

return new Diagnostic(
DiagnosticKind::Error,
"Positive integer literal expected.",
$start,
$end - $start
);
}
}
Loading