Skip to content
This repository has been archived by the owner on Nov 10, 2023. It is now read-only.

Commit

Permalink
Intern identifiers
Browse files Browse the repository at this point in the history
Summary:
Intern identifiers of:
* function parameter names
* function argument names
* dot-expression rhs (e. g. `[].append`)

We do a lot of hashmap lookups with these strings (e. g. each time we invoke `lll.append(x)`, even with non-identity-hash-map, lookup by identity-equal string is faster (no need to perform deep string comparison).

This is exactly the same diff as submitted to Bazel PR, interning is performed with external interner, but since in Buck we do a lot of `String.intern`, probably we should just do `String.intern` in our version instead of external interner.

`String.intern` was expensive in old Java days, I'm not sure how good is it nowadays.

Upstream PR: bazelbuild/bazel#12521

Reviewed By: mzlee

fbshipit-source-id: 25f3feeb216e64269e2226a003a0b9b5695f4c6b
  • Loading branch information
stepancheg authored and facebook-github-bot committed Mar 10, 2021
1 parent e4d865d commit f557d47
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 9 deletions.
1 change: 1 addition & 0 deletions third-party/java/bazel/README.facebook
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ exists here to simplify sync with upstream if we ever do it.
* D26877126 Reuse positional array in native calls where possible
* D26877128 Faster parameter type check for Object
* D26877129 Faster dict.keys
* D26877121 Intern identifiers
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import javax.annotation.Nullable;
import net.starlark.java.annot.StarlarkAnnotations;
import net.starlark.java.annot.StarlarkMethod;
import net.starlark.java.syntax.StarlarkStringInterner;

/** Helper functions for StarlarkMethod-annotated fields and methods. */
final class CallUtils {
Expand Down Expand Up @@ -132,11 +133,15 @@ private static CacheValue buildCacheValue(Key key) {
continue;
}

// Identifiers are interned in the parser, intern them here too
// to speed up lookup in dot expressions.
String name = StarlarkStringInterner.intern(callable.name());

// regular method
methods.put(callable.name(), descriptor);
methods.put(name, descriptor);

// field method?
if (descriptor.isStructField() && fields.put(callable.name(), descriptor) != null) {
if (descriptor.isStructField() && fields.put(name, descriptor) != null) {
// TODO(b/72113542): Validate with annotation processor instead of at runtime.
throw new IllegalArgumentException(
String.format(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ java_library(
"FlowStatement.java",
"ForStatement.java",
"Identifier.java",
"StarlarkStringInterner.java",
"IfStatement.java",
"IndexExpression.java",
"IntLiteral.java",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,12 @@ private String intern(String s) {
return prev != null ? prev : s;
}

// Store a string in the global string interner.
// Use carefully because global interning is not free.
private String internGlobally(String s) {
return StarlarkStringInterner.intern(s);
}

// Returns a token's string form as used in error messages.
private static String tokenString(TokenKind kind, @Nullable Object value) {
return kind == TokenKind.STRING
Expand Down Expand Up @@ -416,6 +422,7 @@ private Argument parseArgument() {
expr = parseTest();
if (expr instanceof Identifier) {
Identifier id = (Identifier) expr;
id = new Identifier(locs, internGlobally(id.getName()), id.getStartOffset());
// parse a named argument
if (token.kind == TokenKind.EQUALS) {
nextToken();
Expand All @@ -434,22 +441,22 @@ private Parameter parseParameter() {
// **kwargs
if (token.kind == TokenKind.STAR_STAR) {
int starStarOffset = nextToken();
Identifier id = parseIdent();
Identifier id = parseIdent(false);
return new Parameter.StarStar(locs, starStarOffset, id);
}

// * or *args
if (token.kind == TokenKind.STAR) {
int starOffset = nextToken();
if (token.kind == TokenKind.IDENTIFIER) {
Identifier id = parseIdent();
Identifier id = parseIdent(false);
return new Parameter.Star(locs, starOffset, id);
}
return new Parameter.Star(locs, starOffset, null);
}

// name=default
Identifier id = parseIdent();
Identifier id = parseIdent(true);
if (token.kind == TokenKind.EQUALS) {
nextToken(); // TODO: save token pos?
Expression expr = parseTest();
Expand Down Expand Up @@ -499,7 +506,7 @@ private ImmutableList<Argument> parseArguments() {
private Expression parseSelectorSuffix(Expression e) {
int dotOffset = expect(TokenKind.DOT);
if (token.kind == TokenKind.IDENTIFIER) {
Identifier id = parseIdent();
Identifier id = parseIdent(true);
return new DotExpression(locs, e, dotOffset, id);
}

Expand Down Expand Up @@ -592,7 +599,7 @@ private Expression parsePrimary() {
return parseStringLiteral();

case IDENTIFIER:
return parseIdent();
return parseIdent(false);

case LBRACKET: // [...]
return parseListMaker();
Expand Down Expand Up @@ -854,14 +861,17 @@ private Expression parseDictExpression() {
return makeErrorExpression(lbraceOffset, end);
}

private Identifier parseIdent() {
private Identifier parseIdent(boolean intern) {
if (token.kind != TokenKind.IDENTIFIER) {
int start = token.start;
int end = expect(TokenKind.IDENTIFIER);
return makeErrorExpression(start, end);
}

String name = (String) token.value;
if (intern) {
name = internGlobally(name);
}
int offset = nextToken();
return new Identifier(locs, name, offset);
}
Expand Down Expand Up @@ -1190,7 +1200,7 @@ private ForStatement parseForStatement() {
// def_stmt = DEF IDENTIFIER '(' arguments ')' ':' suite
private DefStatement parseDefStatement() {
int defOffset = expect(TokenKind.DEF);
Identifier ident = parseIdent();
Identifier ident = parseIdent(false);
expect(TokenKind.LPAREN);
ImmutableList<Parameter> params = parseParameters();
expect(TokenKind.RPAREN);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package net.starlark.java.syntax;

import com.google.common.collect.Interner;
import com.google.common.collect.Interners;

/**
* Common place to intern strings in Starlark interpreter.
*
* <p>Interned strings are much faster to lookup, which is important, for example, when evaluating
* expression {@code foo.bar}.
*/
public class StarlarkStringInterner {
private StarlarkStringInterner() {}

private static final Interner<String> INTERNER = Interners.newWeakInterner();

/** Weak intern the string. */
public static String intern(String string) {
return INTERNER.intern(string);
}
}

0 comments on commit f557d47

Please sign in to comment.