Skip to content

Commit

Permalink
Always intern strings in TypedAst StringPool objects
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 704719681
  • Loading branch information
lauraharker authored and copybara-github committed Dec 10, 2024
1 parent 4a6152d commit 54f9f2f
Show file tree
Hide file tree
Showing 6 changed files with 338 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

package com.google.javascript.rhino;

import java.util.List;
import java.util.stream.Stream;

/**
* A JS compatible implementation of the string pool.
*
Expand All @@ -33,4 +36,21 @@ public static String addOrGet(String s) {
}

private RhinoStringPool() {}

/**
* Stub implementation
*
* <p>Ok because it's only used for serialization/deserialization
*/
public static final class LazyInternedStringList {
public LazyInternedStringList(List<String> strings) {}

public String get(int offset) {
return null;
}

public Stream<String> stream() {
return Stream.empty();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -332,12 +332,19 @@ private Node createSourceInfoTemplate(SourceFile file) {

private void setOriginalNameIfPresent(AstNode astNode, Node n) {
if (astNode.getOriginalNamePointer() != 0) {
n.setOriginalName(this.stringPool.get(astNode.getOriginalNamePointer()));
n.setOriginalNameFromStringPool(
this.stringPool.getInternedStrings(), astNode.getOriginalNamePointer());
}
}

private String getString(AstNode n) {
return this.stringPool.get(n.getStringValuePointer());
/**
* Creates a new string node with the given token & string value of the AstNode
*
* <p>Prefer calling this method over calling a regular Node.* or IR.* method when possible. This
* method integrates with {@link RhinoStringPool} to cache String interning results.
*/
private Node stringNode(Token token, AstNode n) {
return Node.newString(token, this.stringPool.getInternedStrings(), n.getStringValuePointer());
}

private Node deserializeSingleNode(AstNode n) {
Expand All @@ -348,9 +355,9 @@ private Node deserializeSingleNode(AstNode n) {
case NUMBER_LITERAL:
return IR.number(n.getDoubleValue());
case STRING_LITERAL:
return IR.string(getString(n));
return stringNode(Token.STRINGLIT, n);
case IDENTIFIER:
return IR.name(getString(n));
return stringNode(Token.NAME, n);
case FALSE:
return new Node(Token.FALSE);
case TRUE:
Expand All @@ -362,7 +369,8 @@ private Node deserializeSingleNode(AstNode n) {
case VOID:
return new Node(Token.VOID);
case BIGINT_LITERAL:
return IR.bigint(new BigInteger(getString(n)));
String bigintString = this.stringPool.get(n.getStringValuePointer());
return IR.bigint(new BigInteger(bigintString));
case REGEX_LITERAL:
return new Node(Token.REGEXP);
case ARRAY_LITERAL:
Expand All @@ -377,7 +385,7 @@ private Node deserializeSingleNode(AstNode n) {
case NEW:
return new Node(Token.NEW);
case PROPERTY_ACCESS:
return Node.newString(Token.GETPROP, getString(n));
return stringNode(Token.GETPROP, n);
case ELEMENT_ACCESS:
return new Node(Token.GETELEM);

Expand Down Expand Up @@ -497,10 +505,10 @@ private Node deserializeSingleNode(AstNode n) {
case TEMPLATELIT_STRING:
{
TemplateStringValue templateStringValue = n.getTemplateStringValue();
int cookedPointer = templateStringValue.getCookedStringPointer();
String cookedString = cookedPointer == -1 ? null : this.stringPool.get(cookedPointer);
String rawString = this.stringPool.get(templateStringValue.getRawStringPointer());
return Node.newTemplateLitString(cookedString, rawString);
return Node.newTemplateLitString(
this.stringPool.getInternedStrings(),
templateStringValue.getCookedStringPointer(),
templateStringValue.getRawStringPointer());
}
case NEW_TARGET:
return new Node(Token.NEW_TARGET);
Expand All @@ -509,7 +517,7 @@ private Node deserializeSingleNode(AstNode n) {
case IMPORT_META:
return new Node(Token.IMPORT_META);
case OPTCHAIN_PROPERTY_ACCESS:
return Node.newString(Token.OPTCHAIN_GETPROP, getString(n));
return stringNode(Token.OPTCHAIN_GETPROP, n);
case OPTCHAIN_CALL:
return new Node(Token.OPTCHAIN_CALL);
case OPTCHAIN_ELEMENT_ACCESS:
Expand Down Expand Up @@ -581,21 +589,21 @@ private Node deserializeSingleNode(AstNode n) {
case LABELED_STATEMENT:
return new Node(Token.LABEL);
case LABELED_NAME:
return IR.labelName(getString(n));
return stringNode(Token.LABEL_NAME, n);
case CLASS_MEMBERS:
return new Node(Token.CLASS_MEMBERS);
case METHOD_DECLARATION:
return Node.newString(Token.MEMBER_FUNCTION_DEF, getString(n));
return stringNode(Token.MEMBER_FUNCTION_DEF, n);
case FIELD_DECLARATION:
return Node.newString(Token.MEMBER_FIELD_DEF, getString(n));
return stringNode(Token.MEMBER_FIELD_DEF, n);
case COMPUTED_PROP_FIELD:
return new Node(Token.COMPUTED_FIELD_DEF);
case PARAMETER_LIST:
return new Node(Token.PARAM_LIST);
case RENAMABLE_STRING_KEY:
return IR.stringKey(getString(n));
return stringNode(Token.STRING_KEY, n);
case QUOTED_STRING_KEY:
Node quotedStringKey = IR.stringKey(getString(n));
Node quotedStringKey = stringNode(Token.STRING_KEY, n);
quotedStringKey.setQuotedStringKey();
return quotedStringKey;
case CASE:
Expand All @@ -617,14 +625,14 @@ private Node deserializeSingleNode(AstNode n) {

case RENAMABLE_GETTER_DEF:
case QUOTED_GETTER_DEF:
Node getterDef = Node.newString(Token.GETTER_DEF, getString(n));
Node getterDef = stringNode(Token.GETTER_DEF, n);
if (n.getKind().equals(NodeKind.QUOTED_GETTER_DEF)) {
getterDef.setQuotedStringKey();
}
return getterDef;
case RENAMABLE_SETTER_DEF:
case QUOTED_SETTER_DEF:
Node setterDef = Node.newString(Token.SETTER_DEF, getString(n));
Node setterDef = stringNode(Token.SETTER_DEF, n);
if (n.getKind().equals(NodeKind.QUOTED_SETTER_DEF)) {
setterDef.setQuotedStringKey();
}
Expand All @@ -635,7 +643,7 @@ private Node deserializeSingleNode(AstNode n) {
case IMPORT_SPEC:
return new Node(Token.IMPORT_SPEC);
case IMPORT_STAR:
return Node.newString(Token.IMPORT_STAR, getString(n));
return stringNode(Token.IMPORT_STAR, n);
case EXPORT_SPECS:
return new Node(Token.EXPORT_SPECS);
case EXPORT_SPEC:
Expand Down
23 changes: 13 additions & 10 deletions src/com/google/javascript/jscomp/serialization/StringPool.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;

import com.google.common.collect.ImmutableList;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import com.google.errorprone.annotations.Immutable;
import com.google.javascript.rhino.RhinoStringPool;
import com.google.protobuf.ByteString;
import java.util.ArrayList;
import java.util.LinkedHashMap;

/**
Expand All @@ -34,31 +34,30 @@
* <p>This implies default/unset/0-valued uuint32 StringPool pointers in protos are equivalent to
* the empty string.
*/
@Immutable
public final class StringPool {

private final int maxLength;
private final ImmutableList<String> pool;
private final int maxLength; // max length of any individual string in the pool
private final RhinoStringPool.LazyInternedStringList pool;

public static StringPool fromProto(StringPoolProto proto) {
Wtf8.Decoder decoder = Wtf8.decoder(proto.getMaxLength());

ImmutableList.Builder<String> pool = ImmutableList.builder();
ArrayList<String> pool = new ArrayList<>();
pool.add("");
for (ByteString s : proto.getStringsList()) {
pool.add(decoder.decode(s));
}

return new StringPool(proto.getMaxLength(), pool.build());
return new StringPool(proto.getMaxLength(), pool);
}

public static StringPool empty() {
return EMPTY;
}

private StringPool(int maxLength, ImmutableList<String> pool) {
private StringPool(int maxLength, ArrayList<String> pool) {
this.maxLength = maxLength;
this.pool = pool;
this.pool = new RhinoStringPool.LazyInternedStringList(pool);

checkState(pool.get(0).isEmpty());
}
Expand All @@ -67,6 +66,10 @@ public String get(int offset) {
return this.pool.get(offset);
}

public RhinoStringPool.LazyInternedStringList getInternedStrings() {
return this.pool;
}

public StringPoolProto toProto() {
StringPoolProto.Builder builder = StringPoolProto.newBuilder().setMaxLength(this.maxLength);
this.pool.stream().skip(1).map(Wtf8::encodeToWtf8).forEachOrdered(builder::addStrings);
Expand Down Expand Up @@ -104,7 +107,7 @@ public Builder putAnd(String string) {
}

public StringPool build() {
return new StringPool(this.maxLength, ImmutableList.copyOf(this.pool.keySet()));
return new StringPool(this.maxLength, new ArrayList<>(this.pool.keySet()));
}
}

Expand Down
42 changes: 42 additions & 0 deletions src/com/google/javascript/rhino/Node.java
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,11 @@ private StringNode(Token token) {
setString(str);
}

StringNode(Token token, RhinoStringPool.LazyInternedStringList stringPool, int offset) {
super(token);
setStringFromStringPool(stringPool, offset);
}

@Override
public boolean isEquivalentTo(
Node node, boolean compareType, boolean recur, boolean jsDoc, boolean sideEffect) {
Expand Down Expand Up @@ -395,6 +400,16 @@ private static final class TemplateLiteralSubstringNode extends Node {
this.raw = RhinoStringPool.addOrGet(raw);
}

private TemplateLiteralSubstringNode(
RhinoStringPool.LazyInternedStringList stringPool,
int cookedOffsetOrNegativeOne,
int rawOffset) {
super(Token.TEMPLATELIT_STRING);
this.cooked =
(cookedOffsetOrNegativeOne == -1) ? null : stringPool.get(cookedOffsetOrNegativeOne);
this.raw = stringPool.get(rawOffset);
}

@Override
public boolean isEquivalentTo(
Node node, boolean compareType, boolean recur, boolean jsDoc, boolean sideEffect) {
Expand Down Expand Up @@ -557,10 +572,22 @@ public static Node newString(Token token, String str) {
return new StringNode(token, str);
}

public static Node newString(
Token token, RhinoStringPool.LazyInternedStringList stringPool, int offset) {
return new StringNode(token, stringPool, offset);
}

public static Node newTemplateLitString(String cooked, String raw) {
return new TemplateLiteralSubstringNode(cooked, raw);
}

public static Node newTemplateLitString(
RhinoStringPool.LazyInternedStringList stringPool,
int cookedOffsetOrNegativeOne,
int rawOffset) {
return new TemplateLiteralSubstringNode(stringPool, cookedOffsetOrNegativeOne, rawOffset);
}

public final Token getToken() {
return token;
}
Expand Down Expand Up @@ -1384,6 +1411,14 @@ public final void setString(String str) {
((StringNode) this).str = RhinoStringPool.addOrGet(str); // RhinoStringPool is null-hostile.
}

final void setStringFromStringPool(
RhinoStringPool.LazyInternedStringList stringPool, int offset) {
// RhinoStringPool.LazyInternedStringList guarantees that the result of calling get(offset) is
// interned. In some cases this is more performance than RhinoStringPool.addOrGet because the
// LazyInternedStringList caches interning results.
((StringNode) this).str = stringPool.get(offset);
}

public final String getRawString() {
return ((TemplateLiteralSubstringNode) this).raw;
}
Expand Down Expand Up @@ -1703,6 +1738,13 @@ public final void setOriginalName(String s) {
this.originalName = (s == null) ? null : RhinoStringPool.addOrGet(s);
}

public final void setOriginalNameFromStringPool(
RhinoStringPool.LazyInternedStringList stringPool, int offset) {
// RhinoStringPool.LazyInternedStringList guarantees that the result of calling get(offset) is
// interned, and sometimes caches results.
this.originalName = stringPool.get(offset);
}

// Copy the original
public final void setOriginalNameFromName(Node name) {
this.originalName = name.getString();
Expand Down
Loading

0 comments on commit 54f9f2f

Please sign in to comment.