diff --git a/src/com/google/javascript/jscomp/j2clbuild/super/com/google/javascript/rhino/RhinoStringPool.java b/src/com/google/javascript/jscomp/j2clbuild/super/com/google/javascript/rhino/RhinoStringPool.java index 8ff3e7e1e65..c7c9feec465 100644 --- a/src/com/google/javascript/jscomp/j2clbuild/super/com/google/javascript/rhino/RhinoStringPool.java +++ b/src/com/google/javascript/jscomp/j2clbuild/super/com/google/javascript/rhino/RhinoStringPool.java @@ -16,6 +16,9 @@ package com.google.javascript.rhino; +import java.util.List; +import java.util.stream.Stream; + /** * A JS compatible implementation of the string pool. * @@ -33,4 +36,21 @@ public static String addOrGet(String s) { } private RhinoStringPool() {} + + /** + * Stub implementation + * + *

Ok because it's only used for serialization/deserialization + */ + public static final class LazyInternedStringList { + public LazyInternedStringList(List strings) {} + + public String get(int offset) { + return null; + } + + public Stream stream() { + return Stream.empty(); + } + } } diff --git a/src/com/google/javascript/jscomp/serialization/ScriptNodeDeserializer.java b/src/com/google/javascript/jscomp/serialization/ScriptNodeDeserializer.java index 44458825783..6ad7e380523 100644 --- a/src/com/google/javascript/jscomp/serialization/ScriptNodeDeserializer.java +++ b/src/com/google/javascript/jscomp/serialization/ScriptNodeDeserializer.java @@ -332,12 +332,19 @@ private Node createSourceInfoTemplate(SourceFile file) { private void setOriginalNameIfPresent(AstNode astNode, Node n) { if (astNode.getOriginalNamePointer() != 0) { - n.setOriginalName(this.stringPool.get(astNode.getOriginalNamePointer())); + n.setOriginalNameFromStringPool( + this.stringPool.getInternedStrings(), astNode.getOriginalNamePointer()); } } - private String getString(AstNode n) { - return this.stringPool.get(n.getStringValuePointer()); + /** + * Creates a new string node with the given token & string value of the AstNode + * + *

Prefer calling this method over calling a regular Node.* or IR.* method when possible. This + * method integrates with {@link RhinoStringPool} to cache String interning results. + */ + private Node stringNode(Token token, AstNode n) { + return Node.newString(token, this.stringPool.getInternedStrings(), n.getStringValuePointer()); } private Node deserializeSingleNode(AstNode n) { @@ -348,9 +355,9 @@ private Node deserializeSingleNode(AstNode n) { case NUMBER_LITERAL: return IR.number(n.getDoubleValue()); case STRING_LITERAL: - return IR.string(getString(n)); + return stringNode(Token.STRINGLIT, n); case IDENTIFIER: - return IR.name(getString(n)); + return stringNode(Token.NAME, n); case FALSE: return new Node(Token.FALSE); case TRUE: @@ -362,7 +369,8 @@ private Node deserializeSingleNode(AstNode n) { case VOID: return new Node(Token.VOID); case BIGINT_LITERAL: - return IR.bigint(new BigInteger(getString(n))); + String bigintString = this.stringPool.get(n.getStringValuePointer()); + return IR.bigint(new BigInteger(bigintString)); case REGEX_LITERAL: return new Node(Token.REGEXP); case ARRAY_LITERAL: @@ -377,7 +385,7 @@ private Node deserializeSingleNode(AstNode n) { case NEW: return new Node(Token.NEW); case PROPERTY_ACCESS: - return Node.newString(Token.GETPROP, getString(n)); + return stringNode(Token.GETPROP, n); case ELEMENT_ACCESS: return new Node(Token.GETELEM); @@ -497,10 +505,10 @@ private Node deserializeSingleNode(AstNode n) { case TEMPLATELIT_STRING: { TemplateStringValue templateStringValue = n.getTemplateStringValue(); - int cookedPointer = templateStringValue.getCookedStringPointer(); - String cookedString = cookedPointer == -1 ? null : this.stringPool.get(cookedPointer); - String rawString = this.stringPool.get(templateStringValue.getRawStringPointer()); - return Node.newTemplateLitString(cookedString, rawString); + return Node.newTemplateLitString( + this.stringPool.getInternedStrings(), + templateStringValue.getCookedStringPointer(), + templateStringValue.getRawStringPointer()); } case NEW_TARGET: return new Node(Token.NEW_TARGET); @@ -509,7 +517,7 @@ private Node deserializeSingleNode(AstNode n) { case IMPORT_META: return new Node(Token.IMPORT_META); case OPTCHAIN_PROPERTY_ACCESS: - return Node.newString(Token.OPTCHAIN_GETPROP, getString(n)); + return stringNode(Token.OPTCHAIN_GETPROP, n); case OPTCHAIN_CALL: return new Node(Token.OPTCHAIN_CALL); case OPTCHAIN_ELEMENT_ACCESS: @@ -581,21 +589,21 @@ private Node deserializeSingleNode(AstNode n) { case LABELED_STATEMENT: return new Node(Token.LABEL); case LABELED_NAME: - return IR.labelName(getString(n)); + return stringNode(Token.LABEL_NAME, n); case CLASS_MEMBERS: return new Node(Token.CLASS_MEMBERS); case METHOD_DECLARATION: - return Node.newString(Token.MEMBER_FUNCTION_DEF, getString(n)); + return stringNode(Token.MEMBER_FUNCTION_DEF, n); case FIELD_DECLARATION: - return Node.newString(Token.MEMBER_FIELD_DEF, getString(n)); + return stringNode(Token.MEMBER_FIELD_DEF, n); case COMPUTED_PROP_FIELD: return new Node(Token.COMPUTED_FIELD_DEF); case PARAMETER_LIST: return new Node(Token.PARAM_LIST); case RENAMABLE_STRING_KEY: - return IR.stringKey(getString(n)); + return stringNode(Token.STRING_KEY, n); case QUOTED_STRING_KEY: - Node quotedStringKey = IR.stringKey(getString(n)); + Node quotedStringKey = stringNode(Token.STRING_KEY, n); quotedStringKey.setQuotedStringKey(); return quotedStringKey; case CASE: @@ -617,14 +625,14 @@ private Node deserializeSingleNode(AstNode n) { case RENAMABLE_GETTER_DEF: case QUOTED_GETTER_DEF: - Node getterDef = Node.newString(Token.GETTER_DEF, getString(n)); + Node getterDef = stringNode(Token.GETTER_DEF, n); if (n.getKind().equals(NodeKind.QUOTED_GETTER_DEF)) { getterDef.setQuotedStringKey(); } return getterDef; case RENAMABLE_SETTER_DEF: case QUOTED_SETTER_DEF: - Node setterDef = Node.newString(Token.SETTER_DEF, getString(n)); + Node setterDef = stringNode(Token.SETTER_DEF, n); if (n.getKind().equals(NodeKind.QUOTED_SETTER_DEF)) { setterDef.setQuotedStringKey(); } @@ -635,7 +643,7 @@ private Node deserializeSingleNode(AstNode n) { case IMPORT_SPEC: return new Node(Token.IMPORT_SPEC); case IMPORT_STAR: - return Node.newString(Token.IMPORT_STAR, getString(n)); + return stringNode(Token.IMPORT_STAR, n); case EXPORT_SPECS: return new Node(Token.EXPORT_SPECS); case EXPORT_SPEC: diff --git a/src/com/google/javascript/jscomp/serialization/StringPool.java b/src/com/google/javascript/jscomp/serialization/StringPool.java index f0125793021..de9b9c57af8 100644 --- a/src/com/google/javascript/jscomp/serialization/StringPool.java +++ b/src/com/google/javascript/jscomp/serialization/StringPool.java @@ -19,10 +19,10 @@ import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; -import com.google.common.collect.ImmutableList; import com.google.errorprone.annotations.CanIgnoreReturnValue; -import com.google.errorprone.annotations.Immutable; +import com.google.javascript.rhino.RhinoStringPool; import com.google.protobuf.ByteString; +import java.util.ArrayList; import java.util.LinkedHashMap; /** @@ -34,31 +34,30 @@ *

This implies default/unset/0-valued uuint32 StringPool pointers in protos are equivalent to * the empty string. */ -@Immutable public final class StringPool { - private final int maxLength; - private final ImmutableList pool; + private final int maxLength; // max length of any individual string in the pool + private final RhinoStringPool.LazyInternedStringList pool; public static StringPool fromProto(StringPoolProto proto) { Wtf8.Decoder decoder = Wtf8.decoder(proto.getMaxLength()); - ImmutableList.Builder pool = ImmutableList.builder(); + ArrayList pool = new ArrayList<>(); pool.add(""); for (ByteString s : proto.getStringsList()) { pool.add(decoder.decode(s)); } - return new StringPool(proto.getMaxLength(), pool.build()); + return new StringPool(proto.getMaxLength(), pool); } public static StringPool empty() { return EMPTY; } - private StringPool(int maxLength, ImmutableList pool) { + private StringPool(int maxLength, ArrayList pool) { this.maxLength = maxLength; - this.pool = pool; + this.pool = new RhinoStringPool.LazyInternedStringList(pool); checkState(pool.get(0).isEmpty()); } @@ -67,6 +66,10 @@ public String get(int offset) { return this.pool.get(offset); } + public RhinoStringPool.LazyInternedStringList getInternedStrings() { + return this.pool; + } + public StringPoolProto toProto() { StringPoolProto.Builder builder = StringPoolProto.newBuilder().setMaxLength(this.maxLength); this.pool.stream().skip(1).map(Wtf8::encodeToWtf8).forEachOrdered(builder::addStrings); @@ -104,7 +107,7 @@ public Builder putAnd(String string) { } public StringPool build() { - return new StringPool(this.maxLength, ImmutableList.copyOf(this.pool.keySet())); + return new StringPool(this.maxLength, new ArrayList<>(this.pool.keySet())); } } diff --git a/src/com/google/javascript/rhino/Node.java b/src/com/google/javascript/rhino/Node.java index 86c68a9dae1..90a6fcf3e61 100644 --- a/src/com/google/javascript/rhino/Node.java +++ b/src/com/google/javascript/rhino/Node.java @@ -359,6 +359,11 @@ private StringNode(Token token) { setString(str); } + StringNode(Token token, RhinoStringPool.LazyInternedStringList stringPool, int offset) { + super(token); + setStringFromStringPool(stringPool, offset); + } + @Override public boolean isEquivalentTo( Node node, boolean compareType, boolean recur, boolean jsDoc, boolean sideEffect) { @@ -395,6 +400,16 @@ private static final class TemplateLiteralSubstringNode extends Node { this.raw = RhinoStringPool.addOrGet(raw); } + private TemplateLiteralSubstringNode( + RhinoStringPool.LazyInternedStringList stringPool, + int cookedOffsetOrNegativeOne, + int rawOffset) { + super(Token.TEMPLATELIT_STRING); + this.cooked = + (cookedOffsetOrNegativeOne == -1) ? null : stringPool.get(cookedOffsetOrNegativeOne); + this.raw = stringPool.get(rawOffset); + } + @Override public boolean isEquivalentTo( Node node, boolean compareType, boolean recur, boolean jsDoc, boolean sideEffect) { @@ -557,10 +572,22 @@ public static Node newString(Token token, String str) { return new StringNode(token, str); } + public static Node newString( + Token token, RhinoStringPool.LazyInternedStringList stringPool, int offset) { + return new StringNode(token, stringPool, offset); + } + public static Node newTemplateLitString(String cooked, String raw) { return new TemplateLiteralSubstringNode(cooked, raw); } + public static Node newTemplateLitString( + RhinoStringPool.LazyInternedStringList stringPool, + int cookedOffsetOrNegativeOne, + int rawOffset) { + return new TemplateLiteralSubstringNode(stringPool, cookedOffsetOrNegativeOne, rawOffset); + } + public final Token getToken() { return token; } @@ -1384,6 +1411,14 @@ public final void setString(String str) { ((StringNode) this).str = RhinoStringPool.addOrGet(str); // RhinoStringPool is null-hostile. } + final void setStringFromStringPool( + RhinoStringPool.LazyInternedStringList stringPool, int offset) { + // RhinoStringPool.LazyInternedStringList guarantees that the result of calling get(offset) is + // interned. In some cases this is more performance than RhinoStringPool.addOrGet because the + // LazyInternedStringList caches interning results. + ((StringNode) this).str = stringPool.get(offset); + } + public final String getRawString() { return ((TemplateLiteralSubstringNode) this).raw; } @@ -1703,6 +1738,13 @@ public final void setOriginalName(String s) { this.originalName = (s == null) ? null : RhinoStringPool.addOrGet(s); } + public final void setOriginalNameFromStringPool( + RhinoStringPool.LazyInternedStringList stringPool, int offset) { + // RhinoStringPool.LazyInternedStringList guarantees that the result of calling get(offset) is + // interned, and sometimes caches results. + this.originalName = stringPool.get(offset); + } + // Copy the original public final void setOriginalNameFromName(Node name) { this.originalName = name.getString(); diff --git a/src/com/google/javascript/rhino/RhinoStringPool.java b/src/com/google/javascript/rhino/RhinoStringPool.java index 33a9d440bdc..287adf39c17 100644 --- a/src/com/google/javascript/rhino/RhinoStringPool.java +++ b/src/com/google/javascript/rhino/RhinoStringPool.java @@ -40,14 +40,19 @@ import static com.google.javascript.jscomp.base.JSCompObjects.identical; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Interner; import com.google.common.collect.Interners; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicIntegerArray; +import java.util.stream.Stream; /** * An interning pool for strings used by the Rhino package. * - *

As of 2021-03-16, this custom pool is measurably more performant than `String::intern`. Some - * reasons for this are presented at https://shipilev.net/jvm/anatomy-quarks/10-string-intern/. + *

As of 2021-03-16 and again as of 2024-12-09, this custom pool is measurably more performant + * than `String::intern`. */ public final class RhinoStringPool { @@ -75,9 +80,82 @@ public static boolean uncheckedEquals(String a, String b) { return identical(a, b); } - static String addOrGet(String s) { + public static String addOrGet(String s) { return INTERNER.intern(s); } private RhinoStringPool() {} + + /** + * A list of strings that is lazily interned into a RhinoStringPool as they are accessed. + * + *

The only reason to use this class is for performance optimizations - the {@link + * LazyInternedStringList#newStringNode} method skips re-interning the string. This is a + * performance win if creating a lot of string nodes with the same string. + */ + public static final class LazyInternedStringList { + private final ArrayList pool; + + private final WriteOnlyBitset isInterned; + + public LazyInternedStringList(List pool) { + this.pool = new ArrayList<>(pool); + this.isInterned = new WriteOnlyBitset(pool.size()); + } + + // if Java supported type-brands we would want to use one here to indicate that the returned + // string is always interned but that's not possible without an expensive wrapper type - see + // also the comment on uncheckedEquals(). + public String get(int offset) { + if (!this.isInterned.get(offset)) { + String uninterned = this.pool.get(offset); + String interned = RhinoStringPool.addOrGet(uninterned); + this.pool.set(offset, interned); + this.isInterned.set(offset); // must happen after this.pool.set() to avoid a race condition + return interned; + } + return this.pool.get(offset); + } + + public Stream stream() { + return this.pool.stream(); + } + } + + /** + * Bitset implementation that only supports setting bits from false => true, not vice versa, and + * is not resizable once created. + * + *

This class was originally copied from the JDK BitSet implementation, but has been heavily + * modified to be thread-safe and remove unnecessary functionality. + */ + @VisibleForTesting + static final class WriteOnlyBitset { + /* + * BitSets are packed into arrays of "words." Currently a word is + * an int, which consists of 32 bits, requiring 5 address bits. The choice of word size comes + * from the convenience of using AtomicIntegerArray. + */ + private static final int ADDRESS_BITS_PER_WORD = 5; + private final AtomicIntegerArray bits; + + WriteOnlyBitset(int size) { + this.bits = new AtomicIntegerArray(wordIndex(size - 1) + 1); + } + + void set(int offset) { + int wordIndex = wordIndex(offset); + this.bits.updateAndGet(wordIndex, (prevValue) -> prevValue | (1 << offset)); + } + + boolean get(int offset) { + int wordIndex = wordIndex(offset); + return ((bits.get(wordIndex) & (1 << offset)) != 0); + } + + /** Given a bit index, return word index containing it. */ + private static int wordIndex(int bitIndex) { + return bitIndex >> ADDRESS_BITS_PER_WORD; + } + } } diff --git a/test/com/google/javascript/rhino/RhinoStringPoolTest.java b/test/com/google/javascript/rhino/RhinoStringPoolTest.java new file mode 100644 index 00000000000..6be9b843454 --- /dev/null +++ b/test/com/google/javascript/rhino/RhinoStringPoolTest.java @@ -0,0 +1,154 @@ +/* + * + * ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Rhino code, released + * May 6, 1999. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1997-1999 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * Google Inc. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License Version 2 or later (the "GPL"), in which + * case the provisions of the GPL are applicable instead of those above. If + * you wish to allow use of your version of this file only under the terms of + * the GPL and not to allow others to use your version of this file under the + * MPL, indicate your decision by deleting the provisions above and replacing + * them with the notice and other provisions required by the GPL. If you do + * not delete the provisions above, a recipient may use your version of this + * file under either the MPL or the GPL. + * + * ***** END LICENSE BLOCK ***** */ + +package com.google.javascript.rhino; + +import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; + +import com.google.common.collect.ImmutableList; +import com.google.javascript.rhino.RhinoStringPool.LazyInternedStringList; +import com.google.javascript.rhino.RhinoStringPool.WriteOnlyBitset; +import java.util.ArrayList; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public final class RhinoStringPoolTest { + // note - RhinoStringPool intentionally uses static state, so these tests are not really order- + // independent. + + @Test + public void returnsDotEqualsString() { + String newFoo = RhinoStringPool.addOrGet("foo"); + assertThat(newFoo).isEqualTo("foo"); + } + + @Test + public void multipleCallsReturnSameInstance() { + String[] foos = new String[100]; + for (int i = 0; i < 100; i++) { + String newFoo = new String("foo"); // use new keyword to ensure different objects + foos[i] = RhinoStringPool.addOrGet(newFoo); + } + for (int i = 1; i < 100; i++) { + assertThat(foos[i]).isSameInstanceAs(foos[0]); + } + } + + @Test + public void lazyInternedStringList_outOfBounds_throwsException() { + LazyInternedStringList list = new LazyInternedStringList(ImmutableList.of("foo")); + assertThrows(ArrayIndexOutOfBoundsException.class, () -> list.get(100)); + } + + @Test + public void lazyInternedStringList_negativeIndex_throwsException() { + LazyInternedStringList list = new LazyInternedStringList(ImmutableList.of()); + assertThrows(ArrayIndexOutOfBoundsException.class, () -> list.get(-1)); + } + + @Test + public void lazyInternedStringList_getsEqualValuesAsInputList() { + LazyInternedStringList list = new LazyInternedStringList(ImmutableList.of("foo", "bar", "baz")); + assertThat(list.get(0)).isEqualTo("foo"); + assertThat(list.get(1)).isEqualTo("bar"); + assertThat(list.get(2)).isEqualTo("baz"); + } + + @Test + public void lazyInternedStringList_getsSameInstanceAsRhinoStringPool() { + // use new keyword to ensure we're passing unique String instances + ImmutableList strings = + ImmutableList.of(new String("foo"), new String("bar"), new String("baz")); + LazyInternedStringList list = new LazyInternedStringList(strings); + assertThat(list.get(0)).isSameInstanceAs(RhinoStringPool.addOrGet("foo")); + assertThat(list.get(1)).isSameInstanceAs(RhinoStringPool.addOrGet("bar")); + assertThat(list.get(2)).isSameInstanceAs(RhinoStringPool.addOrGet("baz")); + } + + @Test + public void lazyInternedStringList_veryLargeListWorks() { + ArrayList strings = new ArrayList<>(); + for (int i = 0; i < 10000; i++) { + strings.add(new String("foo" + i)); + } + ImmutableList immutableList = ImmutableList.copyOf(strings); + LazyInternedStringList list = new LazyInternedStringList(immutableList); + for (int i = 0; i < 10000; i++) { + assertThat(list.get(i)).isSameInstanceAs(RhinoStringPool.addOrGet("foo" + i)); + } + } + + @Test + public void writeOnlyBitset_initializedToFalse() { + WriteOnlyBitset bitset = new WriteOnlyBitset(5); + for (int i = 0; i < 5; i++) { + assertThat(bitset.get(i)).isFalse(); + } + } + + @Test + public void writeOnlyBitset_setToTrue_thenGetReturnsTrue() { + WriteOnlyBitset bitset = new WriteOnlyBitset(2); + bitset.set(0); + assertThat(bitset.get(0)).isTrue(); + assertThat(bitset.get(1)).isFalse(); + } + + @Test + public void writeOnlyBitset_wordBoundary_setToTrue_thenGetReturnsTrue() { + WriteOnlyBitset bitset = new WriteOnlyBitset(33); + bitset.set(32); + assertThat(bitset.get(32)).isTrue(); + } + + @Test + public void writeOnlyBitset_multipleSetsToTrue_allReturnTrue() { + WriteOnlyBitset bitset = new WriteOnlyBitset(5); + bitset.set(1); + bitset.set(2); + bitset.set(3); + assertThat(bitset.get(0)).isFalse(); + assertThat(bitset.get(1)).isTrue(); + assertThat(bitset.get(2)).isTrue(); + assertThat(bitset.get(3)).isTrue(); + assertThat(bitset.get(4)).isFalse(); + } +}