Skip to content

Commit dfcae55

Browse files
authored
StringGathering pass (#6257)
This pass finds all string.const and creates globals for them. After this transform, no string.const appears anywhere but in a global, and each string appears in one global which is then global.get-ed everywhere. This avoids overhead in VMs where executing a string.const is an allocation, and is also a good step towards imported strings. For that, this pass will be extended from gathering to a full lowering pass, which will first gather into globals as this pass does, and then turn each of those globals with a string.const into an imported externref. (For that reason this pass is in a file called StringLowering, as the two passes will share much of their code, and the larger pass should decide the name I think.) This pass runs in -O2 and above. Repeated executions have no downside (see details in code).
1 parent 396a826 commit dfcae55

File tree

9 files changed

+290
-66
lines changed

9 files changed

+290
-66
lines changed

src/ir/type-updating.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,6 @@ class TypeMapper : public GlobalTypeRewriter {
443443

444444
std::unordered_map<HeapType, Signature> newSignatures;
445445

446-
public:
447446
TypeMapper(Module& wasm, const TypeUpdates& mapping)
448447
: GlobalTypeRewriter(wasm), mapping(mapping) {}
449448

src/passes/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ set(passes_SOURCES
9090
SignaturePruning.cpp
9191
SignatureRefining.cpp
9292
SignExtLowering.cpp
93+
StringLowering.cpp
9394
Strip.cpp
9495
StripTargetFeatures.cpp
9596
RedundantSetElimination.cpp

src/passes/StringLowering.cpp

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
/*
2+
* Copyright 2024 WebAssembly Community Group participants
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
//
18+
// Utilities for lowering strings into simpler things.
19+
//
20+
// StringGathering collects all string.const operations and stores them in
21+
// globals, avoiding them appearing in code that can run more than once (which
22+
// can have overhead in VMs).
23+
//
24+
// Building on that, an extended version of StringGathering will also replace
25+
// those new globals with imported globals of type externref, for use with the
26+
// string imports proposal. String operations will likewise need to be lowered.
27+
// TODO
28+
//
29+
30+
#include <algorithm>
31+
32+
#include "ir/module-utils.h"
33+
#include "ir/names.h"
34+
#include "pass.h"
35+
#include "wasm-builder.h"
36+
#include "wasm.h"
37+
38+
namespace wasm {
39+
40+
struct StringGathering : public Pass {
41+
// All the strings we found in the module.
42+
std::vector<Name> strings;
43+
44+
// Pointers to all StringConsts, so that we can replace them.
45+
using StringPtrs = std::vector<Expression**>;
46+
StringPtrs stringPtrs;
47+
48+
// Main entry point.
49+
void run(Module* module) override {
50+
processModule(module);
51+
addGlobals(module);
52+
replaceStrings(module);
53+
}
54+
55+
// Scan the entire wasm to find the relevant strings to populate our global
56+
// data structures.
57+
void processModule(Module* module) {
58+
struct StringWalker : public PostWalker<StringWalker> {
59+
StringPtrs& stringPtrs;
60+
61+
StringWalker(StringPtrs& stringPtrs) : stringPtrs(stringPtrs) {}
62+
63+
void visitStringConst(StringConst* curr) {
64+
stringPtrs.push_back(getCurrentPointer());
65+
}
66+
};
67+
68+
ModuleUtils::ParallelFunctionAnalysis<StringPtrs> analysis(
69+
*module, [&](Function* func, StringPtrs& stringPtrs) {
70+
if (!func->imported()) {
71+
StringWalker(stringPtrs).walk(func->body);
72+
}
73+
});
74+
75+
// Also walk the global module code (for simplicity, also add it to the
76+
// function map, using a "function" key of nullptr).
77+
auto& globalStrings = analysis.map[nullptr];
78+
StringWalker(globalStrings).walkModuleCode(module);
79+
80+
// Combine all the strings.
81+
std::unordered_set<Name> stringSet;
82+
for (auto& [_, currStringPtrs] : analysis.map) {
83+
for (auto** stringPtr : currStringPtrs) {
84+
stringSet.insert((*stringPtr)->cast<StringConst>()->string);
85+
stringPtrs.push_back(stringPtr);
86+
}
87+
}
88+
89+
// Sort the strings for determinism (alphabetically).
90+
strings = std::vector<Name>(stringSet.begin(), stringSet.end());
91+
std::sort(strings.begin(), strings.end());
92+
}
93+
94+
// For each string, the name of the global that replaces it.
95+
std::unordered_map<Name, Name> stringToGlobalName;
96+
97+
Type nnstringref = Type(HeapType::string, NonNullable);
98+
99+
// Existing globals already in the form we emit can be reused. That is, if
100+
// we see
101+
//
102+
// (global $foo (ref string) (string.const ..))
103+
//
104+
// then we can just use that as the global for that string. This avoids
105+
// repeated executions of the pass adding more and more globals.
106+
//
107+
// Note that we don't note these in newNames: They are already in the right
108+
// sorted position, before any uses, as we use the first of them for each
109+
// string. Only actually new names need sorting.
110+
//
111+
// Any time we reuse a global, we must not modify its body (or else we'd
112+
// replace the global that all others read from); we note them here and
113+
// avoid them in replaceStrings later to avoid such trampling.
114+
std::unordered_set<Expression**> stringPtrsToPreserve;
115+
116+
void addGlobals(Module* module) {
117+
// Note all the new names we create for the sorting later.
118+
std::unordered_set<Name> newNames;
119+
120+
// Find globals to reuse (see comment on stringPtrsToPreserve for context).
121+
for (auto& global : module->globals) {
122+
if (global->type == nnstringref && !global->imported()) {
123+
if (auto* stringConst = global->init->dynCast<StringConst>()) {
124+
auto& globalName = stringToGlobalName[stringConst->string];
125+
if (!globalName.is()) {
126+
// This is the first global for this string, use it.
127+
globalName = global->name;
128+
stringPtrsToPreserve.insert(&global->init);
129+
}
130+
}
131+
}
132+
}
133+
134+
Builder builder(*module);
135+
for (Index i = 0; i < strings.size(); i++) {
136+
auto& globalName = stringToGlobalName[strings[i]];
137+
if (globalName.is()) {
138+
// We are reusing a global for this one.
139+
continue;
140+
}
141+
142+
auto& string = strings[i];
143+
auto name = Names::getValidGlobalName(
144+
*module, std::string("string.const_") + std::string(string.str));
145+
globalName = name;
146+
newNames.insert(name);
147+
auto* stringConst = builder.makeStringConst(string);
148+
auto global =
149+
builder.makeGlobal(name, nnstringref, stringConst, Builder::Immutable);
150+
module->addGlobal(std::move(global));
151+
}
152+
153+
// Sort our new globals to the start, as other global initializers may use
154+
// them (and it would be invalid for us to appear after a use). This sort is
155+
// a simple way to ensure that we validate, but it may be unoptimal (we
156+
// leave that for reorder-globals).
157+
std::stable_sort(
158+
module->globals.begin(),
159+
module->globals.end(),
160+
[&](const std::unique_ptr<Global>& a, const std::unique_ptr<Global>& b) {
161+
return newNames.count(a->name) && !newNames.count(b->name);
162+
});
163+
}
164+
165+
void replaceStrings(Module* module) {
166+
Builder builder(*module);
167+
for (auto** stringPtr : stringPtrs) {
168+
if (stringPtrsToPreserve.count(stringPtr)) {
169+
continue;
170+
}
171+
auto* stringConst = (*stringPtr)->cast<StringConst>();
172+
auto globalName = stringToGlobalName[stringConst->string];
173+
*stringPtr = builder.makeGlobalGet(globalName, nnstringref);
174+
}
175+
}
176+
};
177+
178+
// Factory for the string-gathering pass: hands back a freshly allocated
// StringGathering instance as a generic Pass pointer.
Pass* createStringGatheringPass() {
  return new StringGathering();
}
179+
180+
} // namespace wasm

src/passes/pass.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,9 @@ void PassRegistry::registerPasses() {
475475
"ssa-nomerge",
476476
"ssa-ify variables so that they have a single assignment, ignoring merges",
477477
createSSAifyNoMergePass);
478+
registerPass("string-gathering",
479+
"gathers wasm strings to globals",
480+
createStringGatheringPass);
478481
registerPass(
479482
"strip", "deprecated; same as strip-debug", createStripDebugPass);
480483
registerPass("stack-check",
@@ -710,6 +713,11 @@ void PassRunner::addDefaultGlobalOptimizationPostPasses() {
710713
addIfNoDWARFIssues("simplify-globals");
711714
}
712715
addIfNoDWARFIssues("remove-unused-module-elements");
716+
if (options.optimizeLevel >= 2 && wasm->features.hasStrings()) {
717+
// Gather strings to globals right before reorder-globals, which will then
718+
// sort them properly.
719+
addIfNoDWARFIssues("string-gathering");
720+
}
713721
if (options.optimizeLevel >= 2 || options.shrinkLevel >= 1) {
714722
addIfNoDWARFIssues("reorder-globals");
715723
}

src/passes/passes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ Pass* createSimplifyLocalsNoTeePass();
153153
Pass* createSimplifyLocalsNoStructurePass();
154154
Pass* createSimplifyLocalsNoTeeNoStructurePass();
155155
Pass* createStackCheckPass();
156+
Pass* createStringGatheringPass();
156157
Pass* createStripDebugPass();
157158
Pass* createStripDWARFPass();
158159
Pass* createStripProducersPass();

test/lit/help/wasm-opt.test

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,8 @@
467467
;; CHECK-NEXT: --stack-check enforce limits on llvm's
468468
;; CHECK-NEXT: __stack_pointer global
469469
;; CHECK-NEXT:
470+
;; CHECK-NEXT: --string-gathering gathers wasm strings to globals
471+
;; CHECK-NEXT:
470472
;; CHECK-NEXT: --strip deprecated; same as strip-debug
471473
;; CHECK-NEXT:
472474
;; CHECK-NEXT: --strip-debug strip debug info (including the

test/lit/help/wasm2js.test

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,8 @@
426426
;; CHECK-NEXT: --stack-check enforce limits on llvm's
427427
;; CHECK-NEXT: __stack_pointer global
428428
;; CHECK-NEXT:
429+
;; CHECK-NEXT: --string-gathering gathers wasm strings to globals
430+
;; CHECK-NEXT:
429431
;; CHECK-NEXT: --strip deprecated; same as strip-debug
430432
;; CHECK-NEXT:
431433
;; CHECK-NEXT: --strip-debug strip debug info (including the

test/lit/passes/simplify-globals-strings.wast

Lines changed: 0 additions & 65 deletions
This file was deleted.

0 commit comments

Comments
 (0)