Skip to content

Provide Module& parameter to operateOnScopeNameUsesAndSentTypes #6096

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/abi/stack.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ getStackSpace(Index local, Function* func, Index size, Module& wasm) {
block->list.push_back(makeStackRestore());
block->list.push_back(
builder.makeReturn(builder.makeLocalGet(temp, ret->value->type)));
block->finalize();
block->finalize(&wasm);
*ptr = block;
} else {
// restore, then return
Expand All @@ -105,7 +105,7 @@ getStackSpace(Index local, Function* func, Index size, Module& wasm) {
block->list.push_back(makeStackRestore());
block->list.push_back(builder.makeLocalGet(temp, func->getResults()));
}
block->finalize();
block->finalize(&wasm);
func->body = block;
}

Expand Down
7 changes: 4 additions & 3 deletions src/binaryen-c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1093,7 +1093,7 @@ BinaryenExpressionRef BinaryenBlock(BinaryenModuleRef module,
if (type != BinaryenTypeAuto()) {
ret->finalize(Type(type));
} else {
ret->finalize();
ret->finalize((Module*)module);
}
return static_cast<Expression*>(ret);
}
Expand Down Expand Up @@ -1985,8 +1985,9 @@ void BinaryenExpressionSetType(BinaryenExpressionRef expr, BinaryenType type) {
void BinaryenExpressionPrint(BinaryenExpressionRef expr) {
std::cout << *(Expression*)expr << '\n';
}
void BinaryenExpressionFinalize(BinaryenExpressionRef expr) {
ReFinalizeNode().visit((Expression*)expr);
void BinaryenExpressionFinalize(BinaryenExpressionRef expr,
BinaryenModuleRef module) {
ReFinalizeNode(*(Module*)module).visit((Expression*)expr);
}

BinaryenExpressionRef BinaryenExpressionCopy(BinaryenExpressionRef expr,
Expand Down
3 changes: 2 additions & 1 deletion src/binaryen-c.h
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,8 @@ BINARYEN_API void BinaryenExpressionSetType(BinaryenExpressionRef expr,
// Prints text format of the given expression to stdout.
BINARYEN_API void BinaryenExpressionPrint(BinaryenExpressionRef expr);
// Re-finalizes an expression after it has been modified.
BINARYEN_API void BinaryenExpressionFinalize(BinaryenExpressionRef expr);
BINARYEN_API void BinaryenExpressionFinalize(BinaryenExpressionRef expr,
BinaryenModuleRef module);
// Makes a deep copy of the given expression.
BINARYEN_API BinaryenExpressionRef
BinaryenExpressionCopy(BinaryenExpressionRef expr, BinaryenModuleRef module);
Expand Down
17 changes: 9 additions & 8 deletions src/cfg/Relooper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,11 @@ static wasm::Expression* HandleFollowupMultiples(wasm::Expression* Ret,
}
for (auto& [Id, Body] : Multiple->InnerMap) {
Curr->name = Builder.getBlockBreakName(Id);
Curr->finalize(); // it may now be reachable, via a break
Curr->finalize(
&Builder.getModule()); // it may now be reachable, via a break
auto* Outer = Builder.makeBlock(Curr);
Outer->list.push_back(Body->Render(Builder, InLoop));
Outer->finalize(); // TODO: not really necessary
Outer->finalize(&Builder.getModule()); // TODO: not really necessary
Curr = Outer;
}
Parent->Next = Parent->Next->Next;
Expand All @@ -91,15 +92,15 @@ static wasm::Expression* HandleFollowupMultiples(wasm::Expression* Ret,
} else {
for (auto* Entry : Loop->Entries) {
Curr->name = Builder.getBlockBreakName(Entry->Id);
Curr->finalize();
Curr->finalize(&Builder.getModule());
auto* Outer = Builder.makeBlock(Curr);
Outer->finalize(); // TODO: not really necessary
Outer->finalize(&Builder.getModule()); // TODO: not really necessary
Curr = Outer;
}
}
}
}
Curr->finalize();
Curr->finalize(&Builder.getModule());
return Curr;
}

Expand Down Expand Up @@ -132,7 +133,7 @@ Branch::Render(RelooperBuilder& Builder, Block* Target, bool SetLabel) {
assert(Ancestor);
Ret->list.push_back(Builder.makeShapeContinue(Ancestor->Id));
}
Ret->finalize();
Ret->finalize(&Builder.getModule());
return Ret;
}

Expand Down Expand Up @@ -170,7 +171,7 @@ wasm::Expression* Block::Render(RelooperBuilder& Builder, bool InLoop) {
}

if (!ProcessedBranchesOut.size()) {
Ret->finalize();
Ret->finalize(&Builder.getModule());
return Ret;
}

Expand Down Expand Up @@ -407,7 +408,7 @@ wasm::Expression* Block::Render(RelooperBuilder& Builder, bool InLoop) {
if (Root) {
Ret->list.push_back(Root);
}
Ret->finalize();
Ret->finalize(&Builder.getModule());

return Ret;
}
Expand Down
48 changes: 39 additions & 9 deletions src/ir/branch-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ template<typename T> void operateOnScopeNameUses(Expression* expr, T func) {
// Similar to operateOnScopeNameUses, but also passes in the type that is sent
// if the branch is taken. The type is none if there is no value.
template<typename T>
void operateOnScopeNameUsesAndSentTypes(Expression* expr, T func) {
void operateOnScopeNameUsesAndSentTypes(Module& wasm,
Expression* expr,
T func) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The root of this sequence of changes appears to be adding Module& wasm here. That leads to needing the module in the new BranchTypeSeeker which leads to Block::finalize needing it, which leads to lots of changes.

However, I do not actually see Module& wasm used here in operateOnScopeNameUsesAndSentTypes, so I don't yet understand the motivation. Reading the code in #6083 I don't see wasm used there. I do see some discussion in https://github.com/WebAssembly/binaryen/pull/6083/files#r1382658351 but I can't understand it 😄 Probably because it speaks about wasm proposals I am not familiar with.

What I am trying to understand is: what would the code look like, that uses Module& wasm here?

I ask because this PR looks like a very large refactoring of the sort we've tried to avoid in general. For example, RefFunc could in theory read the type of the function from the Module, but we purposefully do not do that in ReFinalize::visitRefFunc, and instead rely on it being set initially in a correct manner, and then we just preserve it there (and anyone that changes a Function's type is responsible for updating RefFuncs). In that case it avoids parallelism risks of modifying one function's type while modifying the contents of another, for example - in general, we try to avoid reading global state like this for Expressions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The core problem is that the new resume instruction from typed continuations and the new try_table instruction from the redesigned EH proposal act as a branches where the sent types depends on tag types. We currently depend on being able to determine the sent type of branches just by inspecting the branching Expression, but the new dependencies on tag types means that determining the types of branches requires being able to look up the tags on the module as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The root of this sequence of changes appears to be adding Module& wasm here. That leads to needing the module in the new BranchTypeSeeker which leads to Block::finalize needing it, which leads to lots of changes.

Yes, that's right.

Sorry, I realize that this PR and the linked one do a bad job at showing what the Module& is actually needed for.

From a high-level perspective, the typed continuations proposal allows writing something like this:

(type $ct (cont ...)) 
(tag $mytag (params ...) (results ...))

...
(func ...
  ...
 (block $myblock (results ...)
  (resume $ct (tag $mytag $myblock) (local.get $mycont))
 )
 ...
)

This means that if you suspend to the tag $mytag while running the continuation $mycont, then execution continues after $myblock. The type of the data that is provided to $myblock depends on the types of the tag used and the type of the continuation (the latter is $ct here).

Something very similar will happen for the new EH proposal, where exception handlers are just ordinary blocks, and throw jumps to them, with additional data.

Practically speaking, for operateOnScopeNameUsesAndSentTypes this means that we need to do something like this:

else if (auto* res = expr->dynCast<Resume>) {

...
  Type tag_sig = module.getTags(res->handlerTags[i])->sig;
  <type send to label is calculated from `tag_sig`>
...

}

I agree that this is a rather large refactoring. Id be happy to hear if there are ways of decreasing its footprint somehow.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation!

Can we simply duplicate the type into the resume? This is what we do in practice for e.g. RefFunc today: the type of each RefFunc mirrors the module's information about the function's signature. Likewise a Call mirrors the returns of the signature, etc. While in general such duplication is not great, it allows us to avoid reading global state constantly.

If that can't work, maybe elaborate on what i is in that example, and what type send to label is calculated means in practice? (if those things are referring to some kind of "dynamic" read from the module, then simple mirroring of a single value might not work, but I hope that's not the case here...) For the new EH proposal, at least, I think this should work.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both of the new instructions take a vector of (tag, label) pairs, so they're similar to br_table except that they can potentially send different types to each of the labels. If we want to store the sent types directly in the new Expressions, they would have to be ArenaVector<Type> rather than just a single type.

Copy link
Contributor Author

@frank-emrich frank-emrich Nov 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we simply duplicate the type into the resume?

There is nothing prohibiting this from a technical perspective. Note that in general, each resume instruction has a list of (tag $sometag $someblock) clauses, where my example above only used one. This means that we would have to duplicate the Signatures of all of the tags into the Resume object.

If that can't work, maybe elaborate on what i is in that example,

I just used the i to reflect that the Resume node actually contains a list of tags and a list of blocks, together representing a (tag $sometag $someblock) entry. I was trying to hand-wave this away, because operateOnScopeNameUsesAndSentTypes needs some unrelated adapation to avoid a quadratic blowup because of this, but that is independent from the need to have access to the Module.

But coming back to your actual question: The information we need from the Module is entirely static. We just need access to the param and results types that were given when defining the tags in the tags section of the module.
More concretely, the types of the values that flow into a handler block (such as $myblock in my example above) are then a) the param types of the tag used, followed by b) a continuation type. The latter continuation type is determined entirely by the result types of the tag and the type of the resumed continuation (which can be read off from the instruction itself).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to store the sent types directly in the new Expressions, they would have to be ArenaVector rather than just a single type.

Right, in my previous comment I mentioned storing the signtatures of each of the tag in the Resume node for each (tag ...) clause, but we may equally store the sent types directly. Note that these are often tuples, because the types sent to the handler are the param types of the corresponding tag, followed by a continuation type.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, thanks, that it's a list of types is the tricky part then.

An ArenaVector<Type> seems like a reasonable way to proceed here, since I don't think we will have very many such nodes and not very long lists in them? For EH at least for C++ I believe the list will always be of length 1 (the single standard C++ exception tag with a pointer in it).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, the size of the ArenaVector<Type> itself will be equal to the number of (tag ...) clauses of the resume instruction. Each Type will then be a tuple type with n_i + 1 elements, where n_i is the number of parameters of the tag used by the i-th clause.
I think it's difficult to give a meaningful estimate of how many (tag ...) clauses a resume instruction will usually have, and how many params the involved tags will have. But I would be surprised to see larger numbers of each.

Going back to your original concerns about this refactoring, I don't quite understand what you meant by the following:

In that case it avoids parallelism risks of modifying one function's type while modifying the contents of another, for example - in general, we try to avoid reading global state like this for Expressions.

Were you concerned that we are relying on some kind of global mutable state here? Luckily, this is not the case. Or was your concern more generally about the fact that in, say, Block::finalize(Module& wasm) we would be relying on information that is maintained outside of the block in question? That is definitely true.
During the refactoring, it seemed like the Module was usually in reach and just needed to be passed on to additional places (which arguably then sums up to: quite a lot of additional places).

I don't mind discarding this PR and adding the ArenaVector to the Resume nodes in #6083 if that is your preferred option.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry if I wasn't clear about the parallelism. Yes, in this case it is safe. I was trying to give an example of why we avoid reading global state in general, one reason for which is that in some situations it is unsafe.

I do prefer to add an ArenaVector. It's less of a change, and it is more consistent with our existing coding style, I think. If we see it adds significant overhead in practice we can always optimize it later.

operateOnScopeNameUses(expr, [&](Name& name) {
// There isn't a delegate mechanism for getting a sent value, so do a direct
// if-else chain. This will need to be updated with new br variants.
Expand Down Expand Up @@ -281,19 +283,20 @@ struct BranchSeeker

Index found = 0;

std::unordered_set<Type> types;

BranchSeeker(Name target) : target(target) {}

void noteFound(Type newType) {
found++;
types.insert(newType);
}
void noteFound() { found++; }

void visitExpression(Expression* curr) {
operateOnScopeNameUsesAndSentTypes(curr, [&](Name& name, Type type) {
if (curr->is<Try>() || curr->is<Rethrow>()) {
// Keep the ignored types of users in sync with
// operateOnScopeNameUsesAndSentTypes and
// operateOnScopeNameUsesAndSentValues
return;
}
operateOnScopeNameUses(curr, [&](Name& name) {
if (name == target) {
noteFound(type);
noteFound();
}
});
}
Expand All @@ -317,6 +320,33 @@ struct BranchSeeker
}
};

// Like BranchSeeker, but accumulates the types sent to the block in question.
struct BranchTypeSeeker
: public PostWalker<BranchTypeSeeker,
UnifiedExpressionVisitor<BranchTypeSeeker>> {
Name target;
Module& wasm;

Index found = 0;

std::unordered_set<Type> types;

BranchTypeSeeker(Module& wasm, Name target) : target(target), wasm(wasm) {}

void noteFound(Type newType) {
found++;
types.insert(newType);
}

void visitExpression(Expression* curr) {
operateOnScopeNameUsesAndSentTypes(wasm, curr, [&](Name& name, Type type) {
if (name == target) {
noteFound(type);
}
});
}
};

// Accumulates all the branches in an entire tree.
struct BranchAccumulator
: public PostWalker<BranchAccumulator,
Expand Down
10 changes: 8 additions & 2 deletions src/ir/type-updating.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ namespace wasm {
struct TypeUpdater
: public ExpressionStackWalker<TypeUpdater,
UnifiedExpressionVisitor<TypeUpdater>> {

Module& wasm;

TypeUpdater(Module& wasm) : wasm(wasm) {}

// Part 1: Scanning

// track names to their blocks, so that when we remove a break to
Expand Down Expand Up @@ -172,8 +177,9 @@ struct TypeUpdater
// adds (or removes) breaks depending on break/switch contents
void discoverBreaks(Expression* curr, int change) {
BranchUtils::operateOnScopeNameUsesAndSentTypes(
curr,
[&](Name& name, Type type) { noteBreakChange(name, change, type); });
wasm, curr, [&](Name& name, Type type) {
noteBreakChange(name, change, type);
});
// TODO: it may be faster to accumulate all changes to a set first, then
// call noteBreakChange on the unique values, as a switch can be quite
// large and have lots of repeated targets.
Expand Down
21 changes: 17 additions & 4 deletions src/ir/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,16 @@ struct ReFinalize
// Re-finalize a single node. This is slow, if you want to refinalize
// an entire ast, use ReFinalize
struct ReFinalizeNode : public OverriddenVisitor<ReFinalizeNode> {

ReFinalizeNode(Module& wasm) : wasm(wasm) {}

template<typename T> void static finalize(Module& wasm, T* curr) {
curr->finalize();
}
void static finalize(Module& wasm, Block* curr) { curr->finalize(&wasm); }

#define DELEGATE(CLASS_TO_VISIT) \
void visit##CLASS_TO_VISIT(CLASS_TO_VISIT* curr) { curr->finalize(); }
void visit##CLASS_TO_VISIT(CLASS_TO_VISIT* curr) { finalize(wasm, curr); }

#include "wasm-delegations.def"

Expand All @@ -173,12 +181,15 @@ struct ReFinalizeNode : public OverriddenVisitor<ReFinalizeNode> {
void visitModule(Module* curr) { WASM_UNREACHABLE("unimp"); }

// given a stack of nested expressions, update them all from child to parent
static void updateStack(ExpressionStack& expressionStack) {
static void updateStack(Module& wasm, ExpressionStack& expressionStack) {
for (int i = int(expressionStack.size()) - 1; i >= 0; i--) {
auto* curr = expressionStack[i];
ReFinalizeNode().visit(curr);
ReFinalizeNode(wasm).visit(curr);
}
}

private:
Module& wasm;
};

// Adds drop() operations where necessary. This lets you not worry about adding
Expand Down Expand Up @@ -209,7 +220,9 @@ struct AutoDrop : public WalkerPass<ExpressionStackWalker<AutoDrop>> {
return acted;
}

void reFinalize() { ReFinalizeNode::updateStack(expressionStack); }
void reFinalize() {
ReFinalizeNode::updateStack(*getModule(), expressionStack);
}

void visitBlock(Block* curr) {
if (curr->list.size() == 0) {
Expand Down
2 changes: 1 addition & 1 deletion src/passes/AlignmentLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ struct AlignmentLowering : public WalkerPass<PostWalker<AlignmentLowering>> {
} else {
WASM_UNREACHABLE("invalid size");
}
block->finalize();
block->finalize(getModule());
return block;
}

Expand Down
10 changes: 5 additions & 5 deletions src/passes/Asyncify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,7 +935,7 @@ struct AsyncifyFlow : public Pass {
// something valid (which the optimizer can remove later).
block->list.push_back(builder->makeUnreachable());
}
block->finalize();
block->finalize(module);
func->body = block;
// Making things like returns conditional may alter types.
ReFinalize().walkFunctionInModule(func, module);
Expand Down Expand Up @@ -1062,7 +1062,7 @@ struct AsyncifyFlow : public Pass {
for (auto j = begin; j <= i; j++) {
block->list.push_back(list[j]);
}
block->finalize();
block->finalize(module);
list[begin] = makeMaybeSkip(block);
for (auto j = begin + 1; j <= i; j++) {
list[j] = builder->makeNop();
Expand Down Expand Up @@ -1536,7 +1536,7 @@ struct AsyncifyLocals : public WalkerPass<PostWalker<AsyncifyLocals>> {
}
block->list.push_back(builder->makeLocalSet(i, load));
}
block->finalize();
block->finalize(getModule());
return block;
}

Expand Down Expand Up @@ -1578,7 +1578,7 @@ struct AsyncifyLocals : public WalkerPass<PostWalker<AsyncifyLocals>> {
}
}
block->list.push_back(builder->makeIncStackPos(offset));
block->finalize();
block->finalize(getModule());
return block;
}

Expand Down Expand Up @@ -1831,7 +1831,7 @@ struct Asyncify : public Pass {
builder.makeBinary(
Abstract::getBinary(pointerType, Abstract::GtU), stackPos, stackEnd),
builder.makeUnreachable()));
body->finalize();
body->finalize(module);
auto func = builder.makeFunction(
name, Signature(Type(params), Type::none), {}, body);
module->addFunction(std::move(func));
Expand Down
10 changes: 7 additions & 3 deletions src/passes/CodeFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,11 @@ struct CodeFolding : public WalkerPass<ControlFlowWalker<CodeFolding>> {
auto oldType = curr->type;
// NB: we template-specialize so that this calls the proper finalizer for
// the type
curr->finalize();
if (Block* currBlock = curr->template dynCast<Block>()) {
currBlock->finalize(getModule());
} else {
curr->finalize();
}
// ensure the replacement has the same type, so the outside is not surprised
block->finalize(oldType);
replaceCurrent(block);
Expand Down Expand Up @@ -726,7 +730,7 @@ struct CodeFolding : public WalkerPass<ControlFlowWalker<CodeFolding>> {
// change)
auto* toplevel = old->dynCast<Block>();
if (toplevel) {
toplevel->finalize();
toplevel->finalize(getModule());
}
if (old->type != Type::unreachable) {
inner->list.push_back(builder.makeReturn(old));
Expand All @@ -735,7 +739,7 @@ struct CodeFolding : public WalkerPass<ControlFlowWalker<CodeFolding>> {
}
}
}
inner->finalize();
inner->finalize(getModule());
auto* outer = builder.makeBlock();
outer->list.push_back(inner);
while (!mergeable.empty()) {
Expand Down
Loading