Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/ir/bits.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,30 @@ inline Expression* makeSignExt(Expression* value, Index bytes, Module& wasm) {
}
}

// Given a value that is read from a field, as a replacement for a
// StructGet/ArrayGet that we inferred the value of, and given the signedness of
// the get and the field info, if we are doing a signed read of a packed field
// then sign-extend it, or if it is unsigned then truncate. This fixes up cases
// where we can replace the StructGet/ArrayGet with the value we know must be
// there (without making any assumptions about |value|, that is, we do not
// assume it has been truncated already).
inline Expression* makePackedFieldGet(Expression* value,
const Field& field,
bool signed_,
Module& wasm) {
if (!field.isPacked()) {
return value;
}

if (signed_) {
return makeSignExt(value, field.getByteSize(), wasm);
}

Builder builder(wasm);
auto mask = Bits::lowBitMask(field.getByteSize() * 8);
return builder.makeBinary(AndInt32, value, builder.makeConst(int32_t(mask)));
}

// getMaxBits() helper that has pessimistic results for the bits used in locals.
struct DummyLocalInfoProvider {
Index getMaxBitsForLocal(LocalGet* get) {
Expand Down
9 changes: 2 additions & 7 deletions src/passes/ConstantFieldPropagation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,8 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
Expression* value = info.makeExpression(*getModule());
auto field = GCTypeUtils::getField(type, curr->index);
assert(field);
if (field->isPacked()) {
// We cannot just pass through a value that is packed, as the input gets
// truncated.
auto mask = Bits::lowBitMask(field->getByteSize() * 8);
value =
builder.makeBinary(AndInt32, value, builder.makeConst(int32_t(mask)));
}
value =
Bits::makePackedFieldGet(value, *field, curr->signed_, *getModule());
replaceCurrent(builder.makeSequence(
builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)), value));
changed = true;
Expand Down
35 changes: 8 additions & 27 deletions src/passes/Heap2Local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -649,21 +649,6 @@ struct Struct2Local : PostWalker<Struct2Local> {
curr->finalize();
}

// Add a mask for packed fields. We add masks on sets rather than on gets
// because gets tend to be more numerous both in code appearances and in
// runtime execution. As a result of masking on sets, the value in the local
// is always the masked unsigned value (which is also nice for debugging,
// incidentally).
Expression* addMask(Expression* value, const Field& field) {
if (!field.isPacked()) {
return value;
}

auto mask = Bits::lowBitMask(field.getByteSize() * 8);
return builder.makeBinary(
AndInt32, value, builder.makeConst(int32_t(mask)));
}

void visitStructNew(StructNew* curr) {
if (curr != allocation) {
return;
Expand Down Expand Up @@ -710,9 +695,7 @@ struct Struct2Local : PostWalker<Struct2Local> {
// Copy them to the normal ones.
for (Index i = 0; i < tempIndexes.size(); i++) {
auto* value = builder.makeLocalGet(tempIndexes[i], fields[i].type);
// Add a mask on the values we write.
contents.push_back(
builder.makeLocalSet(localIndexes[i], addMask(value, fields[i])));
contents.push_back(builder.makeLocalSet(localIndexes[i], value));
}

// TODO Check if the nondefault case does not increase code size in some
Expand All @@ -724,8 +707,6 @@ struct Struct2Local : PostWalker<Struct2Local> {
//
// Note that we must assign the defaults because we might be in a loop,
// that is, there might be a previous value.
//
// Note there is no need to mask as these are zeros anyhow.
for (Index i = 0; i < localIndexes.size(); i++) {
contents.push_back(builder.makeLocalSet(
localIndexes[i],
Expand Down Expand Up @@ -786,8 +767,7 @@ struct Struct2Local : PostWalker<Struct2Local> {
// write the data to the local instead of the heap allocation.
replaceCurrent(builder.makeSequence(
builder.makeDrop(curr->ref),
builder.makeLocalSet(localIndexes[curr->index],
addMask(curr->value, fields[curr->index]))));
builder.makeLocalSet(localIndexes[curr->index], curr->value)));
}

void visitStructGet(StructGet* curr) {
Expand All @@ -813,11 +793,12 @@ struct Struct2Local : PostWalker<Struct2Local> {
refinalize = true;
}
Expression* value = builder.makeLocalGet(localIndexes[curr->index], type);
if (field.isPacked() && curr->signed_) {
// The value in the local is the masked unsigned value, which we must
// sign-extend.
value = Bits::makeSignExt(value, field.getByteSize(), wasm);
}
// Note that in theory we could try to do better here than to fix up the
// packing and signedness on gets: we could truncate on sets. That would be
// more efficient if all gets are unsigned, as gets outnumber sets in
// general. However, signed gets make that more complicated, so leave this
// for other opts to handle.
Comment on lines +799 to +800
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do signed gets make that more complicated? If you truncate the value on the set and sign extend it again on the get, you'll still get the correct value back. Can't we leave the redundant truncation and sign extension to be cleaned up by other ops instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's more complicated in that it's hard to see what is best here. That is, correctness is easy but we'd need to count how many gets are signed and how many are unsigned, to know which model is more efficient. So it is simplest to do the simple thing (fix up reads in one place), and leave optimizing that further to another pass.

(Atm nothing optimizes this further, but we could do it - basically count the sign-ext operations on local.gets etc. and maybe decide to shift from fixups on gets to sets or vice versa. We actually do something similar for linear memory loads and stores, but not locals.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or, to put it another way, this PR simplifies Heap2Local code while also making it more efficient in at least one case (the case where there are many signed reads and many writes: we did the worst possible thing in that case before, which was to do a fixup on both those reads and those writes).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense 👍

value = Bits::makePackedFieldGet(value, field, curr->signed_, wasm);
replaceCurrent(builder.makeSequence(builder.makeDrop(curr->ref), value));
}
};
Expand Down
81 changes: 81 additions & 0 deletions test/lit/passes/cfp.wast
Original file line number Diff line number Diff line change
Expand Up @@ -2228,6 +2228,87 @@
)
)
)

;; CHECK: (func $test_signed (type $0)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (block (result i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (ref.as_non_null
;; CHECK-NEXT: (struct.new $A_8
;; CHECK-NEXT: (i32.const 305419896)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.shr_s
;; CHECK-NEXT: (i32.shl
;; CHECK-NEXT: (i32.const 305419896)
;; CHECK-NEXT: (i32.const 24)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 24)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (block (result i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (ref.as_non_null
;; CHECK-NEXT: (struct.new $A_16
;; CHECK-NEXT: (i32.const 305419896)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.shr_s
;; CHECK-NEXT: (i32.shl
;; CHECK-NEXT: (i32.const 305419896)
;; CHECK-NEXT: (i32.const 16)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 16)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (block (result i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (ref.as_non_null
;; CHECK-NEXT: (struct.new $B_16
;; CHECK-NEXT: (global.get $g)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.shr_s
;; CHECK-NEXT: (i32.shl
;; CHECK-NEXT: (global.get $g)
;; CHECK-NEXT: (i32.const 16)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 16)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $test_signed
;; As above, but with signed gets.
(drop
(struct.get_s $A_8 0
(struct.new $A_8
(i32.const 0x12345678)
)
)
)
(drop
(struct.get_s $A_16 0
(struct.new $A_16
(i32.const 0x12345678)
)
)
)
(drop
(struct.get_s $B_16 0
(struct.new $B_16
(global.get $g)
)
)
)
)
)

(module
Expand Down
45 changes: 20 additions & 25 deletions test/lit/passes/heap2local.wast
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,7 @@
;; CHECK-NEXT: (i32.const 1338)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $1
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (local.get $3)
;; CHECK-NEXT: (i32.const 255)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.get $3)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $2
;; CHECK-NEXT: (local.get $4)
Expand All @@ -207,18 +204,18 @@
;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $1
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (i32.const 99998)
;; CHECK-NEXT: (i32.const 255)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 99998)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (block (result i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (local.get $1)
;; CHECK-NEXT: (i32.const 255)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
Expand Down Expand Up @@ -265,8 +262,6 @@
;; CHECK-NEXT: )
(func $packed
(local $temp (ref $struct.packed))
;; Packed fields require masking of irrelevant bits, which we apply on the
;; sets.
(local.set $temp
(struct.new $struct.packed
(i32.const 1337)
Expand All @@ -277,12 +272,13 @@
(local.get $temp)
(i32.const 99998)
)
;; Packed fields require masking of irrelevant bits on unsigned gets and
;; sign-extending on signed ones.
(drop
(struct.get $struct.packed 0
(local.get $temp)
)
)
;; Signed gets require us to sign-extend them.
(drop
(struct.get_s $struct.packed 0
(local.get $temp)
Expand All @@ -298,8 +294,7 @@
(local.get $temp)
)
)
;; When using struct.new_default we do not need any masking, as the values
;; written are 0 anyhow.
;; Check we do not add any masking in new_default either.
(local.set $temp
(struct.new_default $struct.packed)
)
Expand Down Expand Up @@ -3463,18 +3458,18 @@
;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $2
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (i32.const 1337)
;; CHECK-NEXT: (i32.const 255)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 1337)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (block (result i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: (i32.const 255)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (drop
Expand Down Expand Up @@ -3504,13 +3499,13 @@
(i32.const 1)
(i32.const 1337)
)
;; As with structs, we truncate/sign-extend gets of packed fields.
(drop
(array.get $array8
(local.get $temp)
(i32.const 1)
)
)
;; As with structs, a signed get causes us to emit a sign extend.
(drop
(array.get_s $array8
(local.get $temp)
Expand Down Expand Up @@ -3543,17 +3538,17 @@
;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.set $2
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (i32.const 1337)
;; CHECK-NEXT: (i32.const 65535)
;; CHECK-NEXT: )
;; CHECK-NEXT: (i32.const 1337)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: (block (result i32)
;; CHECK-NEXT: (drop
;; CHECK-NEXT: (ref.null none)
;; CHECK-NEXT: )
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: (i32.and
;; CHECK-NEXT: (local.get $2)
;; CHECK-NEXT: (i32.const 65535)
;; CHECK-NEXT: )
;; CHECK-NEXT: )
;; CHECK-NEXT: )
(func $array16 (result i32)
Expand Down