@@ -63,56 +63,88 @@ void BinaryInstWriter::visitLoop(Loop* curr) {
6363}
6464
6565void BinaryInstWriter::visitBreak (Break* curr) {
66+ auto type = curr->type ;
67+
68+ // See comment on |brIfsNeedingHandling| for the extra handling we need to
69+ // emit here for certain br_ifs. If we need that handling, we either use a
70+ // cast in simple cases, or scratch locals otherwise. We use the scratch
71+ // locals to stash the stack before the br_if (which contains the refined
72+ // types), then restore it later from those locals.
73+ bool needScratchLocals = false ;
74+ // If we need locals, we must track how many we've used from each type as we
75+ // go, as a type might appear multiple times in the tuple. We know we have
76+ // enough of a range allocated for them, so we just increment as we go.
77+ std::unordered_map<Type, Index> scratchTypeUses;
78+ // Logic to stash and restore the stack, given a vector of types we are
79+ // stashing/restoring. We will first stash the entire stack, including the i32
80+ // condition, and after the br_if, restore the value (without the condition).
81+ auto stashStack = [&](const std::vector<Type>& types) {
82+ for (Index i = 0 ; i < types.size (); i++) {
83+ auto t = types[types.size () - i - 1 ];
84+ assert (scratchLocals.find (t) != scratchLocals.end ());
85+ auto localIndex = scratchLocals[t] + scratchTypeUses[t]++;
86+ o << int8_t (BinaryConsts::LocalSet) << U32LEB (localIndex);
87+ }
88+ };
89+ auto restoreStack = [&](const std::vector<Type>& types) {
90+ // Use a copy of this data, as we will restore twice.
91+ auto currScratchTypeUses = scratchTypeUses;
92+ for (Index i = 0 ; i < types.size (); i++) {
93+ auto t = types[i];
94+ auto localIndex = scratchLocals[t] + --currScratchTypeUses[t];
95+ o << int8_t (BinaryConsts::LocalGet) << U32LEB (localIndex);
96+ }
97+ };
98+
99+ // The types on the stack before the br_if. We need this if we use locals to
100+ // stash the stack.
101+ std::vector<Type> typesOnStack;
102+
103+ auto needHandling = brIfsNeedingHandling.count (curr);
104+ if (needHandling) {
105+ // Tuples always need scratch locals. Uncastable types do as well, as we
106+ // can't fix them up below with a simple cast.
107+ needScratchLocals = type.isTuple () || !type.isCastable ();
108+ if (needScratchLocals) {
109+ // Stash all the values on the stack to those locals, then reload them for
110+ // the br_if to consume. Later, we can reload the refined values after the
111+ // br_if, for its parent to consume.
112+
113+ typesOnStack = std::vector<Type>(type.begin (), type.end ());
114+ typesOnStack.push_back (Type::i32 );
115+
116+ stashStack (typesOnStack);
117+ restoreStack (typesOnStack);
118+ // The stack is now in the same state as before, but we have copies in
119+ // locals for later.
120+ }
121+ }
122+
66123 o << int8_t (curr->condition ? BinaryConsts::BrIf : BinaryConsts::Br)
67124 << U32LEB (getBreakIndex (curr->name ));
68125
69- // See comment on |brIfsNeedingHandling| for the extra casts we need to emit
70- // here for certain br_ifs.
71- auto iter = brIfsNeedingHandling.find (curr);
72- if (iter != brIfsNeedingHandling.end ()) {
73- auto unrefinedType = iter->second ;
74- auto type = curr->type ;
75- assert (type.size () == unrefinedType.size ());
126+ if (needHandling) {
127+ if (!needScratchLocals) {
128+ // We can just cast here, avoiding scratch locals. (Casting adds overhead,
129+ // but this is very rare, and it avoids adding locals, which would keep
130+ // growing the wasm with each roundtrip.)
76131
77- assert (curr->type .hasRef ());
78-
79- auto emitCast = [&](Type to) {
80132 // Shim a tiny bit of IR, just enough to get visitRefCast to see what we
81133 // are casting, and to emit the proper thing.
82134 RefCast cast;
83- cast.type = to ;
135+ cast.type = type ;
84136 cast.ref = cast.desc = nullptr ;
85137 visitRefCast (&cast);
86- };
87-
88- if (!type.isTuple ()) {
89- // Simple: Just emit a cast, and then the type matches Binaryen IR's.
90- emitCast (type);
91138 } else {
92- // Tuples are trickier to handle, and we need to use scratch locals. Stash
93- // all the values on the stack to those locals, then reload them, casting
94- // as we go.
95- //
96- // We must track how many scratch locals we've used from each type as we
97- // go, as a type might appear multiple times in the tuple. We allocated
98- // enough for each, in a contiguous range, so we just increment as we go.
99- std::unordered_map<Type, Index> scratchTypeUses;
100- for (Index i = 0 ; i < unrefinedType.size (); i++) {
101- auto t = unrefinedType[unrefinedType.size () - i - 1 ];
102- assert (scratchLocals.find (t) != scratchLocals.end ());
103- auto localIndex = scratchLocals[t] + scratchTypeUses[t]++;
104- o << int8_t (BinaryConsts::LocalSet) << U32LEB (localIndex);
105- }
106- for (Index i = 0 ; i < unrefinedType.size (); i++) {
107- auto t = unrefinedType[i];
108- auto localIndex = scratchLocals[t] + --scratchTypeUses[t];
109- o << int8_t (BinaryConsts::LocalGet) << U32LEB (localIndex);
110- if (t.isRef ()) {
111- // Note that we cast all types here, when perhaps only some of the
112- // tuple's lanes need that. This is simpler.
113- emitCast (type[i]);
114- }
139+ // We need locals. Earlier we stashed the stack, so we just need to
140+ // restore the value from there (note we don't restore the condition),
141+ // after dropping the br_if's unrefined values.
142+ for (Index i = 0 ; i < type.size (); ++i) {
143+ o << int8_t (BinaryConsts::Drop);
115144 }
145+ assert (typesOnStack.back () == Type::i32 );
146+ typesOnStack.pop_back ();
147+ restoreStack (typesOnStack);
116148 }
117149 }
118150}
@@ -3094,8 +3126,9 @@ InsertOrderedMap<Type, Index> BinaryInstWriter::countScratchLocals() {
30943126 : writer(writer), finder(finder) {}
30953127
30963128 void visitBreak (Break* curr) {
3129+ auto type = curr->type ;
30973130 // See if this is one of the dangerous br_ifs we must handle.
3098- if (!curr-> type .hasRef ()) {
3131+ if (!type.hasRef ()) {
30993132 // Not even a reference.
31003133 return ;
31013134 }
@@ -3106,7 +3139,7 @@ InsertOrderedMap<Type, Index> BinaryInstWriter::countScratchLocals() {
31063139 return ;
31073140 }
31083141 if (auto * cast = parent->dynCast <RefCast>()) {
3109- if (Type::isSubType (cast->type , curr-> type )) {
3142+ if (Type::isSubType (cast->type , type)) {
31103143 // It is cast to the same type or a better one. In particular this
31113144 // handles the case of repeated roundtripping: After the first
31123145 // roundtrip we emit a cast that we'll identify here, and not emit
@@ -3117,23 +3150,30 @@ InsertOrderedMap<Type, Index> BinaryInstWriter::countScratchLocals() {
31173150 }
31183151 auto * breakTarget = findBreakTarget (curr->name );
31193152 auto unrefinedType = breakTarget->type ;
3120- if (unrefinedType == curr-> type ) {
3153+ if (unrefinedType == type) {
31213154 // It has the proper type anyhow.
31223155 return ;
31233156 }
31243157
31253158 // Mark the br_if as needing handling, and add the type to the set of
31263159 // types we need scratch tuple locals for (if relevant).
3127- writer.brIfsNeedingHandling [curr] = unrefinedType;
3128-
3129- if (unrefinedType.isTuple ()) {
3130- // We must allocate enough scratch locals for this tuple. Note that we
3131- // may need more than one per type in the tuple, if a type appears more
3132- // than once, so we count their appearances.
3160+ writer.brIfsNeedingHandling .insert (curr);
3161+
3162+ // Simple cases can be handled by a cast. However, tuples and uncastable
3163+ // types require us to use locals too.
3164+ if (type.isTuple () || !type.isCastable ()) {
3165+ // We must allocate enough scratch locals for this tuple, plus the i32
3166+ // of the condition, as we will stash it all so that we can restore the
3167+ // fully refined value after the br_if.
3168+ //
3169+ // Note that we may need more than one per type in the tuple, if a type
3170+ // appears more than once, so we count their appearances.
31333171 InsertOrderedMap<Type, Index> scratchTypeUses;
3134- for (auto t : unrefinedType ) {
3172+ for (auto t : type ) {
31353173 scratchTypeUses[t]++;
31363174 }
3175+ // The condition.
3176+ scratchTypeUses[Type::i32 ]++;
31373177 for (auto & [type, uses] : scratchTypeUses) {
31383178 auto & count = finder.scratches [type];
31393179 count = std::max (count, uses);
0 commit comments