Skip to content

Apply fixes from Simple project #49

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public static boolean run(int round, RegAlloc alloc) {
}

// MultiNodes have projections which set registers
if( n instanceof MultiNode )
if( n instanceof MultiNode && !(n instanceof CFGNode) )
for( Node proj : n.outs() )
if( proj instanceof MachNode )
defLRG(alloc,proj);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,24 @@ public int getUID() {
public int getALIAS() { return _alias++; }


// idepths are cached and valid until *inserting* CFG edges (deleting is
// OK). This happens with inlining, which bumps the version to bulk
// invalidate the idepth caches.
//
// Encoding: a cached idepth is stored as 100*depth + version, i.e. the
// version lives in the low two decimal digits and the dominator depth in
// the digits above.  A cached value of 0 is never considered valid.
// NOTE(review): the %100 check appears to assume fewer than 100 version
// bumps; at version>=100 no cached value would ever test valid - confirm.
private int _iDepthVersion = 0;
// Bump the version; previously cached idepths stop testing valid.
public void invalidateIDepthCaches() { _iDepthVersion++; }
// True if the given cached (encoded) idepth can still be used as-is.
public boolean validIDepth(int idepth) {
// Zero is never valid, so the caller recomputes
if( idepth==0 ) return false;
// No invalidation has happened yet: any non-zero cached value is good
if( _iDepthVersion==0 ) return true;
// Otherwise the low two digits must match the current version
return (idepth%100)==_iDepthVersion;
}
// Encode a raw dominator depth with the current version.
public int iDepthAt(int idepth) {
return 100*idepth+_iDepthVersion;
}
// Given an already-encoded idepth, return the encoded idepth one level
// deeper: adds 100, leaving the low two digits unchanged.
public int iDepthFrom(int idepth) {
assert idepth==0 || validIDepth(idepth);
return idepth+100;
}

// Popular visit bitset, declared here, so it gets reused all over
public final BitSet _visit = new BitSet();
public BitSet visit() { assert _visit.isEmpty(); return _visit; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,6 @@ private void writeEncodings() {
if( !(bb instanceof MachNode mach0) )
_opStart[bb._nid] = _bits.size();
else if( bb instanceof FunNode fun ) {
padN(16,_bits);
_fun = fun; // Currently encoding function
_opStart[bb._nid] = _bits.size();
mach0.encoding( this );
Expand All @@ -337,7 +336,6 @@ else if( bb instanceof FunNode fun ) {
}
}
}
padN(16,_bits);
}

// --------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ private static void breadth(Node stop, Node[] ns, CFGNode[] late) {
!(memuse instanceof NewNode) &&
// Load-use directly defines memory
(memuse._type instanceof SONTypeMem ||
// Load-use directly defines memory
memuse instanceof CallNode ||
// Load-use indirectly defines memory
(memuse._type instanceof SONTypeTuple tt && tt._types[ld._alias] instanceof SONTypeMem)) )
continue outer;
Expand Down Expand Up @@ -200,6 +202,33 @@ private static void _doSchedLate(Node n, Node[] ns, CFGNode[] late) {
if( n instanceof MemOpNode load && load._isLoad )
lca = find_anti_dep(lca,load,early,late);


// Nodes setting a single register and getting killed will stay close
// to the uses, since they will be forced to spill anyway. The kill
// check is very weak, and some may be hoisted only to spill in the RA.
if( n instanceof MachNode mach ) {
RegMask out = mach.outregmap();
if( out!=null && out.size1() ) {
int reg = mach.outregmap().firstReg();
// Look for kills
outer:
for( CFGNode lca2=lca; lca2 != early; lca2 = lca2.idom() ) {
if( lca2 instanceof MachNode mach2 ) {
for( int i=1; i<lca2.nIns(); i++ ) {
RegMask mask = mach2.regmap(i);
if( mask!=null && mask.test(reg) ) {
early = lca2 instanceof IfNode ? lca2.idom() : lca2;
break outer;
}
}
RegMask kill = mach2.killmap();
if( kill != null )
throw Utils.TODO();
}
}
}
}

// Walk up from the LCA to the early, looking for best place. This is
// the lowest execution frequency, approximated by least loop depth and
// deepest control flow.
Expand Down Expand Up @@ -231,8 +260,9 @@ private static CFGNode use_block(Node n, Node use, CFGNode[] late) {
// Least loop depth first, then largest idepth
private static boolean better( CFGNode lca, CFGNode best ) {
return lca.loopDepth() < best.loopDepth() ||
lca instanceof NeverNode ||
(lca.idepth() > best.idepth() || best instanceof IfNode);
lca instanceof NeverNode ||
lca.idepth() > best.idepth() ||
best instanceof IfNode;
}

private static CFGNode find_anti_dep(CFGNode lca, MemOpNode load, CFGNode early, CFGNode[] late) {
Expand All @@ -249,9 +279,9 @@ private static CFGNode find_anti_dep(CFGNode lca, MemOpNode load, CFGNode early,
lca = anti_dep( load, late[mem._nid], mem.cfg0(), lca, st );
}
break; // Loads do not cause anti-deps on other loads
case CallNode st:
assert late[st._nid]!=null;
lca = anti_dep(load,late[st._nid],st.cfg0(),lca,st);
case CallNode call:
assert late[call._nid]!=null;
lca = anti_dep(load,late[call._nid],call.cfg0(),lca,call);
break;
case PhiNode phi:
// Repeat anti-dep for matching Phi inputs.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,14 +241,12 @@ private static void selfConflict(RegAlloc alloc, Node n, LRG lrg, Node prior) {
private static void mergeLiveOut( RegAlloc alloc, CFGNode priorbb, int i ) {
CFGNode bb = priorbb.cfg(i);
if( bb == null ) return; // Start has no prior
if( !bb.blockHead() ) bb = bb.cfg0();
//if( i==0 && !(bb instanceof StartNode) ) bb = bb.cfg0();
assert bb.blockHead();
while( !bb.blockHead() ) bb = bb.cfg0();

// Lazy get live-out set for bb
IdentityHashMap<LRG, Node> lrgs = BBOUTS.computeIfAbsent( bb, k -> new IdentityHashMap<>() );
IdentityHashMap<LRG, Node> lrgs = BBOUTS.computeIfAbsent( bb, k -> new IdentityHashMap<>() );

for( LRG lrg : TMP.keySet() ) {
for( LRG lrg : TMP.keySet() ) {
Node def = TMP.get(lrg);
// Effective def comes from phi input from prior block
if( def instanceof PhiNode phi && phi.cfg0()==priorbb ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ abstract public class Machine {
public abstract long callerSave();
// List of never-save registers, e.g. RSP or a ZERO register if you have one
public abstract long neverSave();
// Call Argument Mask
public abstract RegMask callArgMask(SONTypeFunPtr tfp, int arg);
// Call Argument Mask. Passed in the function signature and argument
// number (2-based; 0 is for control and 1 for memory). Also passed in a 0
// for the function itself, or for *outgoing* calls, the maximum stack slot
// given to the incoming function arguments (stack slots reserved for
// incoming arguments).
public abstract RegMask callArgMask(SONTypeFunPtr tfp, int arg, int maxArgSlot);
// Return register mask, based on signature (GPR vs FPR)
public abstract RegMask retMask(SONTypeFunPtr tfp);
// Return PC register
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ boolean splitEmptyMaskSimple( byte round, LRG lrg ) {
}

// Single-def live range with an empty mask. There are many single-reg
// uses. Theory is there's many repeats if the same reg amongst the uses.
// uses. Theory is there's many repeats of the same reg amongst the uses.
// Instead of splitting once per use, start by splitting into groups based on
// required input register.
boolean splitEmptyMaskByUse( byte round, LRG lrg ) {
Expand All @@ -270,35 +270,45 @@ boolean splitEmptyMaskByUse( byte round, LRG lrg ) {
// Look at each use, and break into non-overlapping register classes.
Ary<RegMask> rclass = new Ary<>(RegMask.class);
boolean done=false;
int ncalls=0;
while( !done ) {
done = true;
for( Node use : def._outputs )
if( use instanceof MachNode mach )
if( use instanceof MachNode mach ) {
if( mach instanceof CallNode ) ncalls++;
for( int i=1; i<use.nIns(); i++ )
if( use.in(i)==def )
done = putIntoRegClass( rclass, mach.regmap(i) );
}
}

// See how many register classes we split into
if( rclass._len <= 1 ) return false;
// See how many register classes we split into. Generally not
// productive to split like this across calls, which are going to kill
// all registers anyways.
if( rclass._len <= 1 || ncalls > 1 ) return false;

// Split by class
// Split by class
Ary<Node> ns = new Ary<>(Node.class);
for( RegMask rmask : rclass ) {
ns.addAll(def._outputs);
Node split = makeSplit(def,"popular",round,lrg);
split.insertAfter( def );
if( split.nIns()>1 ) split.setDef(1,def);
// all uses by class to split
for( int j=0; j < def._outputs._len; j++ ) {
Node use = def._outputs.at(j);
for( Node use : ns ) {
if( use instanceof MachNode mach && use!=split ) {
// Check all use inputs for n, in case there's several
for( int i = 1; i < use.nIns(); i++ )
// Find a def input, and check register class
if( use.in( i ) == def && mach.regmap( i ).overlap( rmask ) )
// Modify use to use the split version specialized to this rclass
{ use.setDef( i, split ); j--; break; }
if( use.in( i ) == def ) {
RegMask m = mach.regmap( i );
if( m!=null && mach.regmap( i ).overlap( rmask ) )
// Modify use to use the split version specialized to this rclass
use.setDefOrdered( i, split );
}
}
}
ns.clear();
}
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
import com.compilerprogramming.ezlang.compiler.SB;

/** RegMask
* A "register mask" - 1 bit set for each allowed register. In addition
* A "register mask" - 1 bit set for each allowed register. In addition,
* "stack slot" registers may be allowed, effectively making the set infinite.
* <p>
* For smaller and simpler machines it suffices to make such masks an i64 or
* i128 (64 or 128 bit integers), and this presentation is by far the better
* i128 (64- or 128-bit integers), and this presentation is by far the better
* way to go... if all register allocations can fit in this bit limitation.
* The allocator will need bits for stack-based parameters and for splits
* which cannot get a register. For a 32-register machine like the X86, add 1
Expand All @@ -28,7 +28,7 @@ public class RegMask {

public RegMask(int bit) {
if( bit < 64 ) _bits0 = 1L<<bit;
else _bits1 = 1L<<(bit=64);
else _bits1 = 1L<<(bit-64);
}
public RegMask(long bits ) { _bits0 = bits; }
public RegMask(long bits0, long bits1 ) { _bits0 = bits0; _bits1 = bits1; }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.compilerprogramming.ezlang.compiler.nodes;

import com.compilerprogramming.ezlang.compiler.Compiler;
import com.compilerprogramming.ezlang.compiler.codegen.CodeGen;
import com.compilerprogramming.ezlang.compiler.sontypes.*;
import java.util.BitSet;

Expand Down Expand Up @@ -68,7 +69,9 @@ public CFGNode uctrlSkipEmpty() {
* See {@link <a href="https://en.wikipedia.org/wiki/Dominator_(graph_theory)">...</a>}
*/
public int _idepth;
public int idepth() { return _idepth==0 ? (_idepth=idom().idepth()+1) : _idepth; }
// Dominator-tree depth, cached in _idepth.  If CodeGen.CODE reports the
// cached value as no longer valid, recompute it from the immediate
// dominator's idepth via iDepthFrom and re-cache it.
public int idepth() {
return CodeGen.CODE.validIDepth(_idepth) ? _idepth : (_idepth=CodeGen.CODE.iDepthFrom(idom().idepth()));
}

// Return the immediate dominator of this Node and compute dom tree depth.
public CFGNode idom(Node dep) { return cfg(0); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public Node idealize() {

// Trivial inlining: call site calls a single function; single function
// is only called by this call site.
if( !_folding && nIns()==2 && in(0) instanceof CallNode call ) {
if( false && !_folding && nIns()==2 && in(0) instanceof CallNode call ) {
Node fptr = call.fptr();
if( fptr.nOuts() == 1 && // Only user is this call
fptr instanceof ConstantNode && // We have an immediate call
Expand All @@ -74,8 +74,9 @@ public Node idealize() {
assert fun.in(1) instanceof StartNode && fun.in(2)==call;
// Disallow self-recursive inlining (loop unrolling by another name)
CFGNode idom = call;
while( !(idom instanceof FunNode fun2) )
while( !(idom instanceof FunNode) )
idom = idom.idom();
// Inline?
if( idom != fun ) {
// Trivial inline: rewrite
_folding = true;
Expand All @@ -85,6 +86,9 @@ public Node idealize() {
fun.setDef(2,call.ctrl()); // Bypass the Call;
fun.ret().setDef(3,null); // Return is folding also
CodeGen.CODE.addAll(fun._outputs);
// Inlining immediately invalidates all cached idepth fields past the inline point.
// Bump the global version number invalidating them en-masse.
CodeGen.CODE.invalidateIDepthCaches();
return this;
}
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ public void unlink_all() {
assert linked(fun);
int idx = fun._inputs.find(this);
for( Node use : fun._outputs )
if( use instanceof ParmNode parm )
if( use instanceof ParmNode )
use.delDef(idx);
fun.delDef(idx);
cend().delDef(cend()._inputs.find(fun.ret()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public Node idealize() {

// Upgrade inferred or user-written return type to actual
if( _ret!=null && _ret._type instanceof SONTypeTuple tt && tt.ret() != _sig.ret() )
// FIXME Dibyendu
//throw Utils.TODO();
return null;

Expand All @@ -113,8 +114,11 @@ public Node idealize() {
return null;
}

// Bypass Region idom, always assume depth == 1, one more than Start
@Override public int idepth() { return (_idepth=1); }
// Bypass Region idom: normally always assume depth == 1 (one more than
// Start), encoded via CodeGen.CODE.iDepthAt(1).  While _folding is set,
// defer to the inherited idepth() instead.
// NOTE(review): original comment says a folding node is "just a ID on
// input#1" - presumably a pass-through of in(1); confirm.
@Override public int idepth() {
return _folding ? super.idepth() : CodeGen.CODE.iDepthAt(1);
}
// Bypass Region idom, always assume idom is Start
@Override public CFGNode idom(Node dep) { return cfg(1); }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ public SONType compute() {
}

// Bypass Region idepth: same as the default idepth(), computed from idom(), which here is entry() instead of in(0)
@Override public int idepth() { return _idepth==0 ? (_idepth=idom().idepth()+1) : _idepth; }
// Dominator-tree depth, cached in _idepth.  If CodeGen.CODE reports the
// cached value as no longer valid, recompute it from the immediate
// dominator's idepth via iDepthFrom and re-cache it.  Marked @Override
// (as the previous one-line version was) so the compiler verifies this
// really replaces the inherited idepth().
@Override public int idepth() {
return CodeGen.CODE.validIDepth(_idepth) ? _idepth : (_idepth=CodeGen.CODE.iDepthFrom(idom().idepth()));
}
// Bypass Region idom, same as the default idom() but using in(1) instead of in(0)
@Override public CFGNode idom(Node dep) { return entry(); }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ public SONTypeTuple compute() {
private RegMask _retMask;
private RegMask _kills;
public void cacheRegs(CodeGen code) {
_arg2Reg = code._mach.callArgMask(SONTypeFunPtr.CALLOC,2).firstReg();
_arg3Mask = code._mach.callArgMask(SONTypeFunPtr.CALLOC,3);
_arg2Reg = code._mach.callArgMask(SONTypeFunPtr.CALLOC,2,0).firstReg();
_arg3Mask = code._mach.callArgMask(SONTypeFunPtr.CALLOC,3,0);
// Return mask depends on TFP (either GPR or FPR)
_retMask = code._mach.retMask(SONTypeFunPtr.CALLOC);
// Kill mask is all caller-saves, and any mirror stack slots for args
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public class PhiNode extends Node {
final SONType _declaredType;

public PhiNode(String label, SONType declaredType, Node... inputs) { super(inputs); _label = label; assert declaredType!=null; _declaredType = declaredType; }
public PhiNode(PhiNode phi, String label, SONType declaredType) { super(phi); _label = label; _declaredType = declaredType; }
public PhiNode(PhiNode phi, String label, SONType declaredType) { super(phi); _label = label; _type = _declaredType = declaredType; }
public PhiNode(PhiNode phi) { super(phi); _label = phi._label; _declaredType = phi._declaredType; }

public PhiNode(RegionNode r, Node sample) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,13 @@ boolean hasMidUser(RegionNode r) {

// Immediate dominator of Region is a little more complicated.
@Override public int idepth() {
if( _idepth!=0 ) return _idepth;
if( CodeGen.CODE.validIDepth(_idepth) )
return _idepth;
int d=0;
for( Node n : _inputs )
if( n!=null )
d = Math.max(d,((CFGNode)n).idepth()+1);
return _idepth=d;
d = Math.max(d,CodeGen.CODE.iDepthFrom(((CFGNode)n).idepth()));
return _idepth = d;
}

@Override public CFGNode idom(Node dep) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ public SONType compute() {
if( inProgress () ) return null;
if( _fun.isDead() ) return null;

// // Upgrade signature based on return type
// Upgrade signature based on return type
// FIXME Dibyendu - EZ lang does not support modifying function signature
// but we should probably do the checking?
// SONType ret = expr()._type;
// SONTypeFunPtr fcn = _fun.sig();
// assert ret.isa(fcn.ret());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public StringBuilder _print1(StringBuilder sb, BitSet visited) {
@Override public Node idealize() { return null; }

// No immediate dominator; dominator depth is 0 (encoded with the current idepth-cache version)
@Override public int idepth() { return 0; }
@Override public int idepth() { return CodeGen.CODE.iDepthAt(0); }
@Override public CFGNode idom(Node dep) { return null; }

}
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ public Node idealize() {
}
}

// Store of zero after alloc
if( mem() instanceof ProjNode prj && prj.in(0) instanceof NewNode &&
prj.in(0)==ptr().in(0) && // Same NewNode memory & pointer
(val()._type==SONTypeInteger.ZERO || val()._type==SONType.NIL ) )
return mem();

return null;
}
Expand Down
Loading