Skip to content

Commit 478d864

Browse files
Jorropo authored and gopherbot committed
cmd/compile: on amd64 use 32bits copies for 64bits copies of 32bits values
Fixes golang#76449

This saves a single byte for the REX prefix per OpCopy it triggers on.

Change-Id: I1eab364d07354555ba2f23ffd2f9c522d4a04bd0
Reviewed-on: https://go-review.googlesource.com/c/go/+/731640
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Jorropo <jorropo.pgm@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
1 parent 8f73916 commit 478d864

File tree

7 files changed

+51
-23
lines changed

7 files changed

+51
-23
lines changed

src/cmd/compile/internal/amd64/ssa.go

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,10 @@ func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
4343
}
4444
}
4545

46+
func isGPReg(r int16) bool {
47+
return x86.REG_AL <= r && r <= x86.REG_R15
48+
}
49+
4650
func isFPReg(r int16) bool {
4751
return x86.REG_X0 <= r && r <= x86.REG_Z31
4852
}
@@ -1225,14 +1229,23 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
12251229
if v.Type.IsMemory() {
12261230
return
12271231
}
1228-
x := v.Args[0].Reg()
1232+
arg := v.Args[0]
1233+
x := arg.Reg()
12291234
y := v.Reg()
12301235
if v.Type.IsSIMD() {
1231-
x = simdOrMaskReg(v.Args[0])
1236+
x = simdOrMaskReg(arg)
12321237
y = simdOrMaskReg(v)
12331238
}
12341239
if x != y {
1235-
opregreg(s, moveByRegsWidth(y, x, v.Type.Size()), y, x)
1240+
width := v.Type.Size()
1241+
if width == 8 && isGPReg(y) && ssa.ZeroUpper32Bits(arg, 3) {
1242+
// The source was naturally zext-ed from 32 to 64 bits,
1243+
// but we are asked to do a full 64-bit copy.
1244+
// Save the REX prefix byte in I-CACHE by using a 32-bit move,
1245+
// since it zeroes the upper 32 bits anyway.
1246+
width = 4
1247+
}
1248+
opregreg(s, moveByRegsWidth(y, x, width), y, x)
12361249
}
12371250
case ssa.OpLoadReg:
12381251
if v.Type.IsFlags() {

src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,6 @@
88
(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
99

1010
// See comments in ARM64latelower.rules for why these are here.
11-
(MOVLQZX x) && zeroUpper32Bits(x,3) => x
12-
(MOVWQZX x) && zeroUpper48Bits(x,3) => x
13-
(MOVBQZX x) && zeroUpper56Bits(x,3) => x
11+
(MOVLQZX x) && ZeroUpper32Bits(x,3) => x
12+
(MOVWQZX x) && ZeroUpper48Bits(x,3) => x
13+
(MOVBQZX x) && ZeroUpper56Bits(x,3) => x

src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@
2929
(MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => x
3030

3131
// omit unsigned extension
32-
(MOVWUreg x) && zeroUpper32Bits(x, 3) => x
32+
(MOVWUreg x) && ZeroUpper32Bits(x, 3) => x
3333

3434
// don't extend after proper load
3535
(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)

src/cmd/compile/internal/ssa/rewrite.go

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1351,7 +1351,7 @@ func overlap(offset1, size1, offset2, size2 int64) bool {
13511351
// check if value zeroes out upper 32-bit of 64-bit register.
13521352
// depth limits recursion depth. In AMD64.rules 3 is used as limit,
13531353
// because it catches same amount of cases as 4.
1354-
func zeroUpper32Bits(x *Value, depth int) bool {
1354+
func ZeroUpper32Bits(x *Value, depth int) bool {
13551355
if x.Type.IsSigned() && x.Type.Size() < 8 {
13561356
// If the value is signed, it might get re-sign-extended
13571357
// during spill and restore. See issue 68227.
@@ -1368,6 +1368,8 @@ func zeroUpper32Bits(x *Value, depth int) bool {
13681368
OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
13691369
OpAMD64SHLL, OpAMD64SHLLconst:
13701370
return true
1371+
case OpAMD64MOVQconst:
1372+
return uint64(uint32(x.AuxInt)) == uint64(x.AuxInt)
13711373
case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
13721374
OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
13731375
OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
@@ -1383,7 +1385,7 @@ func zeroUpper32Bits(x *Value, depth int) bool {
13831385
return false
13841386
}
13851387
for i := range x.Args {
1386-
if !zeroUpper32Bits(x.Args[i], depth-1) {
1388+
if !ZeroUpper32Bits(x.Args[i], depth-1) {
13871389
return false
13881390
}
13891391
}
@@ -1393,14 +1395,16 @@ func zeroUpper32Bits(x *Value, depth int) bool {
13931395
return false
13941396
}
13951397

1396-
// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
1397-
func zeroUpper48Bits(x *Value, depth int) bool {
1398+
// ZeroUpper48Bits is similar to ZeroUpper32Bits, but for upper 48 bits.
1399+
func ZeroUpper48Bits(x *Value, depth int) bool {
13981400
if x.Type.IsSigned() && x.Type.Size() < 8 {
13991401
return false
14001402
}
14011403
switch x.Op {
14021404
case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
14031405
return true
1406+
case OpAMD64MOVQconst, OpAMD64MOVLconst:
1407+
return uint64(uint16(x.AuxInt)) == uint64(x.AuxInt)
14041408
case OpArg: // note: but not ArgIntReg
14051409
return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
14061410
case OpPhi, OpSelect0, OpSelect1:
@@ -1410,7 +1414,7 @@ func zeroUpper48Bits(x *Value, depth int) bool {
14101414
return false
14111415
}
14121416
for i := range x.Args {
1413-
if !zeroUpper48Bits(x.Args[i], depth-1) {
1417+
if !ZeroUpper48Bits(x.Args[i], depth-1) {
14141418
return false
14151419
}
14161420
}
@@ -1420,14 +1424,16 @@ func zeroUpper48Bits(x *Value, depth int) bool {
14201424
return false
14211425
}
14221426

1423-
// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
1424-
func zeroUpper56Bits(x *Value, depth int) bool {
1427+
// ZeroUpper56Bits is similar to ZeroUpper32Bits, but for upper 56 bits.
1428+
func ZeroUpper56Bits(x *Value, depth int) bool {
14251429
if x.Type.IsSigned() && x.Type.Size() < 8 {
14261430
return false
14271431
}
14281432
switch x.Op {
14291433
case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
14301434
return true
1435+
case OpAMD64MOVQconst, OpAMD64MOVLconst:
1436+
return uint64(uint8(x.AuxInt)) == uint64(x.AuxInt)
14311437
case OpArg: // note: but not ArgIntReg
14321438
return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
14331439
case OpPhi, OpSelect0, OpSelect1:
@@ -1437,7 +1443,7 @@ func zeroUpper56Bits(x *Value, depth int) bool {
14371443
return false
14381444
}
14391445
for i := range x.Args {
1440-
if !zeroUpper56Bits(x.Args[i], depth-1) {
1446+
if !ZeroUpper56Bits(x.Args[i], depth-1) {
14411447
return false
14421448
}
14431449
}

src/cmd/compile/internal/ssa/rewriteAMD64latelower.go

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteARM64latelower.go

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/codegen/constants.go

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,3 +33,12 @@ func contiguousMaskConstants() (out [64]uint64) {
3333
out[3] = 0xFFFFFFFE00000001
3434
return
3535
}
36+
37+
func issue76449_1() (_, _, _ uint64) {
38+
// amd64:-"MOVQ"
39+
return 0, 0, 0
40+
}
41+
func issue76449_2() (_, _, _ uint64) {
42+
// amd64:-"MOVQ"
43+
return 1, 2, 1
44+
}

0 commit comments

Comments
 (0)