Skip to content

Commit

Permalink
cmd/compile: wire up Bswap/ReverseBytes intrinsics for loong64
Browse files Browse the repository at this point in the history
Micro-benchmark results on Loongson 3A5000 and 3A6000:

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000 @ 2500.00MHz
               |  bench.old   |              bench.new               |
               |    sec/op    |    sec/op     vs base                |
ReverseBytes     2.0020n ± 0%   0.4040n ± 0%  -79.82% (p=0.000 n=20)
ReverseBytes16   0.8866n ± 1%   0.8007n ± 0%   -9.69% (p=0.000 n=20)
ReverseBytes32   1.2195n ± 0%   0.8007n ± 0%  -34.34% (p=0.000 n=20)
ReverseBytes64   2.0705n ± 0%   0.8008n ± 0%  -61.32% (p=0.000 n=20)
geomean           1.455n        0.6749n       -53.62%

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
               |  bench.old   |              bench.new               |
               |    sec/op    |    sec/op     vs base                |
ReverseBytes     2.8040n ± 0%   0.5205n ± 0%  -81.44% (p=0.000 n=20)
ReverseBytes16   0.7066n ± 0%   0.8011n ± 0%  +13.37% (p=0.000 n=20)
ReverseBytes32   1.5500n ± 0%   0.8010n ± 0%  -48.32% (p=0.000 n=20)
ReverseBytes64   2.7665n ± 0%   0.8010n ± 0%  -71.05% (p=0.000 n=20)
geomean           1.707n        0.7192n       -57.87%

Updates #59120

This patch is a copy of CL 483357.
Co-authored-by: WANG Xuerui <git@xen0n.name>

Change-Id: If355354cd031533df91991fcc3392e5a6c314295
Reviewed-on: https://go-review.googlesource.com/c/go/+/624576
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
  • Loading branch information
sophie-zhao authored and abner-chenc committed Nov 6, 2024
1 parent d98c518 commit d6fb0ab
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 1 deletion.
3 changes: 3 additions & 0 deletions src/cmd/compile/internal/loong64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpLOONG64CLZV,
ssa.OpLOONG64SQRTD,
ssa.OpLOONG64SQRTF,
ssa.OpLOONG64REVB2H,
ssa.OpLOONG64REVB2W,
ssa.OpLOONG64REVBV,
ssa.OpLOONG64ABSD:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
Expand Down
1 change: 1 addition & 0 deletions src/cmd/compile/internal/ssa/_gen/LOONG64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@

(BitLen64 <t> x) => (NEGV <t> (SUBVconst <t> [64] (CLZV <t> x)))
(BitLen32 <t> x) => (NEGV <t> (SUBVconst <t> [32] (CLZW <t> x)))
(Bswap(16|32|64) ...) => (REVB(2H|2W|V) ...)

// math package intrinsics
(Sqrt ...) => (SQRTD ...)
Expand Down
4 changes: 4 additions & 0 deletions src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,10 @@ func init() {
{name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // Count leading (high order) zeroes (returns 0-32)
{name: "CLZV", argLength: 1, reg: gp11, asm: "CLZV"}, // Count leading (high order) zeroes (returns 0-64)

{name: "REVB2H", argLength: 1, reg: gp11, asm: "REVB2H"}, // Swap bytes: 0x11223344 -> 0x22114433 (sign extends to 64 bits)
{name: "REVB2W", argLength: 1, reg: gp11, asm: "REVB2W"}, // Swap bytes: 0x1122334455667788 -> 0x4433221188776655
{name: "REVBV", argLength: 1, reg: gp11, asm: "REVBV"}, // Swap bytes: 0x1122334455667788 -> 0x8877665544332211

{name: "FMINF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMINF", commutative: true, typ: "Float32"}, // min(arg0, arg1), float32
{name: "FMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1), float64
{name: "FMAXF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXF", commutative: true, typ: "Float32"}, // max(arg0, arg1), float32
Expand Down
42 changes: 42 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions src/cmd/compile/internal/ssa/rewriteLOONG64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion src/cmd/compile/internal/ssagen/intrinsics.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
},
all...)

brev_arch := []sys.ArchFamily{sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.S390X}
brev_arch := []sys.ArchFamily{sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X}
if cfg.goppc64 >= 10 {
// Use only on Power10, as the new byte-reverse instructions that Power10 provides
// make it worthwhile as an intrinsic.
Expand Down Expand Up @@ -804,6 +804,11 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
sys.S390X)
alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...)
alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...)
addF("math/bits", "ReverseBytes16",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
},
sys.Loong64)
// ReverseBytes inlines correctly, no need to intrinsify it.
// Nothing special is needed for targets where ReverseBytes16 lowers to a rotate.
// On Power10, a 16-bit rotate is not available, so use the BRH instruction.
Expand Down
5 changes: 5 additions & 0 deletions src/cmd/compile/internal/ssagen/intrinsics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"loong64", "internal/runtime/math", "Add64"}: struct{}{},
{"loong64", "internal/runtime/math", "Mul64"}: struct{}{},
{"loong64", "internal/runtime/math", "MulUintptr"}: struct{}{},
{"loong64", "internal/runtime/sys", "Bswap32"}: struct{}{},
{"loong64", "internal/runtime/sys", "Bswap64"}: struct{}{},
{"loong64", "internal/runtime/sys", "GetCallerPC"}: struct{}{},
{"loong64", "internal/runtime/sys", "GetCallerSP"}: struct{}{},
{"loong64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{},
Expand All @@ -411,6 +413,9 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"loong64", "math/bits", "RotateLeft"}: struct{}{},
{"loong64", "math/bits", "RotateLeft32"}: struct{}{},
{"loong64", "math/bits", "RotateLeft64"}: struct{}{},
{"loong64", "math/bits", "ReverseBytes16"}: struct{}{},
{"loong64", "math/bits", "ReverseBytes32"}: struct{}{},
{"loong64", "math/bits", "ReverseBytes64"}: struct{}{},
{"loong64", "math/bits", "Sub"}: struct{}{},
{"loong64", "math/bits", "Sub64"}: struct{}{},
{"loong64", "runtime", "KeepAlive"}: struct{}{},
Expand Down
4 changes: 4 additions & 0 deletions test/codegen/mathbits.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ func ReverseBytes(n uint) uint {
// 386:"BSWAPL"
// s390x:"MOVDBR"
// arm64:"REV"
// loong64:"REVBV"
return bits.ReverseBytes(n)
}

Expand All @@ -217,6 +218,7 @@ func ReverseBytes64(n uint64) uint64 {
// s390x:"MOVDBR"
// arm64:"REV"
// ppc64x/power10: "BRD"
// loong64:"REVBV"
return bits.ReverseBytes64(n)
}

Expand All @@ -225,6 +227,7 @@ func ReverseBytes32(n uint32) uint32 {
// 386:"BSWAPL"
// s390x:"MOVWBR"
// arm64:"REVW"
// loong64:"REVB2W"
// ppc64x/power10: "BRW"
return bits.ReverseBytes32(n)
}
Expand All @@ -235,6 +238,7 @@ func ReverseBytes16(n uint16) uint16 {
// arm/5:"SLL","SRL","ORR"
// arm/6:"REV16"
// arm/7:"REV16"
// loong64:"REVB2H"
// ppc64x/power10: "BRH"
return bits.ReverseBytes16(n)
}
Expand Down

0 comments on commit d6fb0ab

Please sign in to comment.