-
Notifications
You must be signed in to change notification settings - Fork 5.2k
[RISC-V] Simplify codegen for GT_LEA #119634
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch |
| BlockRange().InsertBefore(addrMode, base); | ||
| addrMode->SetBase(base); | ||
| addrMode->SetIndex(nullptr); | ||
| LowerAdd(base->AsOp()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it'd be possible to also match index scale in genCreateAddrMode reusing the code paths for x64 instead of maintaining RISC-V specific TryLowerShiftAddToShxadd. I'll leave it for another PR.
|
@risc-vv /run |
4582da5 is being scheduled for building and testing. GIT: Checked-build FAILED (buildinfo.json) |
|
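The improvements come from allowing lowering to fold the zero-extended, scaled index into a single `shNadd.uw` / `add.uw` instruction (see the example diffs below). The few regressions come from allocating additional registers; hitherto codegen used the temporary register `t6` for the address computation. Diffs are based on 121,320 contexts (52,834 MinOpts, 68,486 FullOpts). Overall (-244,398 bytes)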
MinOpts (-36 bytes)
FullOpts (-244,362 bytes)
Example diffs
linux.riscv64.Checked.1.mch
-22 (-10.48%) : 7260.dasm - Test_init_struct:test18(long[,,],int,int) (FullOpts)
@@ -7,80 +7,75 @@
; No matching PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T00] ( 9, 9 ) ref -> s1 class-hnd single-def <long[,,]>
+; V00 arg0 [V00,T00] ( 9, 9 ) ref -> a0 class-hnd single-def <long[,,]>
; V01 arg1 [V01,T04] ( 3, 3 ) int -> a1 single-def
; V02 arg2 [V02,T05] ( 3, 3 ) int -> a2 single-def
; V03 loc0 [V03 ] ( 1, 1 ) struct ( 8) [fp-0x08] do-not-enreg[XS] must-init addr-exposed ld-addr-op <Val>
;# V04 OutArgs [V04 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
; V05 tmp1 [V05 ] ( 1, 1 ) long -> [fp-0x08] do-not-enreg[X] addr-exposed "field V03.val (fldOffset=0x0)" P-DEP
-; V06 tmp2 [V06,T01] ( 3, 6 ) int -> a0 "MD array shared temp"
-; V07 tmp3 [V07,T02] ( 3, 6 ) int -> a3 "MD array shared temp"
+; V06 tmp2 [V06,T01] ( 3, 6 ) int -> a1 "MD array shared temp"
+; V07 tmp3 [V07,T02] ( 3, 6 ) int -> a4 "MD array shared temp"
; V08 tmp4 [V08,T03] ( 3, 6 ) int -> a2 "MD array shared temp"
-; V09 cse0 [V09,T06] ( 3, 3 ) int -> a1 "CSE #01: aggressive"
-; V10 cse1 [V10,T07] ( 3, 3 ) int -> a1 "CSE #02: aggressive"
+; V09 cse0 [V09,T06] ( 3, 3 ) int -> a3 "CSE #01: aggressive"
+; V10 cse1 [V10,T07] ( 3, 3 ) int -> a3 "CSE #02: aggressive"
;
-; Lcl frame size = 16
-Frame info. #outsz=0; #framesz=48; lcl=16
+; Lcl frame size = 8
+Frame info. #outsz=0; #framesz=32; lcl=8
G_M34441_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- addi sp, sp, -48
- sd fp, 16(sp)
- sd ra, 24(sp)
- sd s1, 32(sp)
- sd s2, 40(sp)
- addi fp, sp, 16
+ addi sp, sp, -32
+ sd fp, 8(sp)
+ sd ra, 16(sp)
+ sd s1, 24(sp)
+ addi fp, sp, 8
sd zero, -8(fp)
- mv s1, a0
- ; gcrRegs +[s1]
- ;; size=32 bbWeight=1 PerfScore 21.50
-G_M34441_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref
- lw a0, 0xD1FFAB1E(s1)
- subw a0, a1, a0
- lw a1, 0xD1FFAB1E(s1)
- sext.w a3, a1
- sext.w a4, a0
- bgeu a4, a3, G_M34441_IG04
- lw a1, 0xD1FFAB1E(s1)
- mulw a0, a0, a1
- lw a3, 0xD1FFAB1E(s1)
- addi a4, zero, 0xD1FFAB1E
- subw a3, a4, a3
- sext.w a4, a1
- sext.w a5, a3
+ ;; size=24 bbWeight=1 PerfScore 17.00
+G_M34441_IG02: ; bbWeight=1, gcrefRegs=0400 {a0}, byrefRegs=0000 {}, byref
+ ; gcrRegs +[a0]
+ lw a3, 0xD1FFAB1E(a0)
+ subw a1, a1, a3
+ lw a3, 0xD1FFAB1E(a0)
+ sext.w a4, a3
+ sext.w a5, a1
bgeu a5, a4, G_M34441_IG04
- addw a0, a0, a3
- lw a1, 0xD1FFAB1E(s1)
- mulw a0, a0, a1
- lw a3, 0xD1FFAB1E(s1)
- subw a2, a2, a3
- sext.w a3, a1
- sext.w a4, a2
- bgeu a4, a3, G_M34441_IG04
- addw a0, a0, a2
- zext.w a0, a0
- slli s2, a0, 3
+ lw a3, 0xD1FFAB1E(a0)
+ mulw a1, a1, a3
+ lw a4, 0xD1FFAB1E(a0)
+ addi a5, zero, 0xD1FFAB1E
+ subw a4, a5, a4
+ sext.w a5, a3
+ sext.w a6, a4
+ bgeu a6, a5, G_M34441_IG04
+ addw a1, a1, a4
+ lw a3, 0xD1FFAB1E(a0)
+ mulw a1, a1, a3
+ lw a4, 0xD1FFAB1E(a0)
+ subw a2, a2, a4
+ sext.w a4, a3
+ sext.w a5, a2
+ bgeu a5, a4, G_M34441_IG04
+ addw a1, a1, a2
+ sh3add.uw s1, a1, a0
+ ; byrRegs +[s1]
addi a0, fp, -8
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 13
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 2
- ld a1, 0xD1FFAB1E(t6)
+ ; gcrRegs -[a0]
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ slli a1, a1, 13
+ addi a1, a1, 0xD1FFAB1E
+ slli a1, a1, 2
+ ld a1, 0xD1FFAB1E(a1)
jalr a1 // Test_init_struct:noinline2(byref):long
- add a1, s1, s2
- ; byrRegs +[a1]
- sd a0, 0xD1FFAB1E(a1)
- ;; size=134 bbWeight=1 PerfScore 48.50
+ sd a0, 0xD1FFAB1E(s1)
+ ;; size=124 bbWeight=1 PerfScore 47.50
G_M34441_IG03: ; bbWeight=1, epilog, nogc, extend
- ld s2, 40(sp)
- ld s1, 32(sp)
- ld ra, 24(sp)
- ld fp, 16(sp)
- addi sp, sp, 48
- ret ;; size=24 bbWeight=1 PerfScore 11.50
+ ld s1, 24(sp)
+ ld ra, 16(sp)
+ ld fp, 8(sp)
+ addi sp, sp, 32
+ ret ;; size=20 bbWeight=1 PerfScore 9.50
G_M34441_IG04: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref
- ; gcrRegs -[s1]
- ; byrRegs -[a1]
+ ; byrRegs -[s1]
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
slli a0, a0, 15
@@ -88,31 +83,29 @@ G_M34441_IG04: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {
ebreak
;; size=20 bbWeight=0 PerfScore 0.00
-; Total bytes of code 210, prolog size 28, PerfScore 81.50, instruction count 48, allocated bytes for code 210 (MethodHash=8ade7976) for method Test_init_struct:test18(long[,,],int,int) (FullOpts)
+; Total bytes of code 188, prolog size 24, PerfScore 74.00, instruction count 43, allocated bytes for code 188 (MethodHash=8ade7976) for method Test_init_struct:test18(long[,,],int,int) (FullOpts)
; ============================================================
Unwind Info:
>> Start offset : 0x000000 (not in unwind data)
>> End offset : 0xd1ffab1e (not in unwind data)
- Code Words : 5
+ Code Words : 4
Epilog Count : 1
E bit : 0
X bit : 0
Vers : 0
- Function Length : 105 (0x00069) Actual length = 210 (0x0000d2)
+ Function Length : 94 (0x0005e) Actual length = 188 (0x0000bc)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
Epilog Start Index : 3 (0x03)
---- Unwind codes ----
- E2 00 02 add_fp 2 (0x02); addi fp, sp, #16
+ E2 00 01 add_fp 1 (0x01); addi fp, sp, #8
---- Epilog start at index 3 ----
- D0 11 05 save_reg X#17 Z#5 (0x05); sd s2, sp, 40
- D0 08 04 save_reg X#8 Z#4 (0x04); sd s1, sp, 32
- D0 00 03 save_reg X#0 Z#3 (0x03); sd ra, sp, 24
- D0 07 02 save_reg X#7 Z#2 (0x02); sd fp, sp, 16
- 03 alloc_s #3 (0x03); addi sp, sp, -48 (0x030)
- E4 end
+ D0 08 03 save_reg X#8 Z#3 (0x03); sd s1, sp, 24
+ D0 00 02 save_reg X#0 Z#2 (0x02); sd ra, sp, 16
+ D0 07 01 save_reg X#7 Z#1 (0x01); sd fp, sp, 8
+ 02 alloc_s #2 (0x02); addi sp, sp, -32 (0x020)
E4 end
E4 end
E4 end
-24 (-9.92%) : 54353.dasm - Test_10w5d.testout1:Func_0_1_2_1_1():double (FullOpts)
@@ -73,33 +73,29 @@ G_M10741_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
sext.w a6, a4
bgeu a6, a5, G_M10741_IG04
addw a3, a3, a4
- zext.w a3, a3
- slli a3, a3, 3
- add a5, a0, a3
- ; byrRegs +[a5]
- sd zero, 0xD1FFAB1E(a5)
+ sh3add.uw a3, a3, a0
+ ; byrRegs +[a3]
+ sd zero, 0xD1FFAB1E(a3)
mulw a3, a1, a2
+ ; byrRegs -[a3]
addw a3, a3, a4
- zext.w a3, a3
- slli a3, a3, 3
- add a5, a0, a3
- ld zero, 0xD1FFAB1E(a5)
+ sh3add.uw a3, a3, a0
+ ; byrRegs +[a3]
+ ld zero, 0xD1FFAB1E(a3)
mulw a1, a1, a2
addw a1, a1, a4
- zext.w a1, a1
- slli a1, a1, 3
- add a2, a0, a1
- ; byrRegs +[a2]
- fld fa0, 0xD1FFAB1E(a2)
- ;; size=186 bbWeight=1 PerfScore 61.00
+ sh3add.uw a0, a1, a0
+ ; gcrRegs -[a0]
+ ; byrRegs +[a0]
+ fld fa0, 0xD1FFAB1E(a0)
+ ;; size=162 bbWeight=1 PerfScore 58.00
G_M10741_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 24(sp)
ld fp, 16(sp)
addi sp, sp, 32
ret ;; size=16 bbWeight=1 PerfScore 7.50
G_M10741_IG04: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref
- ; gcrRegs -[a0]
- ; byrRegs -[a2 a5]
+ ; byrRegs -[a0 a3]
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
slli a0, a0, 15
@@ -107,7 +103,7 @@ G_M10741_IG04: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {
ebreak
;; size=20 bbWeight=0 PerfScore 0.00
-; Total bytes of code 242, prolog size 20, PerfScore 81.50, instruction count 53, allocated bytes for code 242 (MethodHash=c8f8d60a) for method Test_10w5d.testout1:Func_0_1_2_1_1():double (FullOpts)
+; Total bytes of code 218, prolog size 20, PerfScore 78.50, instruction count 47, allocated bytes for code 218 (MethodHash=c8f8d60a) for method Test_10w5d.testout1:Func_0_1_2_1_1():double (FullOpts)
; ============================================================
Unwind Info:
@@ -118,7 +114,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 121 (0x00079) Actual length = 242 (0x0000f2)
+ Function Length : 109 (0x0006d) Actual length = 218 (0x0000da)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-24 (-9.76%) : 54054.dasm - Test_10w5d.testout1:Func_0_3_1_1_4():float (FullOpts)
@@ -71,33 +71,29 @@ G_M44852_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
sext.w a6, a4
bgeu a6, a5, G_M44852_IG04
addw a3, a3, a4
- zext.w a3, a3
- slli a3, a3, 2
- add a5, a0, a3
- ; byrRegs +[a5]
- sw zero, 0xD1FFAB1E(a5)
+ sh2add.uw a3, a3, a0
+ ; byrRegs +[a3]
+ sw zero, 0xD1FFAB1E(a3)
mulw a3, a1, a2
+ ; byrRegs -[a3]
addw a3, a3, a4
- zext.w a3, a3
- slli a3, a3, 2
- add a5, a0, a3
- lw zero, 0xD1FFAB1E(a5)
+ sh2add.uw a3, a3, a0
+ ; byrRegs +[a3]
+ lw zero, 0xD1FFAB1E(a3)
mulw a1, a1, a2
addw a1, a1, a4
- zext.w a1, a1
- slli a1, a1, 2
- add a2, a0, a1
- ; byrRegs +[a2]
- flw fa0, 0xD1FFAB1E(a2)
- ;; size=190 bbWeight=1 PerfScore 62.00
+ sh2add.uw a0, a1, a0
+ ; gcrRegs -[a0]
+ ; byrRegs +[a0]
+ flw fa0, 0xD1FFAB1E(a0)
+ ;; size=166 bbWeight=1 PerfScore 59.00
G_M44852_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 24(sp)
ld fp, 16(sp)
addi sp, sp, 32
ret ;; size=16 bbWeight=1 PerfScore 7.50
G_M44852_IG04: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref
- ; gcrRegs -[a0]
- ; byrRegs -[a2 a5]
+ ; byrRegs -[a0 a3]
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
slli a0, a0, 15
@@ -105,7 +101,7 @@ G_M44852_IG04: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {
ebreak
;; size=20 bbWeight=0 PerfScore 0.00
-; Total bytes of code 246, prolog size 20, PerfScore 82.50, instruction count 53, allocated bytes for code 246 (MethodHash=15a950cb) for method Test_10w5d.testout1:Func_0_3_1_1_4():float (FullOpts)
+; Total bytes of code 222, prolog size 20, PerfScore 79.50, instruction count 47, allocated bytes for code 222 (MethodHash=15a950cb) for method Test_10w5d.testout1:Func_0_3_1_1_4():float (FullOpts)
; ============================================================
Unwind Info:
@@ -116,7 +112,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 123 (0x0007b) Actual length = 246 (0x0000f6)
+ Function Length : 111 (0x0006f) Actual length = 222 (0x0000de)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
+8 (+2.41%) : 60384.dasm - System.Numerics.Tensors.Tensor:FillUniformDistribution[double](byref,System.Random):byref (FullOpts)
@@ -30,25 +30,26 @@
; V19 rat0 [V19,T00] ( 4, 12.38) long -> s6 "Strength reduced derived IV"
; V20 rat1 [V20,T01] ( 4, 12.38) int -> s4 "Trip count IV"
;
-; Lcl frame size = 0
-Frame info. #outsz=0; #framesz=64; lcl=0
+; Lcl frame size = 8
+Frame info. #outsz=0; #framesz=80; lcl=8
G_M9509_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- addi sp, sp, -64
- sd fp, 0(sp)
- sd ra, 8(sp)
- sd s1, 16(sp)
- sd s2, 24(sp)
- sd s3, 32(sp)
- sd s4, 40(sp)
- sd s5, 48(sp)
- sd s6, 56(sp)
- mv fp, sp
+ addi sp, sp, -80
+ sd fp, 8(sp)
+ sd ra, 16(sp)
+ sd s1, 24(sp)
+ sd s2, 32(sp)
+ sd s3, 40(sp)
+ sd s4, 48(sp)
+ sd s5, 56(sp)
+ sd s6, 64(sp)
+ sd s7, 72(sp)
+ addi fp, sp, 8
mv s2, a0
; byrRegs +[s2]
mv s1, a1
; gcrRegs +[s1]
- ;; size=48 bbWeight=1 PerfScore 34.00
+ ;; size=52 bbWeight=1 PerfScore 38.00
G_M9509_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=40000 {s2}, byref
ld s3, 0xD1FFAB1E(s2)
; byrRegs +[s3]
@@ -67,11 +68,11 @@ G_M9509_IG03: ; bbWeight=0.50, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s
slli a1, a1, 12
addi a1, a1, 0xD1FFAB1E
slli a1, a1, 3
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- ld a2, 0xD1FFAB1E(t6)
+ lui a2, 0xD1FFAB1E
+ addiw a2, a2, 0xD1FFAB1E
+ slli a2, a2, 32
+ srli a2, a2, 18
+ ld a2, 0xD1FFAB1E(a2)
jalr a2 // <unknown method>
;; size=64 bbWeight=0.50 PerfScore 9.50
G_M9509_IG04: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s3}, byref
@@ -80,20 +81,20 @@ G_M9509_IG04: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s3},
G_M9509_IG05: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C0000 {s2 s3}, byref
; gcrRegs -[s1]
fence 3, 3
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 16
- lw a0, 0xD1FFAB1E(t6)
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 16
+ lw a0, 0xD1FFAB1E(a0)
andi a0, a0, 1
sext.w t6, a0
addi ra, zero, 0xD1FFAB1E
bne t6, ra, G_M9509_IG12
;; size=36 bbWeight=0.50 PerfScore 7.50
G_M9509_IG06: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C0000 {s2 s3}, byref
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 22
- ld s1, 0xD1FFAB1E(t6)
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 22
+ ld s1, 0xD1FFAB1E(a0)
; gcrRegs +[s1]
;; size=16 bbWeight=0.50 PerfScore 2.50
G_M9509_IG07: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s3}, byref
@@ -107,37 +108,37 @@ G_M9509_IG08: ; bbWeight=0.50, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s
mv s6, zero
;; size=16 bbWeight=0.50 PerfScore 3.25
G_M9509_IG09: ; bbWeight=3.96, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s3}, byref
+ add s7, s3, s6
+ ; byrRegs +[s7]
mv a0, s1
; gcrRegs +[a0]
ld a1, 0xD1FFAB1E(s5)
jalr a1 // <unknown method>
; gcrRegs -[a0]
- add t6, s3, s6
- ; byrRegs +[t6]
- fsd fa0, 0xD1FFAB1E(t6)
+ fsd fa0, 0xD1FFAB1E(s7)
addi s6, s6, 0xD1FFAB1E
addiw s4, s4, 0xD1FFAB1E
sext.w t6, s4
- ; byrRegs -[t6]
bnez t6, G_M9509_IG09
;; size=36 bbWeight=3.96 PerfScore 59.40
G_M9509_IG10: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=40000 {s2}, byref
; gcrRegs -[s1]
- ; byrRegs -[s3]
+ ; byrRegs -[s3 s7]
mv a0, s2
; byrRegs +[a0]
;; size=4 bbWeight=1 PerfScore 0.50
G_M9509_IG11: ; bbWeight=1, epilog, nogc, extend
- ld s6, 56(sp)
- ld s5, 48(sp)
- ld s4, 40(sp)
- ld s3, 32(sp)
- ld s2, 24(sp)
- ld s1, 16(sp)
- ld ra, 8(sp)
- ld fp, 0(sp)
- addi sp, sp, 64
- ret ;; size=40 bbWeight=1 PerfScore 19.50
+ ld s7, 72(sp)
+ ld s6, 64(sp)
+ ld s5, 56(sp)
+ ld s4, 48(sp)
+ ld s3, 40(sp)
+ ld s2, 32(sp)
+ ld s1, 24(sp)
+ ld ra, 16(sp)
+ ld fp, 8(sp)
+ addi sp, sp, 80
+ ret ;; size=44 bbWeight=1 PerfScore 21.50
G_M9509_IG12: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=C0000 {s2 s3}, gcvars, byref
; byrRegs -[a0] +[s3]
lui a0, 0xD1FFAB1E
@@ -153,34 +154,34 @@ G_M9509_IG12: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {}
j G_M9509_IG06
;; size=44 bbWeight=0 PerfScore 0.00
-; Total bytes of code 332, prolog size 40, PerfScore 151.65, instruction count 61, allocated bytes for code 332 (MethodHash=d9ccdada) for method System.Numerics.Tensors.Tensor:FillUniformDistribution[double](byref,System.Random):byref (FullOpts)
+; Total bytes of code 340, prolog size 44, PerfScore 157.65, instruction count 63, allocated bytes for code 340 (MethodHash=d9ccdada) for method System.Numerics.Tensors.Tensor:FillUniformDistribution[double](byref,System.Random):byref (FullOpts)
; ============================================================
Unwind Info:
>> Start offset : 0x000000 (not in unwind data)
>> End offset : 0xd1ffab1e (not in unwind data)
- Code Words : 7
+ Code Words : 8
Epilog Count : 1
E bit : 0
X bit : 0
Vers : 0
- Function Length : 166 (0x000a6) Actual length = 332 (0x00014c)
+ Function Length : 170 (0x000aa) Actual length = 340 (0x000154)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
- Epilog Start Index : 1 (0x01)
+ Epilog Start Index : 3 (0x03)
---- Unwind codes ----
- E1 set_fp; move fp, sp
- ---- Epilog start at index 1 ----
- D0 15 07 save_reg X#21 Z#7 (0x07); sd s6, sp, 56
- D0 14 06 save_reg X#20 Z#6 (0x06); sd s5, sp, 48
- D0 13 05 save_reg X#19 Z#5 (0x05); sd s4, sp, 40
- D0 12 04 save_reg X#18 Z#4 (0x04); sd s3, sp, 32
- D0 11 03 save_reg X#17 Z#3 (0x03); sd s2, sp, 24
- D0 08 02 save_reg X#8 Z#2 (0x02); sd s1, sp, 16
- D0 00 01 save_reg X#0 Z#1 (0x01); sd ra, sp, 8
- D0 07 00 save_reg X#7 Z#0 (0x00); sd fp, sp, 0
- 04 alloc_s #4 (0x04); addi sp, sp, -64 (0x040)
- E4 end
+ E2 00 01 add_fp 1 (0x01); addi fp, sp, #8
+ ---- Epilog start at index 3 ----
+ D0 16 09 save_reg X#22 Z#9 (0x09); sd s7, sp, 72
+ D0 15 08 save_reg X#21 Z#8 (0x08); sd s6, sp, 64
+ D0 14 07 save_reg X#20 Z#7 (0x07); sd s5, sp, 56
+ D0 13 06 save_reg X#19 Z#6 (0x06); sd s4, sp, 48
+ D0 12 05 save_reg X#18 Z#5 (0x05); sd s3, sp, 40
+ D0 11 04 save_reg X#17 Z#4 (0x04); sd s2, sp, 32
+ D0 08 03 save_reg X#8 Z#3 (0x03); sd s1, sp, 24
+ D0 00 02 save_reg X#0 Z#2 (0x02); sd ra, sp, 16
+ D0 07 01 save_reg X#7 Z#1 (0x01); sd fp, sp, 8
+ 05 alloc_s #5 (0x05); addi sp, sp, -80 (0x050)
E4 end
+8 (+1.63%) : 60383.dasm - System.Numerics.Tensors.Tensor:FillGaussianNormalDistribution[double](byref,System.Random):byref (FullOpts)
@@ -36,30 +36,31 @@
; V25 rat1 [V25,T02] ( 4, 12.38) int -> s4 "Trip count IV"
; TEMP_01 double -> [fp-0x08]
;
-; Lcl frame size = 8
-Frame info. #outsz=0; #framesz=112; lcl=8
+; Lcl frame size = 16
+Frame info. #outsz=0; #framesz=128; lcl=16
G_M2439_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- addi sp, sp, -112
- sd fp, 8(sp)
- sd ra, 16(sp)
- sd s1, 24(sp)
- sd s2, 32(sp)
- sd s3, 40(sp)
- sd s4, 48(sp)
- sd s5, 56(sp)
- sd s6, 64(sp)
- fsd fs6, 72(sp)
- fsd fs7, 80(sp)
- fsd fs8, 88(sp)
- fsd fs9, 96(sp)
- fsd fs10, 104(sp)
- addi fp, sp, 8
+ addi sp, sp, -128
+ sd fp, 16(sp)
+ sd ra, 24(sp)
+ sd s1, 32(sp)
+ sd s2, 40(sp)
+ sd s3, 48(sp)
+ sd s4, 56(sp)
+ sd s5, 64(sp)
+ sd s6, 72(sp)
+ sd s7, 80(sp)
+ fsd fs6, 88(sp)
+ fsd fs7, 96(sp)
+ fsd fs8, 104(sp)
+ fsd fs9, 112(sp)
+ fsd fs10, 120(sp)
+ addi fp, sp, 16
mv s2, a0
; byrRegs +[s2]
mv s1, a1
; gcrRegs +[s1]
- ;; size=68 bbWeight=1 PerfScore 54.00
+ ;; size=72 bbWeight=1 PerfScore 58.00
G_M2439_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=40000 {s2}, byref
ld s3, 0xD1FFAB1E(s2)
; byrRegs +[s3]
@@ -78,11 +79,11 @@ G_M2439_IG03: ; bbWeight=0.50, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s
slli a1, a1, 12
addi a1, a1, 0xD1FFAB1E
slli a1, a1, 3
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- ld a2, 0xD1FFAB1E(t6)
+ lui a2, 0xD1FFAB1E
+ addiw a2, a2, 0xD1FFAB1E
+ slli a2, a2, 32
+ srli a2, a2, 18
+ ld a2, 0xD1FFAB1E(a2)
jalr a2 // <unknown method>
;; size=64 bbWeight=0.50 PerfScore 9.50
G_M2439_IG04: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s3}, byref
@@ -91,20 +92,20 @@ G_M2439_IG04: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s3},
G_M2439_IG05: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C0000 {s2 s3}, byref
; gcrRegs -[s1]
fence 3, 3
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 16
- lw a0, 0xD1FFAB1E(t6)
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 16
+ lw a0, 0xD1FFAB1E(a0)
andi a0, a0, 1
sext.w t6, a0
addi ra, zero, 0xD1FFAB1E
bne t6, ra, G_M2439_IG12
;; size=36 bbWeight=0.50 PerfScore 7.50
G_M2439_IG06: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=C0000 {s2 s3}, byref
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 22
- ld s1, 0xD1FFAB1E(t6)
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 22
+ ld s1, 0xD1FFAB1E(a0)
; gcrRegs +[s1]
;; size=16 bbWeight=0.50 PerfScore 2.50
G_M2439_IG07: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s3}, byref
@@ -136,6 +137,8 @@ G_M2439_IG09: ; bbWeight=3.96, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s
jalr a1 // <unknown method>
; gcrRegs -[a0]
fsub.d fs10, fs6, fa0
+ add s7, s3, s6
+ ; byrRegs +[s7]
fmv.d fa0, fs9
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
@@ -155,37 +158,35 @@ G_M2439_IG09: ; bbWeight=3.96, gcrefRegs=0200 {s1}, byrefRegs=C0000 {s2 s
jalr ra, 0xD1FFAB1E(a0) // <unknown method>
fld ft4, -8(fp)
fmul.d ft4, ft4, fa0
- add t6, s3, s6
- ; byrRegs +[t6]
- fsd ft4, 0xD1FFAB1E(t6)
+ fsd ft4, 0xD1FFAB1E(s7)
addi s6, s6, 0xD1FFAB1E
addiw s4, s4, 0xD1FFAB1E
sext.w t6, s4
- ; byrRegs -[t6]
bnez t6, G_M2439_IG09
;; size=132 bbWeight=3.96 PerfScore 421.74
G_M2439_IG10: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=40000 {s2}, byref
; gcrRegs -[s1]
- ; byrRegs -[s3]
+ ; byrRegs -[s3 s7]
mv a0, s2
; byrRegs +[a0]
;; size=4 bbWeight=1 PerfScore 0.50
G_M2439_IG11: ; bbWeight=1, epilog, nogc, extend
- fld fs10, 104(sp)
- fld fs9, 96(sp)
- fld fs8, 88(sp)
- fld fs7, 80(sp)
- fld fs6, 72(sp)
- ld s6, 64(sp)
- ld s5, 56(sp)
- ld s4, 48(sp)
- ld s3, 40(sp)
- ld s2, 32(sp)
- ld s1, 24(sp)
- ld ra, 16(sp)
- ld fp, 8(sp)
- addi sp, sp, 112
- ret ;; size=60 bbWeight=1 PerfScore 29.50
+ fld fs10, 120(sp)
+ fld fs9, 112(sp)
+ fld fs8, 104(sp)
+ fld fs7, 96(sp)
+ fld fs6, 88(sp)
+ ld s7, 80(sp)
+ ld s6, 72(sp)
+ ld s5, 64(sp)
+ ld s4, 56(sp)
+ ld s3, 48(sp)
+ ld s2, 40(sp)
+ ld s1, 32(sp)
+ ld ra, 24(sp)
+ ld fp, 16(sp)
+ addi sp, sp, 128
+ ret ;; size=64 bbWeight=1 PerfScore 31.50
G_M2439_IG12: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=C0000 {s2 s3}, gcvars, byref
; byrRegs -[a0] +[s3]
lui a0, 0xD1FFAB1E
@@ -205,38 +206,40 @@ RWD08 dq C000000000000000h ; -2
RWD16 dq 401921FB54442D18h ; 6.28318531
-; Total bytes of code 492, prolog size 60, PerfScore 546.99, instruction count 90, allocated bytes for code 492 (MethodHash=e8f2f678) for method System.Numerics.Tensors.Tensor:FillGaussianNormalDistribution[double](byref,System.Random):byref (FullOpts)
+; Total bytes of code 500, prolog size 64, PerfScore 552.99, instruction count 92, allocated bytes for code 500 (MethodHash=e8f2f678) for method System.Numerics.Tensors.Tensor:FillGaussianNormalDistribution[double](byref,System.Random):byref (FullOpts)
; ============================================================
Unwind Info:
>> Start offset : 0x000000 (not in unwind data)
>> End offset : 0xd1ffab1e (not in unwind data)
- Code Words : 11
+ Code Words : 12
Epilog Count : 1
E bit : 0
X bit : 0
Vers : 0
- Function Length : 246 (0x000f6) Actual length = 492 (0x0001ec)
+ Function Length : 250 (0x000fa) Actual length = 500 (0x0001f4)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
Epilog Start Index : 3 (0x03)
---- Unwind codes ----
- E2 00 01 add_fp 1 (0x01); addi fp, sp, #8
+ E2 00 02 add_fp 2 (0x02); addi fp, sp, #16
---- Epilog start at index 3 ----
- DD 20 0D save_freg X#18 Z#13 (0x0D); fsd fs10, [sp, #104]
- DD 10 0C save_freg X#17 Z#12 (0x0C); fsd fs9, [sp, #96]
- DD 00 0B save_freg X#16 Z#11 (0x0B); fsd fs8, [sp, #88]
- DC F0 0A save_freg X#15 Z#10 (0x0A); fsd fs7, [sp, #80]
- DC E0 09 save_freg X#14 Z#9 (0x09); fsd fs6, [sp, #72]
- D0 15 08 save_reg X#21 Z#8 (0x08); sd s6, sp, 64
- D0 14 07 save_reg X#20 Z#7 (0x07); sd s5, sp, 56
- D0 13 06 save_reg X#19 Z#6 (0x06); sd s4, sp, 48
- D0 12 05 save_reg X#18 Z#5 (0x05); sd s3, sp, 40
- D0 11 04 save_reg X#17 Z#4 (0x04); sd s2, sp, 32
- D0 08 03 save_reg X#8 Z#3 (0x03); sd s1, sp, 24
- D0 00 02 save_reg X#0 Z#2 (0x02); sd ra, sp, 16
- D0 07 01 save_reg X#7 Z#1 (0x01); sd fp, sp, 8
- 07 alloc_s #7 (0x07); addi sp, sp, -112 (0x070)
+ DD 20 0F save_freg X#18 Z#15 (0x0F); fsd fs10, [sp, #120]
+ DD 10 0E save_freg X#17 Z#14 (0x0E); fsd fs9, [sp, #112]
+ DD 00 0D save_freg X#16 Z#13 (0x0D); fsd fs8, [sp, #104]
+ DC F0 0C save_freg X#15 Z#12 (0x0C); fsd fs7, [sp, #96]
+ DC E0 0B save_freg X#14 Z#11 (0x0B); fsd fs6, [sp, #88]
+ D0 16 0A save_reg X#22 Z#10 (0x0A); sd s7, sp, 80
+ D0 15 09 save_reg X#21 Z#9 (0x09); sd s6, sp, 72
+ D0 14 08 save_reg X#20 Z#8 (0x08); sd s5, sp, 64
+ D0 13 07 save_reg X#19 Z#7 (0x07); sd s4, sp, 56
+ D0 12 06 save_reg X#18 Z#6 (0x06); sd s3, sp, 48
+ D0 11 05 save_reg X#17 Z#5 (0x05); sd s2, sp, 40
+ D0 08 04 save_reg X#8 Z#4 (0x04); sd s1, sp, 32
+ D0 00 03 save_reg X#0 Z#3 (0x03); sd ra, sp, 24
+ D0 07 02 save_reg X#7 Z#2 (0x02); sd fp, sp, 16
+ 08 alloc_s #8 (0x08); addi sp, sp, -128 (0x080)
+ E4 end
E4 end
+2 (+1.39%) : 49105.dasm - System.Collections.Generic.ArrayBuilder`1[byte]:AsSpan(int):System.Span`1[byte]:this (FullOpts)
@@ -18,16 +18,16 @@
;* V07 tmp5 [V07 ] ( 0, 0 ) byref -> zero-ref "field V03._reference (fldOffset=0x0)" P-DEP
;* V08 tmp6 [V08 ] ( 0, 0 ) int -> zero-ref "field V03._length (fldOffset=0x8)" P-DEP
; V09 tmp7 [V09,T04] ( 3, 1.50) byref -> a2 "field V06._reference (fldOffset=0x0)" P-INDEP
-; V10 tmp8 [V10,T05] ( 3, 1.50) int -> a1 "field V06._length (fldOffset=0x8)" P-INDEP
+; V10 tmp8 [V10,T05] ( 3, 1.50) int -> [fp-0x04] "field V06._length (fldOffset=0x8)" P-INDEP
;
-; Lcl frame size = 0
-Frame info. #outsz=0; #framesz=16; lcl=0
+; Lcl frame size = 16
+Frame info. #outsz=0; #framesz=32; lcl=16
G_M4522_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
- addi sp, sp, -16
- sd fp, 0(sp)
- sd ra, 8(sp)
- mv fp, sp
+ addi sp, sp, -32
+ sd fp, 16(sp)
+ sd ra, 24(sp)
+ addi fp, sp, 16
;; size=16 bbWeight=1 PerfScore 9.00
G_M4522_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0400 {a0}, byref
; byrRegs +[a0]
@@ -47,18 +47,18 @@ G_M4522_IG03: ; bbWeight=0.50, gcrefRegs=1000 {a2}, byrefRegs=0000 {}, by
addi a2, a2, 0xD1FFAB1E
; gcrRegs -[a2]
; byrRegs +[a2]
- zext.w a1, a1
- add a2, a2, a1
- sext.w a1, a0
- ;; size=34 bbWeight=0.50 PerfScore 4.50
+ add.uw a2, a1, a2
+ sw a0, -4(fp)
+ ;; size=32 bbWeight=0.50 PerfScore 6.00
G_M4522_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=1000 {a2}, byref
mv a0, a2
; byrRegs +[a0]
- ;; size=4 bbWeight=1 PerfScore 0.50
+ lw a1, -4(fp)
+ ;; size=8 bbWeight=1 PerfScore 2.50
G_M4522_IG05: ; bbWeight=1, epilog, nogc, extend
- ld ra, 8(sp)
- ld fp, 0(sp)
- addi sp, sp, 16
+ ld ra, 24(sp)
+ ld fp, 16(sp)
+ addi sp, sp, 32
ret ;; size=16 bbWeight=1 PerfScore 7.50
G_M4522_IG06: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref
; byrRegs -[a0 a2]
@@ -68,22 +68,22 @@ G_M4522_IG06: ; bbWeight=0, gcVars=0000000000000000 {}, gcrefRegs=0000 {}
bnez t6, G_M4522_IG07
mv a2, zero
; byrRegs +[a2]
- sext.w a1, zero
+ sw zero, -4(fp)
j G_M4522_IG04
;; size=28 bbWeight=0 PerfScore 0.00
G_M4522_IG07: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[a2]
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 13
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 2
- ld a0, 0xD1FFAB1E(t6)
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 13
+ addi a0, a0, 0xD1FFAB1E
+ slli a0, a0, 2
+ ld a0, 0xD1FFAB1E(a0)
jalr a0 // System.ThrowHelper:ThrowArgumentOutOfRangeException()
ebreak
;; size=32 bbWeight=0 PerfScore 0.00
-; Total bytes of code 144, prolog size 16, PerfScore 29.50, instruction count 33, allocated bytes for code 144 (MethodHash=76a4ee55) for method System.Collections.Generic.ArrayBuilder`1[byte]:AsSpan(int):System.Span`1[byte]:this (FullOpts)
+; Total bytes of code 146, prolog size 16, PerfScore 33.00, instruction count 33, allocated bytes for code 146 (MethodHash=76a4ee55) for method System.Collections.Generic.ArrayBuilder`1[byte]:AsSpan(int):System.Span`1[byte]:this (FullOpts)
; ============================================================
Unwind Info:
@@ -94,19 +94,17 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 72 (0x00048) Actual length = 144 (0x000090)
+ Function Length : 73 (0x00049) Actual length = 146 (0x000092)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
- Epilog Start Index : 1 (0x01)
+ Epilog Start Index : 3 (0x03)
---- Unwind codes ----
- E1 set_fp; move fp, sp
- ---- Epilog start at index 1 ----
- D0 00 01 save_reg X#0 Z#1 (0x01); sd ra, sp, 8
- D0 07 00 save_reg X#7 Z#0 (0x00); sd fp, sp, 0
- 01 alloc_s #1 (0x01); addi sp, sp, -16 (0x010)
- E4 end
- E4 end
+ E2 00 02 add_fp 2 (0x02); addi fp, sp, #16
+ ---- Epilog start at index 3 ----
+ D0 00 03 save_reg X#0 Z#3 (0x03); sd ra, sp, 24
+ D0 07 02 save_reg X#7 Z#2 (0x02); sd fp, sp, 16
+ 02 alloc_s #2 (0x02); addi sp, sp, -32 (0x020)
E4 end
E4 end
Details
Size improvements/regressions per collection
PerfScore improvements/regressions per collection
Context information
jit-analyze output |
RISC-V pull_request-CLR-QEMU: 9112 / 9142 (99.67%) — report.xml, report.md, failures.xml, testclr_details.tar.zst
RISC-V pull_request-CLR-VF2: 9111 / 9142 (99.66%) — report.xml, report.md, failures.xml, testclr_details.tar.zst
RISC-V pull_request-FX-QEMU: 0 / 0 (100.00%) — report.xml, report.md, failures.xml, testclr_details.tar.zst
RISC-V pull_request-FX-VF2: 0 / 65 (0.00%) — report.xml, report.md, failures.xml, testclr_details.tar.zst
Build information and commands
GIT: |
|
We will review .NET 11 PRs once we have finished the last-minute .NET 10 work and are less busy. Please give us a few more weeks. |
|
@jakobbotsch PTAL. |
|
/azp run runtime |
|
Azure Pipelines successfully started running 1 pipeline(s). |
RISC-V doesn't have indexed loads/stores, so replacing the base with a standard ADD(base, index) node simplifies and improves the code generation, as the tree is closer to architectural reality.
Part of #84834, cc @dotnet/samsung