Skip to content

Commit 410036e

Browse files
MaxGraey authored and dcodeIO committed
Use mixed Horner scheme in Math.exp/expm1 to improve instruction parallelization (AssemblyScript#311)
1 parent 54311fd commit 410036e

File tree

8 files changed

+3882
-3023
lines changed

8 files changed

+3882
-3023
lines changed

std/assembly/math.ts

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -422,8 +422,10 @@ export namespace NativeMath {
422422
} else if (hx > 0x3E300000) {
423423
hi = x;
424424
} else return 1.0 + x;
425-
var xx = x * x;
426-
var c = x - xx * (P1 + xx * (P2 + xx * (P3 + xx * (P4 + xx * P5))));
425+
var xs = x * x;
426+
// var c = x - xs * (P1 + xs * (P2 + xs * (P3 + xs * (P4 + xs * P5))));
427+
var xq = xs * xs;
428+
var c = x - (xs * P1 + xq * ((P2 + xs * P3) + xq * (P4 + xs * P5)));
427429
var y = 1.0 + (x * c / (2 - c) - lo + hi);
428430
if (k == 0) return y;
429431
return scalbn(y, k);
@@ -464,7 +466,9 @@ export namespace NativeMath {
464466
} else if (hx < 0x3C900000) return x;
465467
var hfx = 0.5 * x;
466468
var hxs = x * hfx;
467-
var r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
469+
// var r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
470+
var hxq = hxs * hxs;
471+
var r1 = (1.0 + hxs * Q1) + hxq * ((Q2 + hxs * Q3) + hxq * (Q4 + hxs * Q5));
468472
t = 3.0 - r1 * hfx;
469473
var e = hxs * ((r1 - t) / (6.0 - x * t));
470474
if (k == 0) return x - (x * e - hxs);

tests/compiler/std/array.optimized.wat

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3457,7 +3457,7 @@
34573457
if
34583458
i32.const 0
34593459
i32.const 552
3460-
i32.const 955
3460+
i32.const 959
34613461
i32.const 4
34623462
call $~lib/env/abort
34633463
unreachable
@@ -5173,7 +5173,7 @@
51735173
if
51745174
i32.const 0
51755175
i32.const 552
5176-
i32.const 964
5176+
i32.const 968
51775177
i32.const 24
51785178
call $~lib/env/abort
51795179
unreachable

tests/compiler/std/array.untouched.wat

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4512,7 +4512,7 @@
45124512
if
45134513
i32.const 0
45144514
i32.const 552
4515-
i32.const 955
4515+
i32.const 959
45164516
i32.const 4
45174517
call $~lib/env/abort
45184518
unreachable
@@ -7609,7 +7609,7 @@
76097609
if
76107610
i32.const 0
76117611
i32.const 552
7612-
i32.const 964
7612+
i32.const 968
76137613
i32.const 24
76147614
call $~lib/env/abort
76157615
unreachable

tests/compiler/std/libm.optimized.wat

Lines changed: 44 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1695,20 +1695,29 @@
16951695
return
16961696
end
16971697
end
1698-
f64.const 1
16991698
get_local $0
17001699
f64.const 0.5
17011700
get_local $0
17021701
f64.mul
17031702
tee_local $8
17041703
f64.mul
17051704
tee_local $2
1706-
f64.const -0.03333333333333313
17071705
get_local $2
1706+
f64.mul
1707+
set_local $1
1708+
f64.const 3
1709+
f64.const 1
1710+
get_local $2
1711+
f64.const -0.03333333333333313
1712+
f64.mul
1713+
f64.add
1714+
get_local $1
17081715
f64.const 1.5873015872548146e-03
17091716
get_local $2
17101717
f64.const -7.93650757867488e-05
1711-
get_local $2
1718+
f64.mul
1719+
f64.add
1720+
get_local $1
17121721
f64.const 4.008217827329362e-06
17131722
get_local $2
17141723
f64.const -2.0109921818362437e-07
@@ -1718,20 +1727,14 @@
17181727
f64.add
17191728
f64.mul
17201729
f64.add
1721-
f64.mul
1722-
f64.add
1723-
f64.mul
1724-
f64.add
17251730
tee_local $9
1731+
get_local $8
1732+
f64.mul
1733+
f64.sub
17261734
set_local $1
17271735
get_local $2
17281736
get_local $9
1729-
f64.const 3
17301737
get_local $1
1731-
get_local $8
1732-
f64.mul
1733-
f64.sub
1734-
tee_local $1
17351738
f64.sub
17361739
f64.const 6
17371740
get_local $0
@@ -1954,10 +1957,12 @@
19541957
)
19551958
(func $~lib/math/NativeMath.exp (; 25 ;) (type $FF) (param $0 f64) (result f64)
19561959
(local $1 i32)
1957-
(local $2 i32)
1958-
(local $3 f64)
1959-
(local $4 i32)
1960+
(local $2 f64)
1961+
(local $3 i32)
1962+
(local $4 f64)
19601963
(local $5 f64)
1964+
(local $6 i32)
1965+
(local $7 f64)
19611966
get_local $0
19621967
i64.reinterpret/f64
19631968
i64.const 32
@@ -1966,7 +1971,7 @@
19661971
tee_local $1
19671972
i32.const 31
19681973
i32.shr_u
1969-
set_local $4
1974+
set_local $6
19701975
get_local $1
19711976
i32.const 2147483647
19721977
i32.and
@@ -2017,22 +2022,22 @@
20172022
i32.trunc_s/f64
20182023
else
20192024
i32.const 1
2020-
get_local $4
2025+
get_local $6
20212026
i32.const 1
20222027
i32.shl
20232028
i32.sub
20242029
end
2025-
tee_local $2
2030+
tee_local $3
20262031
f64.convert_s/i32
20272032
tee_local $0
20282033
f64.const 0.6931471803691238
20292034
f64.mul
20302035
f64.sub
2031-
tee_local $3
2036+
tee_local $4
20322037
get_local $0
20332038
f64.const 1.9082149292705877e-10
20342039
f64.mul
2035-
tee_local $5
2040+
tee_local $7
20362041
f64.sub
20372042
set_local $0
20382043
else
@@ -2046,54 +2051,58 @@
20462051
return
20472052
end
20482053
get_local $0
2049-
set_local $3
2054+
set_local $4
20502055
end
2051-
f64.const 1
20522056
get_local $0
20532057
get_local $0
2058+
f64.mul
2059+
tee_local $2
2060+
get_local $2
2061+
f64.mul
2062+
set_local $5
2063+
f64.const 1
20542064
get_local $0
20552065
get_local $0
2056-
f64.mul
2057-
tee_local $0
2066+
get_local $2
20582067
f64.const 0.16666666666666602
2059-
get_local $0
2068+
f64.mul
2069+
get_local $5
20602070
f64.const -2.7777777777015593e-03
2061-
get_local $0
2071+
get_local $2
20622072
f64.const 6.613756321437934e-05
2063-
get_local $0
2064-
f64.const -1.6533902205465252e-06
2065-
get_local $0
2066-
f64.const 4.1381367970572385e-08
20672073
f64.mul
20682074
f64.add
2075+
get_local $5
2076+
f64.const -1.6533902205465252e-06
2077+
get_local $2
2078+
f64.const 4.1381367970572385e-08
20692079
f64.mul
20702080
f64.add
20712081
f64.mul
20722082
f64.add
20732083
f64.mul
20742084
f64.add
2075-
f64.mul
20762085
f64.sub
20772086
tee_local $0
20782087
f64.mul
20792088
f64.const 2
20802089
get_local $0
20812090
f64.sub
20822091
f64.div
2083-
get_local $5
2092+
get_local $7
20842093
f64.sub
2085-
get_local $3
2094+
get_local $4
20862095
f64.add
20872096
f64.add
20882097
set_local $0
2089-
get_local $2
2098+
get_local $3
20902099
i32.eqz
20912100
if
20922101
get_local $0
20932102
return
20942103
end
20952104
get_local $0
2096-
get_local $2
2105+
get_local $3
20972106
call $~lib/math/NativeMath.scalbn
20982107
)
20992108
(func $~lib/math/NativeMath.cosh (; 26 ;) (type $FF) (param $0 f64) (result f64)

0 commit comments

Comments
 (0)