diff --git a/include/mitsuba/render/optix/vector.cuh b/include/mitsuba/render/optix/vector.cuh
index e4799c300..3b7caa7bc 100644
--- a/include/mitsuba/render/optix/vector.cuh
+++ b/include/mitsuba/render/optix/vector.cuh
@@ -179,7 +179,7 @@ template <typename Value, size_t Size>
 DEVICE Value dot(const Array<Value, Size> &a1, const Array<Value, Size> &a2) {
     Value result = a1.v[0] * a2.v[0];
     for (size_t i = 1; i < Size; ++i)
-        result = fmaf(a1.v[i], a2.v[i], result);
+        result = ::fmaf(a1.v[i], a2.v[i], result);
     return result;
 }
 
@@ -187,7 +187,7 @@ template <typename Value, size_t Size>
 DEVICE Value squared_norm(const Array<Value, Size> &a) {
     Value result = a.v[0] * a.v[0];
     for (size_t i = 1; i < Size; ++i)
-        result = fmaf(a.v[i], a.v[i], result);
+        result = ::fmaf(a.v[i], a.v[i], result);
     return result;
 }
 
diff --git a/resources/ptx/optix_rt.ptx b/resources/ptx/optix_rt.ptx index 111d76565..85c5099db 100644 --- a/resources/ptx/optix_rt.ptx +++ b/resources/ptx/optix_rt.ptx @@ -1,12 +1,12 @@ // // Generated by NVIDIA NVVM Compiler // -// Compiler Build ID: CL-27506705 -// Cuda compilation tools, release 10.2, V10.2.89 +// Compiler Build ID: CL-29069683 +// Cuda compilation tools, release 11.1, V11.1.74 // Based on LLVM 3.4svn // -.version 6.5 +.version 7.1 .target sm_61 .address_size 64 @@ -19,13 +19,13 @@ ; .const .align 8 .b8 params[368]; .global .align 1 .b8 $str[36] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 83, 84, 65, 67, 75, 95, 79, 86, 69, 82, 70, 76, 79, 87, 0}; -.global .align 1 .b8 $str1[42] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 67, 69, 95, 68, 69, 80, 84, 72, 95, 69, 88, 67, 69, 69, 68, 69, 68, 0}; -.global .align 1 .b8 $str2[46] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 86, 69, 82, 83, 65, 76, 95, 68, 69, 80, 84, 72, 95, 69, 88, 67, 69, 69, 68, 69, 68, 0}; -.global .align 1 .b8 $str3[51] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 86, 69, 82, 83, 65, 76, 95, 73, 78, 86, 65, 76, 73, 68, 95, 84, 82, 65, 86, 69, 82, 83, 65, 66, 76, 69, 0}; -.global .align 1 .b8 $str4[48] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 86, 69, 82, 83, 65, 76, 95, 73, 78, 86, 65, 76, 73, 68, 95, 77, 73, 83, 83, 95, 83, 66, 84, 0}; -.global .align 1 .b8 $str5[47] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 86, 69, 82, 83, 65, 76, 95, 73, 78, 86, 65, 76, 73, 68, 95, 72, 73, 84, 95, 83, 66, 84, 0}; -.const .align 8 .u64 exceptions[12] = {4294967295, generic($str), 4294967294, generic($str1), 4294967293, generic($str2), 4294967291, generic($str3), 4294967290, generic($str4), 4294967289, generic($str5)}; -.global .align 1 .b8 $str6[24] = {79, 112, 116, 105, 120, 32, 69, 120, 99, 101, 112, 116, 105, 111, 110, 32, 37, 117, 58, 32, 37, 115, 10, 0}; +.global .align 1 .b8 $str$1[42] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 67, 69, 95, 68, 69, 80, 84, 72, 95, 69, 88, 67, 69, 69, 68, 69, 68, 0}; +.global .align 1 .b8 $str$2[46] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 86, 69, 82, 83, 65, 76, 95, 68, 69, 80, 84, 72, 95, 69, 88, 67, 69, 69, 68, 69, 68, 0}; +.global .align 1 .b8 $str$3[51] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 86, 69, 82, 83, 65, 76, 95, 73, 78, 86, 65, 76, 73, 68, 95, 84, 82, 65, 86, 69, 82, 83, 65, 66, 76,
69, 0}; +.global .align 1 .b8 $str$4[48] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 86, 69, 82, 83, 65, 76, 95, 73, 78, 86, 65, 76, 73, 68, 95, 77, 73, 83, 83, 95, 83, 66, 84, 0}; +.global .align 1 .b8 $str$5[47] = {79, 80, 84, 73, 88, 95, 69, 88, 67, 69, 80, 84, 73, 79, 78, 95, 67, 79, 68, 69, 95, 84, 82, 65, 86, 69, 82, 83, 65, 76, 95, 73, 78, 86, 65, 76, 73, 68, 95, 72, 73, 84, 95, 83, 66, 84, 0}; +.const .align 8 .u64 exceptions[12] = {4294967295, generic($str), 4294967294, generic($str$1), 4294967293, generic($str$2), 4294967291, generic($str$3), 4294967290, generic($str$4), 4294967289, generic($str$5)}; +.global .align 1 .b8 $str$6[24] = {79, 112, 116, 105, 120, 32, 69, 120, 99, 101, 112, 116, 105, 111, 110, 32, 37, 117, 58, 32, 37, 115, 10, 0}; .visible .entry __intersection__cylinder( @@ -1534,11 +1534,12 @@ BB0_48: @%p39 bra BB0_50; ld.f32 %f835, [%rd1+288]; - setp.le.f32 %p41, %f326, %f835; - setp.ge.f32 %p42, %f957, %f775; + setp.gtu.f32 %p41, %f326, %f835; + setp.ltu.f32 %p42, %f957, %f775; + or.pred %p43, %p41, %p42; mov.pred %p53, -1; - and.pred %p43, %p41, %p42; - @%p43 bra BB0_54; + @!%p43 bra BB0_54; + bra.uni BB0_50; BB0_50: setp.ltu.f32 %p45, %f327, 0f00000000; @@ -9565,7 +9566,7 @@ BB3_116: .reg .b16 %rs<10>; .reg .f32 %f<1259>; .reg .b32 %r<336>; - .reg .b64 %rd<450>; + .reg .b64 %rd<451>; // inline asm @@ -9589,17 +9590,18 @@ BB3_116: setp.eq.s64 %p1, %rd1, 0; @%p1 bra BB4_2; - cvta.to.global.u64 %rd40, %rd1; - cvt.u64.u32 %rd41, %r1; - add.s64 %rd42, %rd40, %rd41; + cvta.to.global.u64 %rd41, %rd1; + cvt.u64.u32 %rd42, %r1; + add.s64 %rd43, %rd41, %rd42; mov.u16 %rs1, 1; - st.global.u8 [%rd42], %rs1; + st.global.u8 [%rd43], %rs1; bra.uni BB4_77; BB4_2: // inline asm - call (%rd43), _optix_get_sbt_data_ptr_64, (); + call (%rd44), _optix_get_sbt_data_ptr_64, (); // inline asm + ld.u64 %rd3, [%rd44+8]; // inline asm call (%r23), _optix_read_primitive_idx, (); // inline asm @@ -9609,26 +9611,26 @@ BB4_2: // inline asm call (%f1099, %f1100), _optix_get_triangle_barycentrics, (); // inline asm - ld.const.u64 %rd3, [params+80]; - setp.eq.s64 %p2, %rd3, 0; + ld.const.u64 %rd4, [params+80]; + setp.eq.s64 %p2, %rd4, 0; @%p2 bra BB4_7; - ld.u64 %rd44, [%rd43]; - ld.const.u64 %rd45, [params+328]; - cvta.to.global.u64 %rd46, %rd45; - cvt.u64.u32 %rd4, %r1; - mul.wide.u32 %rd47, %r1, 8; - add.s64 %rd48, %rd46, %rd47; - st.global.u64 [%rd48], %rd44; - ld.const.u64 %rd49, [params+336]; - cvta.to.global.u64 %rd50, %rd49; - mul.wide.u32 %rd51, %r1, 4; - add.s64 %rd52, %rd50, %rd51; - st.global.u32 [%rd52], %r23; - ld.const.u64 %rd53, [params+344]; - cvta.to.global.u64 %rd54, %rd53; - add.s64 %rd5, %rd54, %rd51; - ld.global.u32 %r3, [%rd5]; + ld.u64 %rd45, [%rd44]; + ld.const.u64 %rd46, [params+328]; + cvta.to.global.u64 %rd47, %rd46; + cvt.u64.u32 %rd5, %r1; + mul.wide.u32 %rd48, %r1, 8; + add.s64 %rd49, %rd47, %rd48; + st.global.u64 [%rd49], %rd45; + ld.const.u64 %rd50, [params+336]; + cvta.to.global.u64 %rd51, %rd50; + mul.wide.u32 %rd52, %r1, 4; + add.s64 %rd53, %rd51, %rd52; + st.global.u32 [%rd53], %r23; + ld.const.u64 %rd54, [params+344]; + cvta.to.global.u64 %rd55, %rd54; + add.s64 %rd6, %rd55, %rd52; + ld.global.u32 %r3, [%rd6]; setp.eq.s32 %p3, %r3, 0; @%p3 bra BB4_6; @@ -9638,60 +9640,59 @@ BB4_2: setp.ge.u32 %p4, %r24, %r3; @%p4 bra BB4_6; - st.global.u32 [%rd5], %r24; + st.global.u32 [%rd6], %r24; BB4_6: - cvta.to.global.u64 %rd55, %rd3; - shl.b64 %rd56, %rd4, 2; - add.s64 %rd57, %rd55, %rd56; - st.global.f32 [%rd57], 
%f1099; - ld.const.u64 %rd58, [params+88]; - cvta.to.global.u64 %rd59, %rd58; - add.s64 %rd60, %rd59, %rd56; - st.global.f32 [%rd60], %f1100; - ld.const.u64 %rd61, [params+72]; - cvta.to.global.u64 %rd62, %rd61; - add.s64 %rd63, %rd62, %rd56; - st.global.f32 [%rd63], %f428; + cvta.to.global.u64 %rd56, %rd4; + shl.b64 %rd57, %rd5, 2; + add.s64 %rd58, %rd56, %rd57; + st.global.f32 [%rd58], %f1099; + ld.const.u64 %rd59, [params+88]; + cvta.to.global.u64 %rd60, %rd59; + add.s64 %rd61, %rd60, %rd57; + st.global.f32 [%rd61], %f1100; + ld.const.u64 %rd62, [params+72]; + cvta.to.global.u64 %rd63, %rd62; + add.s64 %rd64, %rd63, %rd57; + st.global.f32 [%rd64], %f428; bra.uni BB4_77; BB4_7: - ld.u64 %rd6, [%rd43+8]; mov.f32 %f437, 0f3F800000; sub.f32 %f438, %f437, %f1099; sub.f32 %f4, %f438, %f1100; - mul.wide.u32 %rd64, %r23, 3; - ld.u64 %rd65, [%rd6]; - shl.b64 %rd66, %rd64, 2; - add.s64 %rd67, %rd65, %rd66; - ld.u32 %r25, [%rd67]; - mul.wide.u32 %rd8, %r25, 3; - ld.u64 %rd68, [%rd6+8]; - shl.b64 %rd69, %rd8, 2; - add.s64 %rd70, %rd68, %rd69; - ld.u32 %r26, [%rd67+4]; - mul.wide.u32 %rd10, %r26, 3; - shl.b64 %rd71, %rd10, 2; - add.s64 %rd72, %rd68, %rd71; - ld.u32 %r27, [%rd67+8]; - mul.wide.u32 %rd12, %r27, 3; - shl.b64 %rd73, %rd12, 2; - add.s64 %rd74, %rd68, %rd73; - ld.f32 %f439, [%rd70]; - ld.f32 %f440, [%rd70+4]; - ld.f32 %f441, [%rd70+8]; - ld.f32 %f442, [%rd72]; + mul.wide.u32 %rd65, %r23, 3; + ld.u64 %rd66, [%rd3]; + shl.b64 %rd67, %rd65, 2; + add.s64 %rd68, %rd66, %rd67; + ld.u32 %r25, [%rd68]; + mul.wide.u32 %rd9, %r25, 3; + ld.u64 %rd69, [%rd3+8]; + shl.b64 %rd70, %rd9, 2; + add.s64 %rd71, %rd69, %rd70; + ld.u32 %r26, [%rd68+4]; + mul.wide.u32 %rd11, %r26, 3; + shl.b64 %rd72, %rd11, 2; + add.s64 %rd73, %rd69, %rd72; + ld.u32 %r27, [%rd68+8]; + mul.wide.u32 %rd13, %r27, 3; + shl.b64 %rd74, %rd13, 2; + add.s64 %rd75, %rd69, %rd74; + ld.f32 %f439, [%rd71]; + ld.f32 %f440, [%rd71+4]; + ld.f32 %f441, [%rd71+8]; + ld.f32 %f442, [%rd73]; mul.f32 %f443, %f442, %f1099; - ld.f32 %f444, [%rd72+4]; + ld.f32 %f444, [%rd73+4]; mul.f32 %f445, %f444, %f1099; - ld.f32 %f446, [%rd72+8]; + ld.f32 %f446, [%rd73+8]; mul.f32 %f447, %f446, %f1099; fma.rn.f32 %f448, %f439, %f4, %f443; fma.rn.f32 %f449, %f440, %f4, %f445; fma.rn.f32 %f450, %f441, %f4, %f447; - ld.f32 %f451, [%rd74]; - ld.f32 %f452, [%rd74+4]; - ld.f32 %f453, [%rd74+8]; + ld.f32 %f451, [%rd75]; + ld.f32 %f452, [%rd75+4]; + ld.f32 %f453, [%rd75+8]; fma.rn.f32 %f1256, %f451, %f1100, %f448; fma.rn.f32 %f1257, %f452, %f1100, %f449; fma.rn.f32 %f1258, %f453, %f1100, %f450; @@ -9717,8 +9718,8 @@ BB4_7: div.rn.f32 %f1250, %f456, %f466; div.rn.f32 %f1251, %f459, %f466; div.rn.f32 %f1252, %f462, %f466; - ld.const.u64 %rd13, [params+136]; - setp.eq.s64 %p5, %rd13, 0; + ld.const.u64 %rd14, [params+136]; + setp.eq.s64 %p5, %rd14, 0; mov.f32 %f1084, 0f00000000; mov.f32 %f1085, %f1084; mov.f32 %f1086, %f1084; @@ -9731,8 +9732,8 @@ BB4_7: @%p5 bra BB4_12; mov.f32 %f1084, 0f00000000; - ld.u64 %rd14, [%rd6+16]; - setp.eq.s64 %p6, %rd14, 0; + ld.u64 %rd15, [%rd3+16]; + setp.eq.s64 %p6, %rd15, 0; mov.f32 %f1085, %f1084; mov.f32 %f1086, %f1084; mov.f32 %f1087, %f1084; @@ -9743,27 +9744,27 @@ BB4_7: mov.f32 %f1092, %f1250; @%p6 bra BB4_12; - mul.wide.u32 %rd444, %r25, 3; - shl.b64 %rd443, %rd444, 2; + mul.wide.u32 %rd445, %r25, 3; + shl.b64 %rd444, %rd445, 2; mov.f32 %f1084, 0f00000000; - add.s64 %rd76, %rd14, %rd443; - add.s64 %rd78, %rd14, %rd71; - add.s64 %rd80, %rd14, %rd73; - ld.f32 %f17, [%rd76]; - ld.f32 %f18, [%rd76+4]; - ld.f32 %f19, [%rd76+8]; - ld.f32 
%f20, [%rd78]; + add.s64 %rd77, %rd15, %rd444; + add.s64 %rd79, %rd15, %rd72; + add.s64 %rd81, %rd15, %rd74; + ld.f32 %f17, [%rd77]; + ld.f32 %f18, [%rd77+4]; + ld.f32 %f19, [%rd77+8]; + ld.f32 %f20, [%rd79]; mul.f32 %f479, %f20, %f1099; - ld.f32 %f21, [%rd78+4]; + ld.f32 %f21, [%rd79+4]; mul.f32 %f480, %f21, %f1099; - ld.f32 %f22, [%rd78+8]; + ld.f32 %f22, [%rd79+8]; mul.f32 %f481, %f22, %f1099; fma.rn.f32 %f482, %f17, %f4, %f479; fma.rn.f32 %f483, %f18, %f4, %f480; fma.rn.f32 %f484, %f19, %f4, %f481; - ld.f32 %f23, [%rd80]; - ld.f32 %f24, [%rd80+4]; - ld.f32 %f25, [%rd80+8]; + ld.f32 %f23, [%rd81]; + ld.f32 %f24, [%rd81+4]; + ld.f32 %f25, [%rd81+8]; fma.rn.f32 %f485, %f23, %f1100, %f482; fma.rn.f32 %f486, %f24, %f1100, %f483; fma.rn.f32 %f487, %f25, %f1100, %f484; @@ -9774,8 +9775,8 @@ BB4_7: div.rn.f32 %f1092, %f485, %f491; div.rn.f32 %f1091, %f486, %f491; div.rn.f32 %f1090, %f487, %f491; - ld.const.u64 %rd81, [params+280]; - setp.eq.s64 %p7, %rd81, 0; + ld.const.u64 %rd82, [params+280]; + setp.eq.s64 %p7, %rd82, 0; @%p7 bra BB4_10; mul.f32 %f492, %f1099, %f23; @@ -9850,40 +9851,40 @@ BB4_12: mul.f32 %f538, %f1251, %f1251; fma.rn.f32 %f1245, %f538, %f533, %f530; neg.f32 %f1246, %f1251; - ld.const.u64 %rd15, [params+96]; - setp.eq.s64 %p8, %rd15, 0; + ld.const.u64 %rd16, [params+96]; + setp.eq.s64 %p8, %rd16, 0; @%p8 bra BB4_17; - ld.u64 %rd16, [%rd6+24]; - setp.eq.s64 %p9, %rd16, 0; + ld.u64 %rd17, [%rd3+24]; + setp.eq.s64 %p9, %rd17, 0; @%p9 bra BB4_17; mov.f32 %f1083, 0f3F800000; sub.f32 %f1082, %f1083, %f1099; sub.f32 %f1081, %f1082, %f1100; - cvt.u64.u32 %rd447, %r27; - cvt.u64.u32 %rd446, %r26; - cvt.u64.u32 %rd445, %r25; - shl.b64 %rd82, %rd445, 3; - add.s64 %rd83, %rd16, %rd82; - shl.b64 %rd84, %rd446, 3; - add.s64 %rd85, %rd16, %rd84; - shl.b64 %rd86, %rd447, 3; - add.s64 %rd87, %rd16, %rd86; - ld.f32 %f50, [%rd83]; - ld.f32 %f51, [%rd83+4]; - ld.f32 %f52, [%rd85]; + cvt.u64.u32 %rd448, %r27; + cvt.u64.u32 %rd447, %r26; + cvt.u64.u32 %rd446, %r25; + shl.b64 %rd83, %rd446, 3; + add.s64 %rd84, %rd17, %rd83; + shl.b64 %rd85, %rd447, 3; + add.s64 %rd86, %rd17, %rd85; + shl.b64 %rd87, %rd448, 3; + add.s64 %rd88, %rd17, %rd87; + ld.f32 %f50, [%rd84]; + ld.f32 %f51, [%rd84+4]; + ld.f32 %f52, [%rd86]; mul.f32 %f539, %f52, %f1099; - ld.f32 %f53, [%rd85+4]; + ld.f32 %f53, [%rd86+4]; mul.f32 %f540, %f53, %f1099; fma.rn.f32 %f541, %f50, %f1081, %f539; fma.rn.f32 %f542, %f51, %f1081, %f540; - ld.f32 %f54, [%rd87]; - ld.f32 %f55, [%rd87+4]; + ld.f32 %f54, [%rd88]; + ld.f32 %f55, [%rd88+4]; fma.rn.f32 %f1099, %f54, %f1100, %f541; fma.rn.f32 %f1100, %f55, %f1100, %f542; - ld.const.u64 %rd88, [params+184]; - setp.eq.s64 %p10, %rd88, 0; + ld.const.u64 %rd89, [params+184]; + setp.eq.s64 %p10, %rd89, 0; @%p10 bra BB4_17; sub.f32 %f58, %f52, %f50; @@ -9923,12 +9924,12 @@ BB4_12: mul.f32 %f1246, %f563, %f545; BB4_17: - ld.u64 %rd17, [%rd43]; - ld.const.u64 %rd89, [params+344]; - cvta.to.global.u64 %rd90, %rd89; - mul.wide.u32 %rd91, %r1, 4; - add.s64 %rd19, %rd90, %rd91; - ld.global.u32 %r5, [%rd19]; + ld.u64 %rd18, [%rd44]; + ld.const.u64 %rd90, [params+344]; + cvta.to.global.u64 %rd91, %rd90; + mul.wide.u32 %rd92, %r1, 4; + add.s64 %rd20, %rd91, %rd92; + ld.global.u32 %r5, [%rd20]; setp.eq.s32 %p12, %r5, 0; @%p12 bra BB4_65; @@ -9966,10 +9967,10 @@ BB4_17: BB4_21: .pragma "nounroll"; // inline asm - call (%rd92), _optix_get_transform_list_handle, (%r334); + call (%rd93), _optix_get_transform_list_handle, (%r334); // inline asm // inline asm - call (%r34), _optix_get_transform_type_from_handle, 
(%rd92); + call (%r34), _optix_get_transform_type_from_handle, (%rd93); // inline asm and.b32 %r35, %r34, -2; setp.eq.s32 %p16, %r35, 2; @@ -9983,63 +9984,63 @@ BB4_27: BB4_31: // inline asm - call (%rd166), _optix_get_matrix_motion_transform_from_handle, (%rd92); + call (%rd167), _optix_get_matrix_motion_transform_from_handle, (%rd93); // inline asm // inline asm - cvta.to.global.u64 %rd168, %rd166; + cvta.to.global.u64 %rd169, %rd167; // inline asm // inline asm - ld.global.v4.u32 {%r123,%r124,%r125,%r126}, [%rd168]; + ld.global.v4.u32 {%r123,%r124,%r125,%r126}, [%rd169]; // inline asm mov.b32 {%rs4, %rs5}, %r125; - add.s64 %rd172, %rd166, 16; + add.s64 %rd173, %rd167, 16; // inline asm - cvta.to.global.u64 %rd171, %rd172; + cvta.to.global.u64 %rd172, %rd173; // inline asm // inline asm - ld.global.v4.u32 {%r127,%r128,%r129,%r130}, [%rd171]; + ld.global.v4.u32 {%r127,%r128,%r129,%r130}, [%rd172]; // inline asm - add.s64 %rd175, %rd166, 32; + add.s64 %rd176, %rd167, 32; // inline asm - cvta.to.global.u64 %rd174, %rd175; + cvta.to.global.u64 %rd175, %rd176; // inline asm // inline asm - ld.global.v4.u32 {%r131,%r132,%r133,%r134}, [%rd174]; + ld.global.v4.u32 {%r131,%r132,%r133,%r134}, [%rd175]; // inline asm - add.s64 %rd178, %rd166, 48; + add.s64 %rd179, %rd167, 48; // inline asm - cvta.to.global.u64 %rd177, %rd178; + cvta.to.global.u64 %rd178, %rd179; // inline asm // inline asm - ld.global.v4.u32 {%r135,%r136,%r137,%r138}, [%rd177]; + ld.global.v4.u32 {%r135,%r136,%r137,%r138}, [%rd178]; // inline asm - add.s64 %rd181, %rd166, 64; + add.s64 %rd182, %rd167, 64; // inline asm - cvta.to.global.u64 %rd180, %rd181; + cvta.to.global.u64 %rd181, %rd182; // inline asm // inline asm - ld.global.v4.u32 {%r139,%r140,%r141,%r142}, [%rd180]; + ld.global.v4.u32 {%r139,%r140,%r141,%r142}, [%rd181]; // inline asm - add.s64 %rd184, %rd166, 80; + add.s64 %rd185, %rd167, 80; // inline asm - cvta.to.global.u64 %rd183, %rd184; + cvta.to.global.u64 %rd184, %rd185; // inline asm // inline asm - ld.global.v4.u32 {%r143,%r144,%r145,%r146}, [%rd183]; + ld.global.v4.u32 {%r143,%r144,%r145,%r146}, [%rd184]; // inline asm - add.s64 %rd187, %rd166, 96; + add.s64 %rd188, %rd167, 96; // inline asm - cvta.to.global.u64 %rd186, %rd187; + cvta.to.global.u64 %rd187, %rd188; // inline asm // inline asm - ld.global.v4.u32 {%r147,%r148,%r149,%r150}, [%rd186]; + ld.global.v4.u32 {%r147,%r148,%r149,%r150}, [%rd187]; // inline asm - add.s64 %rd190, %rd166, 112; + add.s64 %rd191, %rd167, 112; // inline asm - cvta.to.global.u64 %rd189, %rd190; + cvta.to.global.u64 %rd190, %rd191; // inline asm // inline asm - ld.global.v4.u32 {%r151,%r152,%r153,%r154}, [%rd189]; + ld.global.v4.u32 {%r151,%r152,%r153,%r154}, [%rd190]; // inline asm mov.b32 %f703, %r126; mov.b32 %f704, %r127; @@ -10055,35 +10056,35 @@ BB4_31: max.f32 %f712, %f711, %f710; cvt.rmi.f32.f32 %f713, %f712; cvt.rzi.s32.f32 %r169, %f713; - mul.wide.s32 %rd201, %r169, 48; - add.s64 %rd193, %rd175, %rd201; + mul.wide.s32 %rd202, %r169, 48; + add.s64 %rd194, %rd176, %rd202; // inline asm - cvta.to.global.u64 %rd192, %rd193; + cvta.to.global.u64 %rd193, %rd194; // inline asm // inline asm - ld.global.v4.u32 {%r155,%r156,%r157,%r158}, [%rd192]; + ld.global.v4.u32 {%r155,%r156,%r157,%r158}, [%rd193]; // inline asm mov.b32 %f1137, %r155; mov.b32 %f1138, %r156; mov.b32 %f1139, %r157; mov.b32 %f1140, %r158; - add.s64 %rd196, %rd193, 16; + add.s64 %rd197, %rd194, 16; // inline asm - cvta.to.global.u64 %rd195, %rd196; + cvta.to.global.u64 %rd196, %rd197; // inline asm // inline asm - 
ld.global.v4.u32 {%r159,%r160,%r161,%r162}, [%rd195]; + ld.global.v4.u32 {%r159,%r160,%r161,%r162}, [%rd196]; // inline asm mov.b32 %f1133, %r159; mov.b32 %f1134, %r160; mov.b32 %f1135, %r161; mov.b32 %f1136, %r162; - add.s64 %rd199, %rd193, 32; + add.s64 %rd200, %rd194, 32; // inline asm - cvta.to.global.u64 %rd198, %rd199; + cvta.to.global.u64 %rd199, %rd200; // inline asm // inline asm - ld.global.v4.u32 {%r163,%r164,%r165,%r166}, [%rd198]; + ld.global.v4.u32 {%r163,%r164,%r165,%r166}, [%rd199]; // inline asm sub.f32 %f171, %f712, %f713; mov.b32 %f1129, %r163; @@ -10095,37 +10096,37 @@ BB4_31: cvt.rmi.f32.f32 %f1077, %f712; cvt.rzi.s32.f32 %r333, %f1077; - cvt.s64.s32 %rd442, %r333; - mul.lo.s64 %rd211, %rd442, 48; - add.s64 %rd212, %rd166, %rd211; - add.s64 %rd203, %rd212, 80; + cvt.s64.s32 %rd443, %r333; + mul.lo.s64 %rd212, %rd443, 48; + add.s64 %rd213, %rd167, %rd212; + add.s64 %rd204, %rd213, 80; // inline asm - cvta.to.global.u64 %rd202, %rd203; + cvta.to.global.u64 %rd203, %rd204; // inline asm // inline asm - ld.global.v4.u32 {%r170,%r171,%r172,%r173}, [%rd202]; + ld.global.v4.u32 {%r170,%r171,%r172,%r173}, [%rd203]; // inline asm mov.b32 %f714, %r170; mov.b32 %f715, %r171; mov.b32 %f716, %r172; mov.b32 %f717, %r173; - add.s64 %rd206, %rd212, 96; + add.s64 %rd207, %rd213, 96; // inline asm - cvta.to.global.u64 %rd205, %rd206; + cvta.to.global.u64 %rd206, %rd207; // inline asm // inline asm - ld.global.v4.u32 {%r174,%r175,%r176,%r177}, [%rd205]; + ld.global.v4.u32 {%r174,%r175,%r176,%r177}, [%rd206]; // inline asm mov.b32 %f718, %r174; mov.b32 %f719, %r175; mov.b32 %f720, %r176; mov.b32 %f721, %r177; - add.s64 %rd209, %rd212, 112; + add.s64 %rd210, %rd213, 112; // inline asm - cvta.to.global.u64 %rd208, %rd209; + cvta.to.global.u64 %rd209, %rd210; // inline asm // inline asm - ld.global.v4.u32 {%r178,%r179,%r180,%r181}, [%rd208]; + ld.global.v4.u32 {%r178,%r179,%r180,%r181}, [%rd209]; // inline asm mov.f32 %f722, 0f3F800000; sub.f32 %f723, %f722, %f171; @@ -10168,83 +10169,83 @@ BB4_22: BB4_25: // inline asm - call (%rd448), _optix_get_instance_transform_from_handle, (%rd92); + call (%rd449), _optix_get_instance_transform_from_handle, (%rd93); // inline asm bra.uni BB4_26; BB4_28: // inline asm - call (%rd107), _optix_get_srt_motion_transform_from_handle, (%rd92); + call (%rd108), _optix_get_srt_motion_transform_from_handle, (%rd93); // inline asm // inline asm - cvta.to.global.u64 %rd109, %rd107; + cvta.to.global.u64 %rd110, %rd108; // inline asm // inline asm - ld.global.v4.u32 {%r48,%r49,%r50,%r51}, [%rd109]; + ld.global.v4.u32 {%r48,%r49,%r50,%r51}, [%rd110]; // inline asm mov.b32 {%rs2, %rs3}, %r50; - add.s64 %rd113, %rd107, 16; + add.s64 %rd114, %rd108, 16; // inline asm - cvta.to.global.u64 %rd112, %rd113; + cvta.to.global.u64 %rd113, %rd114; // inline asm // inline asm - ld.global.v4.u32 {%r52,%r53,%r54,%r55}, [%rd112]; + ld.global.v4.u32 {%r52,%r53,%r54,%r55}, [%rd113]; // inline asm - add.s64 %rd116, %rd107, 32; + add.s64 %rd117, %rd108, 32; // inline asm - cvta.to.global.u64 %rd115, %rd116; + cvta.to.global.u64 %rd116, %rd117; // inline asm // inline asm - ld.global.v4.u32 {%r56,%r57,%r58,%r59}, [%rd115]; + ld.global.v4.u32 {%r56,%r57,%r58,%r59}, [%rd116]; // inline asm - add.s64 %rd119, %rd107, 48; + add.s64 %rd120, %rd108, 48; // inline asm - cvta.to.global.u64 %rd118, %rd119; + cvta.to.global.u64 %rd119, %rd120; // inline asm // inline asm - ld.global.v4.u32 {%r60,%r61,%r62,%r63}, [%rd118]; + ld.global.v4.u32 {%r60,%r61,%r62,%r63}, [%rd119]; // inline asm - add.s64 
%rd122, %rd107, 64; + add.s64 %rd123, %rd108, 64; // inline asm - cvta.to.global.u64 %rd121, %rd122; + cvta.to.global.u64 %rd122, %rd123; // inline asm // inline asm - ld.global.v4.u32 {%r64,%r65,%r66,%r67}, [%rd121]; + ld.global.v4.u32 {%r64,%r65,%r66,%r67}, [%rd122]; // inline asm - add.s64 %rd125, %rd107, 80; + add.s64 %rd126, %rd108, 80; // inline asm - cvta.to.global.u64 %rd124, %rd125; + cvta.to.global.u64 %rd125, %rd126; // inline asm // inline asm - ld.global.v4.u32 {%r68,%r69,%r70,%r71}, [%rd124]; + ld.global.v4.u32 {%r68,%r69,%r70,%r71}, [%rd125]; // inline asm - add.s64 %rd128, %rd107, 96; + add.s64 %rd129, %rd108, 96; // inline asm - cvta.to.global.u64 %rd127, %rd128; + cvta.to.global.u64 %rd128, %rd129; // inline asm // inline asm - ld.global.v4.u32 {%r72,%r73,%r74,%r75}, [%rd127]; + ld.global.v4.u32 {%r72,%r73,%r74,%r75}, [%rd128]; // inline asm - add.s64 %rd131, %rd107, 112; + add.s64 %rd132, %rd108, 112; // inline asm - cvta.to.global.u64 %rd130, %rd131; + cvta.to.global.u64 %rd131, %rd132; // inline asm // inline asm - ld.global.v4.u32 {%r76,%r77,%r78,%r79}, [%rd130]; + ld.global.v4.u32 {%r76,%r77,%r78,%r79}, [%rd131]; // inline asm - add.s64 %rd134, %rd107, 128; + add.s64 %rd135, %rd108, 128; // inline asm - cvta.to.global.u64 %rd133, %rd134; + cvta.to.global.u64 %rd134, %rd135; // inline asm // inline asm - ld.global.v4.u32 {%r80,%r81,%r82,%r83}, [%rd133]; + ld.global.v4.u32 {%r80,%r81,%r82,%r83}, [%rd134]; // inline asm - add.s64 %rd137, %rd107, 144; + add.s64 %rd138, %rd108, 144; // inline asm - cvta.to.global.u64 %rd136, %rd137; + cvta.to.global.u64 %rd137, %rd138; // inline asm // inline asm - ld.global.v4.u32 {%r84,%r85,%r86,%r87}, [%rd136]; + ld.global.v4.u32 {%r84,%r85,%r86,%r87}, [%rd137]; // inline asm mov.b32 %f590, %r51; mov.b32 %f591, %r52; @@ -10260,47 +10261,47 @@ BB4_28: max.f32 %f599, %f598, %f597; cvt.rmi.f32.f32 %f600, %f599; cvt.rzi.s32.f32 %r106, %f600; - mul.wide.s32 %rd151, %r106, 64; - add.s64 %rd140, %rd116, %rd151; + mul.wide.s32 %rd152, %r106, 64; + add.s64 %rd141, %rd117, %rd152; // inline asm - cvta.to.global.u64 %rd139, %rd140; + cvta.to.global.u64 %rd140, %rd141; // inline asm // inline asm - ld.global.v4.u32 {%r88,%r89,%r90,%r91}, [%rd139]; + ld.global.v4.u32 {%r88,%r89,%r90,%r91}, [%rd140]; // inline asm mov.b32 %f1113, %r88; mov.b32 %f1114, %r89; mov.b32 %f1115, %r90; mov.b32 %f1116, %r91; - add.s64 %rd143, %rd140, 16; + add.s64 %rd144, %rd141, 16; // inline asm - cvta.to.global.u64 %rd142, %rd143; + cvta.to.global.u64 %rd143, %rd144; // inline asm // inline asm - ld.global.v4.u32 {%r92,%r93,%r94,%r95}, [%rd142]; + ld.global.v4.u32 {%r92,%r93,%r94,%r95}, [%rd143]; // inline asm mov.b32 %f1117, %r92; mov.b32 %f1118, %r93; mov.b32 %f1119, %r94; mov.b32 %f1120, %r95; - add.s64 %rd146, %rd140, 32; + add.s64 %rd147, %rd141, 32; // inline asm - cvta.to.global.u64 %rd145, %rd146; + cvta.to.global.u64 %rd146, %rd147; // inline asm // inline asm - ld.global.v4.u32 {%r96,%r97,%r98,%r99}, [%rd145]; + ld.global.v4.u32 {%r96,%r97,%r98,%r99}, [%rd146]; // inline asm sub.f32 %f110, %f599, %f600; mov.b32 %f1121, %r96; mov.b32 %f1122, %r97; mov.b32 %f1123, %r98; mov.b32 %f1124, %r99; - add.s64 %rd149, %rd140, 48; + add.s64 %rd150, %rd141, 48; // inline asm - cvta.to.global.u64 %rd148, %rd149; + cvta.to.global.u64 %rd149, %rd150; // inline asm // inline asm - ld.global.v4.u32 {%r100,%r101,%r102,%r103}, [%rd148]; + ld.global.v4.u32 {%r100,%r101,%r102,%r103}, [%rd149]; // inline asm mov.b32 %f1125, %r100; mov.b32 %f1126, %r101; @@ -10311,48 +10312,48 @@ 
BB4_28: cvt.rmi.f32.f32 %f1076, %f599; cvt.rzi.s32.f32 %r332, %f1076; - cvt.s64.s32 %rd440, %r332; - shl.b64 %rd164, %rd440, 6; - add.s64 %rd165, %rd164, %rd107; - add.s64 %rd153, %rd165, 96; + cvt.s64.s32 %rd441, %r332; + shl.b64 %rd165, %rd441, 6; + add.s64 %rd166, %rd165, %rd108; + add.s64 %rd154, %rd166, 96; // inline asm - cvta.to.global.u64 %rd152, %rd153; + cvta.to.global.u64 %rd153, %rd154; // inline asm // inline asm - ld.global.v4.u32 {%r107,%r108,%r109,%r110}, [%rd152]; + ld.global.v4.u32 {%r107,%r108,%r109,%r110}, [%rd153]; // inline asm mov.b32 %f601, %r107; mov.b32 %f602, %r108; mov.b32 %f603, %r109; mov.b32 %f604, %r110; - add.s64 %rd156, %rd165, 112; + add.s64 %rd157, %rd166, 112; // inline asm - cvta.to.global.u64 %rd155, %rd156; + cvta.to.global.u64 %rd156, %rd157; // inline asm // inline asm - ld.global.v4.u32 {%r111,%r112,%r113,%r114}, [%rd155]; + ld.global.v4.u32 {%r111,%r112,%r113,%r114}, [%rd156]; // inline asm mov.b32 %f605, %r111; mov.b32 %f606, %r112; mov.b32 %f607, %r113; mov.b32 %f608, %r114; - add.s64 %rd159, %rd165, 128; + add.s64 %rd160, %rd166, 128; // inline asm - cvta.to.global.u64 %rd158, %rd159; + cvta.to.global.u64 %rd159, %rd160; // inline asm // inline asm - ld.global.v4.u32 {%r115,%r116,%r117,%r118}, [%rd158]; + ld.global.v4.u32 {%r115,%r116,%r117,%r118}, [%rd159]; // inline asm mov.b32 %f609, %r115; mov.b32 %f610, %r116; mov.b32 %f611, %r117; mov.b32 %f612, %r118; - add.s64 %rd162, %rd165, 144; + add.s64 %rd163, %rd166, 144; // inline asm - cvta.to.global.u64 %rd161, %rd162; + cvta.to.global.u64 %rd162, %rd163; // inline asm // inline asm - ld.global.v4.u32 {%r119,%r120,%r121,%r122}, [%rd161]; + ld.global.v4.u32 {%r119,%r120,%r121,%r122}, [%rd162]; // inline asm mov.f32 %f613, 0f3F800000; sub.f32 %f614, %f613, %f110; @@ -10491,38 +10492,38 @@ BB4_23: @%p18 bra BB4_33; // inline asm - call (%rd94), _optix_get_static_transform_from_handle, (%rd92); + call (%rd95), _optix_get_static_transform_from_handle, (%rd93); // inline asm - add.s64 %rd448, %rd94, 16; + add.s64 %rd449, %rd95, 16; BB4_26: // inline asm - cvta.to.global.u64 %rd98, %rd448; + cvta.to.global.u64 %rd99, %rd449; // inline asm // inline asm - ld.global.v4.u32 {%r36,%r37,%r38,%r39}, [%rd98]; + ld.global.v4.u32 {%r36,%r37,%r38,%r39}, [%rd99]; // inline asm mov.b32 %f1137, %r36; mov.b32 %f1138, %r37; mov.b32 %f1139, %r38; mov.b32 %f1140, %r39; - add.s64 %rd102, %rd448, 16; + add.s64 %rd103, %rd449, 16; // inline asm - cvta.to.global.u64 %rd101, %rd102; + cvta.to.global.u64 %rd102, %rd103; // inline asm // inline asm - ld.global.v4.u32 {%r40,%r41,%r42,%r43}, [%rd101]; + ld.global.v4.u32 {%r40,%r41,%r42,%r43}, [%rd102]; // inline asm mov.b32 %f1133, %r40; mov.b32 %f1134, %r41; mov.b32 %f1135, %r42; mov.b32 %f1136, %r43; - add.s64 %rd105, %rd448, 32; + add.s64 %rd106, %rd449, 32; // inline asm - cvta.to.global.u64 %rd104, %rd105; + cvta.to.global.u64 %rd105, %rd106; // inline asm // inline asm - ld.global.v4.u32 {%r44,%r45,%r46,%r47}, [%rd104]; + ld.global.v4.u32 {%r44,%r45,%r46,%r47}, [%rd105]; // inline asm mov.b32 %f1129, %r44; mov.b32 %f1130, %r45; @@ -10625,10 +10626,10 @@ BB4_37: BB4_39: .pragma "nounroll"; // inline asm - call (%rd213), _optix_get_transform_list_handle, (%r335); + call (%rd214), _optix_get_transform_list_handle, (%r335); // inline asm // inline asm - call (%r184), _optix_get_transform_type_from_handle, (%rd213); + call (%r184), _optix_get_transform_type_from_handle, (%rd214); // inline asm and.b32 %r185, %r184, -2; setp.eq.s32 %p25, %r185, 2; @@ -10642,63 +10643,63 @@ 
BB4_45: BB4_49: // inline asm - call (%rd287), _optix_get_matrix_motion_transform_from_handle, (%rd213); + call (%rd288), _optix_get_matrix_motion_transform_from_handle, (%rd214); // inline asm // inline asm - cvta.to.global.u64 %rd289, %rd287; + cvta.to.global.u64 %rd290, %rd288; // inline asm // inline asm - ld.global.v4.u32 {%r273,%r274,%r275,%r276}, [%rd289]; + ld.global.v4.u32 {%r273,%r274,%r275,%r276}, [%rd290]; // inline asm mov.b32 {%rs8, %rs9}, %r275; - add.s64 %rd293, %rd287, 16; + add.s64 %rd294, %rd288, 16; // inline asm - cvta.to.global.u64 %rd292, %rd293; + cvta.to.global.u64 %rd293, %rd294; // inline asm // inline asm - ld.global.v4.u32 {%r277,%r278,%r279,%r280}, [%rd292]; + ld.global.v4.u32 {%r277,%r278,%r279,%r280}, [%rd293]; // inline asm - add.s64 %rd296, %rd287, 32; + add.s64 %rd297, %rd288, 32; // inline asm - cvta.to.global.u64 %rd295, %rd296; + cvta.to.global.u64 %rd296, %rd297; // inline asm // inline asm - ld.global.v4.u32 {%r281,%r282,%r283,%r284}, [%rd295]; + ld.global.v4.u32 {%r281,%r282,%r283,%r284}, [%rd296]; // inline asm - add.s64 %rd299, %rd287, 48; + add.s64 %rd300, %rd288, 48; // inline asm - cvta.to.global.u64 %rd298, %rd299; + cvta.to.global.u64 %rd299, %rd300; // inline asm // inline asm - ld.global.v4.u32 {%r285,%r286,%r287,%r288}, [%rd298]; + ld.global.v4.u32 {%r285,%r286,%r287,%r288}, [%rd299]; // inline asm - add.s64 %rd302, %rd287, 64; + add.s64 %rd303, %rd288, 64; // inline asm - cvta.to.global.u64 %rd301, %rd302; + cvta.to.global.u64 %rd302, %rd303; // inline asm // inline asm - ld.global.v4.u32 {%r289,%r290,%r291,%r292}, [%rd301]; + ld.global.v4.u32 {%r289,%r290,%r291,%r292}, [%rd302]; // inline asm - add.s64 %rd305, %rd287, 80; + add.s64 %rd306, %rd288, 80; // inline asm - cvta.to.global.u64 %rd304, %rd305; + cvta.to.global.u64 %rd305, %rd306; // inline asm // inline asm - ld.global.v4.u32 {%r293,%r294,%r295,%r296}, [%rd304]; + ld.global.v4.u32 {%r293,%r294,%r295,%r296}, [%rd305]; // inline asm - add.s64 %rd308, %rd287, 96; + add.s64 %rd309, %rd288, 96; // inline asm - cvta.to.global.u64 %rd307, %rd308; + cvta.to.global.u64 %rd308, %rd309; // inline asm // inline asm - ld.global.v4.u32 {%r297,%r298,%r299,%r300}, [%rd307]; + ld.global.v4.u32 {%r297,%r298,%r299,%r300}, [%rd308]; // inline asm - add.s64 %rd311, %rd287, 112; + add.s64 %rd312, %rd288, 112; // inline asm - cvta.to.global.u64 %rd310, %rd311; + cvta.to.global.u64 %rd311, %rd312; // inline asm // inline asm - ld.global.v4.u32 {%r301,%r302,%r303,%r304}, [%rd310]; + ld.global.v4.u32 {%r301,%r302,%r303,%r304}, [%rd311]; // inline asm mov.b32 %f879, %r276; mov.b32 %f880, %r277; @@ -10714,34 +10715,34 @@ BB4_49: max.f32 %f888, %f887, %f886; cvt.rmi.f32.f32 %f889, %f888; cvt.rzi.s32.f32 %r319, %f889; - cvt.s64.s32 %rd35, %r319; - mul.wide.s32 %rd322, %r319, 48; - add.s64 %rd314, %rd296, %rd322; + cvt.s64.s32 %rd36, %r319; + mul.wide.s32 %rd323, %r319, 48; + add.s64 %rd315, %rd297, %rd323; // inline asm - cvta.to.global.u64 %rd313, %rd314; + cvta.to.global.u64 %rd314, %rd315; // inline asm // inline asm - ld.global.v4.u32 {%r305,%r306,%r307,%r308}, [%rd313]; + ld.global.v4.u32 {%r305,%r306,%r307,%r308}, [%rd314]; // inline asm mov.b32 %f1190, %r305; mov.b32 %f1191, %r306; mov.b32 %f1192, %r307; - add.s64 %rd317, %rd314, 16; + add.s64 %rd318, %rd315, 16; // inline asm - cvta.to.global.u64 %rd316, %rd317; + cvta.to.global.u64 %rd317, %rd318; // inline asm // inline asm - ld.global.v4.u32 {%r309,%r310,%r311,%r312}, [%rd316]; + ld.global.v4.u32 {%r309,%r310,%r311,%r312}, [%rd317]; // inline asm 
mov.b32 %f1187, %r309; mov.b32 %f1188, %r310; mov.b32 %f1189, %r311; - add.s64 %rd320, %rd314, 32; + add.s64 %rd321, %rd315, 32; // inline asm - cvta.to.global.u64 %rd319, %rd320; + cvta.to.global.u64 %rd320, %rd321; // inline asm // inline asm - ld.global.v4.u32 {%r313,%r314,%r315,%r316}, [%rd319]; + ld.global.v4.u32 {%r313,%r314,%r315,%r316}, [%rd320]; // inline asm sub.f32 %f301, %f888, %f889; mov.b32 %f1184, %r313; @@ -10750,34 +10751,34 @@ BB4_49: setp.leu.f32 %p30, %f301, 0f00000000; @%p30 bra BB4_51; - mul.lo.s64 %rd332, %rd35, 48; - add.s64 %rd333, %rd287, %rd332; - add.s64 %rd324, %rd333, 80; + mul.lo.s64 %rd333, %rd36, 48; + add.s64 %rd334, %rd288, %rd333; + add.s64 %rd325, %rd334, 80; // inline asm - cvta.to.global.u64 %rd323, %rd324; + cvta.to.global.u64 %rd324, %rd325; // inline asm // inline asm - ld.global.v4.u32 {%r320,%r321,%r322,%r323}, [%rd323]; + ld.global.v4.u32 {%r320,%r321,%r322,%r323}, [%rd324]; // inline asm mov.b32 %f890, %r320; mov.b32 %f891, %r321; mov.b32 %f892, %r322; - add.s64 %rd327, %rd333, 96; + add.s64 %rd328, %rd334, 96; // inline asm - cvta.to.global.u64 %rd326, %rd327; + cvta.to.global.u64 %rd327, %rd328; // inline asm // inline asm - ld.global.v4.u32 {%r324,%r325,%r326,%r327}, [%rd326]; + ld.global.v4.u32 {%r324,%r325,%r326,%r327}, [%rd327]; // inline asm mov.b32 %f893, %r324; mov.b32 %f894, %r325; mov.b32 %f895, %r326; - add.s64 %rd330, %rd333, 112; + add.s64 %rd331, %rd334, 112; // inline asm - cvta.to.global.u64 %rd329, %rd330; + cvta.to.global.u64 %rd330, %rd331; // inline asm // inline asm - ld.global.v4.u32 {%r328,%r329,%r330,%r331}, [%rd329]; + ld.global.v4.u32 {%r328,%r329,%r330,%r331}, [%rd330]; // inline asm mov.f32 %f896, 0f3F800000; sub.f32 %f897, %f896, %f301; @@ -10813,83 +10814,83 @@ BB4_40: BB4_43: // inline asm - call (%rd449), _optix_get_instance_inverse_transform_from_handle, (%rd213); + call (%rd450), _optix_get_instance_inverse_transform_from_handle, (%rd214); // inline asm bra.uni BB4_44; BB4_46: // inline asm - call (%rd228), _optix_get_srt_motion_transform_from_handle, (%rd213); + call (%rd229), _optix_get_srt_motion_transform_from_handle, (%rd214); // inline asm // inline asm - cvta.to.global.u64 %rd230, %rd228; + cvta.to.global.u64 %rd231, %rd229; // inline asm // inline asm - ld.global.v4.u32 {%r198,%r199,%r200,%r201}, [%rd230]; + ld.global.v4.u32 {%r198,%r199,%r200,%r201}, [%rd231]; // inline asm mov.b32 {%rs6, %rs7}, %r200; - add.s64 %rd234, %rd228, 16; + add.s64 %rd235, %rd229, 16; // inline asm - cvta.to.global.u64 %rd233, %rd234; + cvta.to.global.u64 %rd234, %rd235; // inline asm // inline asm - ld.global.v4.u32 {%r202,%r203,%r204,%r205}, [%rd233]; + ld.global.v4.u32 {%r202,%r203,%r204,%r205}, [%rd234]; // inline asm - add.s64 %rd237, %rd228, 32; + add.s64 %rd238, %rd229, 32; // inline asm - cvta.to.global.u64 %rd236, %rd237; + cvta.to.global.u64 %rd237, %rd238; // inline asm // inline asm - ld.global.v4.u32 {%r206,%r207,%r208,%r209}, [%rd236]; + ld.global.v4.u32 {%r206,%r207,%r208,%r209}, [%rd237]; // inline asm - add.s64 %rd240, %rd228, 48; + add.s64 %rd241, %rd229, 48; // inline asm - cvta.to.global.u64 %rd239, %rd240; + cvta.to.global.u64 %rd240, %rd241; // inline asm // inline asm - ld.global.v4.u32 {%r210,%r211,%r212,%r213}, [%rd239]; + ld.global.v4.u32 {%r210,%r211,%r212,%r213}, [%rd240]; // inline asm - add.s64 %rd243, %rd228, 64; + add.s64 %rd244, %rd229, 64; // inline asm - cvta.to.global.u64 %rd242, %rd243; + cvta.to.global.u64 %rd243, %rd244; // inline asm // inline asm - ld.global.v4.u32 
{%r214,%r215,%r216,%r217}, [%rd242]; + ld.global.v4.u32 {%r214,%r215,%r216,%r217}, [%rd243]; // inline asm - add.s64 %rd246, %rd228, 80; + add.s64 %rd247, %rd229, 80; // inline asm - cvta.to.global.u64 %rd245, %rd246; + cvta.to.global.u64 %rd246, %rd247; // inline asm // inline asm - ld.global.v4.u32 {%r218,%r219,%r220,%r221}, [%rd245]; + ld.global.v4.u32 {%r218,%r219,%r220,%r221}, [%rd246]; // inline asm - add.s64 %rd249, %rd228, 96; + add.s64 %rd250, %rd229, 96; // inline asm - cvta.to.global.u64 %rd248, %rd249; + cvta.to.global.u64 %rd249, %rd250; // inline asm // inline asm - ld.global.v4.u32 {%r222,%r223,%r224,%r225}, [%rd248]; + ld.global.v4.u32 {%r222,%r223,%r224,%r225}, [%rd249]; // inline asm - add.s64 %rd252, %rd228, 112; + add.s64 %rd253, %rd229, 112; // inline asm - cvta.to.global.u64 %rd251, %rd252; + cvta.to.global.u64 %rd252, %rd253; // inline asm // inline asm - ld.global.v4.u32 {%r226,%r227,%r228,%r229}, [%rd251]; + ld.global.v4.u32 {%r226,%r227,%r228,%r229}, [%rd252]; // inline asm - add.s64 %rd255, %rd228, 128; + add.s64 %rd256, %rd229, 128; // inline asm - cvta.to.global.u64 %rd254, %rd255; + cvta.to.global.u64 %rd255, %rd256; // inline asm // inline asm - ld.global.v4.u32 {%r230,%r231,%r232,%r233}, [%rd254]; + ld.global.v4.u32 {%r230,%r231,%r232,%r233}, [%rd255]; // inline asm - add.s64 %rd258, %rd228, 144; + add.s64 %rd259, %rd229, 144; // inline asm - cvta.to.global.u64 %rd257, %rd258; + cvta.to.global.u64 %rd258, %rd259; // inline asm // inline asm - ld.global.v4.u32 {%r234,%r235,%r236,%r237}, [%rd257]; + ld.global.v4.u32 {%r234,%r235,%r236,%r237}, [%rd258]; // inline asm mov.b32 %f787, %r201; mov.b32 %f788, %r202; @@ -10905,88 +10906,88 @@ BB4_46: max.f32 %f796, %f795, %f794; cvt.rmi.f32.f32 %f797, %f796; cvt.rzi.s32.f32 %r256, %f797; - cvt.s64.s32 %rd33, %r256; - mul.wide.s32 %rd272, %r256, 64; - add.s64 %rd261, %rd237, %rd272; + cvt.s64.s32 %rd34, %r256; + mul.wide.s32 %rd273, %r256, 64; + add.s64 %rd262, %rd238, %rd273; // inline asm - cvta.to.global.u64 %rd260, %rd261; + cvta.to.global.u64 %rd261, %rd262; // inline asm // inline asm - ld.global.v4.u32 {%r238,%r239,%r240,%r241}, [%rd260]; + ld.global.v4.u32 {%r238,%r239,%r240,%r241}, [%rd261]; // inline asm mov.b32 %f1174, %r238; mov.b32 %f1175, %r239; mov.b32 %f1176, %r240; - add.s64 %rd264, %rd261, 16; + add.s64 %rd265, %rd262, 16; // inline asm - cvta.to.global.u64 %rd263, %rd264; + cvta.to.global.u64 %rd264, %rd265; // inline asm // inline asm - ld.global.v4.u32 {%r242,%r243,%r244,%r245}, [%rd263]; + ld.global.v4.u32 {%r242,%r243,%r244,%r245}, [%rd264]; // inline asm mov.b32 %f1177, %r242; mov.b32 %f1178, %r243; mov.b32 %f1179, %r245; - add.s64 %rd267, %rd261, 32; + add.s64 %rd268, %rd262, 32; // inline asm - cvta.to.global.u64 %rd266, %rd267; + cvta.to.global.u64 %rd267, %rd268; // inline asm // inline asm - ld.global.v4.u32 {%r246,%r247,%r248,%r249}, [%rd266]; + ld.global.v4.u32 {%r246,%r247,%r248,%r249}, [%rd267]; // inline asm sub.f32 %f261, %f796, %f797; mov.b32 %f1180, %r247; mov.b32 %f1181, %r248; mov.b32 %f1182, %r249; - add.s64 %rd270, %rd261, 48; + add.s64 %rd271, %rd262, 48; // inline asm - cvta.to.global.u64 %rd269, %rd270; + cvta.to.global.u64 %rd270, %rd271; // inline asm // inline asm - ld.global.v4.u32 {%r250,%r251,%r252,%r253}, [%rd269]; + ld.global.v4.u32 {%r250,%r251,%r252,%r253}, [%rd270]; // inline asm mov.b32 %f1183, %r250; setp.leu.f32 %p29, %f261, 0f00000000; @%p29 bra BB4_48; - shl.b64 %rd285, %rd33, 6; - add.s64 %rd286, %rd285, %rd228; - add.s64 %rd274, %rd286, 96; + shl.b64 %rd286, 
%rd34, 6; + add.s64 %rd287, %rd286, %rd229; + add.s64 %rd275, %rd287, 96; // inline asm - cvta.to.global.u64 %rd273, %rd274; + cvta.to.global.u64 %rd274, %rd275; // inline asm // inline asm - ld.global.v4.u32 {%r257,%r258,%r259,%r260}, [%rd273]; + ld.global.v4.u32 {%r257,%r258,%r259,%r260}, [%rd274]; // inline asm mov.b32 %f798, %r257; mov.b32 %f799, %r258; mov.b32 %f800, %r259; - add.s64 %rd277, %rd286, 112; + add.s64 %rd278, %rd287, 112; // inline asm - cvta.to.global.u64 %rd276, %rd277; + cvta.to.global.u64 %rd277, %rd278; // inline asm // inline asm - ld.global.v4.u32 {%r261,%r262,%r263,%r264}, [%rd276]; + ld.global.v4.u32 {%r261,%r262,%r263,%r264}, [%rd277]; // inline asm mov.b32 %f801, %r261; mov.b32 %f802, %r262; mov.b32 %f803, %r264; - add.s64 %rd280, %rd286, 128; + add.s64 %rd281, %rd287, 128; // inline asm - cvta.to.global.u64 %rd279, %rd280; + cvta.to.global.u64 %rd280, %rd281; // inline asm // inline asm - ld.global.v4.u32 {%r265,%r266,%r267,%r268}, [%rd279]; + ld.global.v4.u32 {%r265,%r266,%r267,%r268}, [%rd280]; // inline asm mov.b32 %f804, %r266; mov.b32 %f805, %r267; mov.b32 %f806, %r268; - add.s64 %rd283, %rd286, 144; + add.s64 %rd284, %rd287, 144; // inline asm - cvta.to.global.u64 %rd282, %rd283; + cvta.to.global.u64 %rd283, %rd284; // inline asm // inline asm - ld.global.v4.u32 {%r269,%r270,%r271,%r272}, [%rd282]; + ld.global.v4.u32 {%r269,%r270,%r271,%r272}, [%rd283]; // inline asm mov.f32 %f807, 0f3F800000; sub.f32 %f808, %f807, %f261; @@ -11139,36 +11140,36 @@ BB4_41: @%p27 bra BB4_52; // inline asm - call (%rd215), _optix_get_static_transform_from_handle, (%rd213); + call (%rd216), _optix_get_static_transform_from_handle, (%rd214); // inline asm - add.s64 %rd449, %rd215, 64; + add.s64 %rd450, %rd216, 64; BB4_44: // inline asm - cvta.to.global.u64 %rd219, %rd449; + cvta.to.global.u64 %rd220, %rd450; // inline asm // inline asm - ld.global.v4.u32 {%r186,%r187,%r188,%r189}, [%rd219]; + ld.global.v4.u32 {%r186,%r187,%r188,%r189}, [%rd220]; // inline asm mov.b32 %f1199, %r186; mov.b32 %f1200, %r187; mov.b32 %f1201, %r188; - add.s64 %rd223, %rd449, 16; + add.s64 %rd224, %rd450, 16; // inline asm - cvta.to.global.u64 %rd222, %rd223; + cvta.to.global.u64 %rd223, %rd224; // inline asm // inline asm - ld.global.v4.u32 {%r190,%r191,%r192,%r193}, [%rd222]; + ld.global.v4.u32 {%r190,%r191,%r192,%r193}, [%rd223]; // inline asm mov.b32 %f1196, %r190; mov.b32 %f1197, %r191; mov.b32 %f1198, %r192; - add.s64 %rd226, %rd449, 32; + add.s64 %rd227, %rd450, 32; // inline asm - cvta.to.global.u64 %rd225, %rd226; + cvta.to.global.u64 %rd226, %rd227; // inline asm // inline asm - ld.global.v4.u32 {%r194,%r195,%r196,%r197}, [%rd225]; + ld.global.v4.u32 {%r194,%r195,%r196,%r197}, [%rd226]; // inline asm mov.b32 %f1193, %r194; mov.b32 %f1194, %r195; @@ -11241,8 +11242,8 @@ BB4_56: fma.rn.f32 %f1256, %f1258, %f1111, %f962; fma.rn.f32 %f1257, %f1258, %f1107, %f964; fma.rn.f32 %f1258, %f1258, %f1103, %f966; - ld.const.u64 %rd334, [params+112]; - setp.eq.s64 %p33, %rd334, 0; + ld.const.u64 %rd335, [params+112]; + setp.eq.s64 %p33, %rd335, 0; @%p33 bra BB4_58; mul.f32 %f967, %f1250, %f1167; @@ -11283,8 +11284,8 @@ BB4_58: div.rn.f32 %f1090, %f988, %f992; BB4_60: - ld.const.u64 %rd335, [params+184]; - setp.eq.s64 %p35, %rd335, 0; + ld.const.u64 %rd336, [params+184]; + setp.eq.s64 %p35, %rd336, 0; @%p35 bra BB4_62; mul.f32 %f993, %f1247, %f1109; @@ -11307,10 +11308,10 @@ BB4_60: fma.rn.f32 %f1246, %f1246, %f1103, %f1004; BB4_62: - ld.const.u64 %rd336, [params+280]; - ld.const.u64 %rd337, 
[params+232]; - or.b64 %rd338, %rd336, %rd337; - setp.eq.s64 %p36, %rd338, 0; + ld.const.u64 %rd337, [params+280]; + ld.const.u64 %rd338, [params+232]; + or.b64 %rd339, %rd337, %rd338; + setp.eq.s64 %p36, %rd339, 0; @%p36 bra BB4_64; mul.f32 %f1005, %f1092, %f1109; @@ -11390,168 +11391,168 @@ BB4_62: sub.f32 %f1086, %f1061, %f1073; BB4_64: - st.global.u32 [%rd19], %r31; + st.global.u32 [%rd20], %r31; BB4_65: - ld.const.u64 %rd441, [params+96]; - setp.eq.s64 %p44, %rd441, 0; - cvt.u64.u32 %rd437, %r1; - ld.const.u64 %rd339, [params+328]; - cvta.to.global.u64 %rd340, %rd339; - shl.b64 %rd341, %rd437, 3; - add.s64 %rd342, %rd340, %rd341; - st.global.u64 [%rd342], %rd17; - ld.const.u64 %rd343, [params+336]; - cvta.to.global.u64 %rd344, %rd343; - shl.b64 %rd345, %rd437, 2; - add.s64 %rd346, %rd344, %rd345; - st.global.u32 [%rd346], %r23; - ld.const.u64 %rd347, [params+160]; - cvta.to.global.u64 %rd348, %rd347; - add.s64 %rd349, %rd348, %rd345; - st.global.f32 [%rd349], %f1256; - ld.const.u64 %rd350, [params+168]; - cvta.to.global.u64 %rd351, %rd350; - add.s64 %rd352, %rd351, %rd345; - st.global.f32 [%rd352], %f1257; - ld.const.u64 %rd353, [params+176]; - cvta.to.global.u64 %rd354, %rd353; - add.s64 %rd355, %rd354, %rd345; - st.global.f32 [%rd355], %f1258; - ld.const.u64 %rd356, [params+72]; - cvta.to.global.u64 %rd357, %rd356; - add.s64 %rd358, %rd357, %rd345; - st.global.f32 [%rd358], %f428; + ld.const.u64 %rd442, [params+96]; + setp.eq.s64 %p44, %rd442, 0; + cvt.u64.u32 %rd438, %r1; + ld.const.u64 %rd340, [params+328]; + cvta.to.global.u64 %rd341, %rd340; + shl.b64 %rd342, %rd438, 3; + add.s64 %rd343, %rd341, %rd342; + st.global.u64 [%rd343], %rd18; + ld.const.u64 %rd344, [params+336]; + cvta.to.global.u64 %rd345, %rd344; + shl.b64 %rd346, %rd438, 2; + add.s64 %rd347, %rd345, %rd346; + st.global.u32 [%rd347], %r23; + ld.const.u64 %rd348, [params+160]; + cvta.to.global.u64 %rd349, %rd348; + add.s64 %rd350, %rd349, %rd346; + st.global.f32 [%rd350], %f1256; + ld.const.u64 %rd351, [params+168]; + cvta.to.global.u64 %rd352, %rd351; + add.s64 %rd353, %rd352, %rd346; + st.global.f32 [%rd353], %f1257; + ld.const.u64 %rd354, [params+176]; + cvta.to.global.u64 %rd355, %rd354; + add.s64 %rd356, %rd355, %rd346; + st.global.f32 [%rd356], %f1258; + ld.const.u64 %rd357, [params+72]; + cvta.to.global.u64 %rd358, %rd357; + add.s64 %rd359, %rd358, %rd346; + st.global.f32 [%rd359], %f428; @%p44 bra BB4_67; - ld.const.u64 %rd438, [params+96]; - cvta.to.global.u64 %rd359, %rd438; - add.s64 %rd361, %rd359, %rd345; - st.global.f32 [%rd361], %f1099; - ld.const.u64 %rd362, [params+104]; - cvta.to.global.u64 %rd363, %rd362; - add.s64 %rd364, %rd363, %rd345; - st.global.f32 [%rd364], %f1100; + ld.const.u64 %rd439, [params+96]; + cvta.to.global.u64 %rd360, %rd439; + add.s64 %rd362, %rd360, %rd346; + st.global.f32 [%rd362], %f1099; + ld.const.u64 %rd363, [params+104]; + cvta.to.global.u64 %rd364, %rd363; + add.s64 %rd365, %rd364, %rd346; + st.global.f32 [%rd365], %f1100; BB4_67: - ld.const.u64 %rd36, [params+112]; - setp.eq.s64 %p38, %rd36, 0; + ld.const.u64 %rd37, [params+112]; + setp.eq.s64 %p38, %rd37, 0; @%p38 bra BB4_69; - cvta.to.global.u64 %rd365, %rd36; - add.s64 %rd367, %rd365, %rd345; - st.global.f32 [%rd367], %f1250; - ld.const.u64 %rd368, [params+120]; - cvta.to.global.u64 %rd369, %rd368; - add.s64 %rd370, %rd369, %rd345; - st.global.f32 [%rd370], %f1251; - ld.const.u64 %rd371, [params+128]; - cvta.to.global.u64 %rd372, %rd371; - add.s64 %rd373, %rd372, %rd345; - st.global.f32 [%rd373], %f1252; + 
cvta.to.global.u64 %rd366, %rd37; + add.s64 %rd368, %rd366, %rd346; + st.global.f32 [%rd368], %f1250; + ld.const.u64 %rd369, [params+120]; + cvta.to.global.u64 %rd370, %rd369; + add.s64 %rd371, %rd370, %rd346; + st.global.f32 [%rd371], %f1251; + ld.const.u64 %rd372, [params+128]; + cvta.to.global.u64 %rd373, %rd372; + add.s64 %rd374, %rd373, %rd346; + st.global.f32 [%rd374], %f1252; BB4_69: @%p5 bra BB4_71; - ld.const.u64 %rd439, [params+136]; - cvta.to.global.u64 %rd374, %rd439; - add.s64 %rd376, %rd374, %rd345; - st.global.f32 [%rd376], %f1092; - ld.const.u64 %rd377, [params+144]; - cvta.to.global.u64 %rd378, %rd377; - add.s64 %rd379, %rd378, %rd345; - st.global.f32 [%rd379], %f1091; - ld.const.u64 %rd380, [params+152]; - cvta.to.global.u64 %rd381, %rd380; - add.s64 %rd382, %rd381, %rd345; - st.global.f32 [%rd382], %f1090; + ld.const.u64 %rd440, [params+136]; + cvta.to.global.u64 %rd375, %rd440; + add.s64 %rd377, %rd375, %rd346; + st.global.f32 [%rd377], %f1092; + ld.const.u64 %rd378, [params+144]; + cvta.to.global.u64 %rd379, %rd378; + add.s64 %rd380, %rd379, %rd346; + st.global.f32 [%rd380], %f1091; + ld.const.u64 %rd381, [params+152]; + cvta.to.global.u64 %rd382, %rd381; + add.s64 %rd383, %rd382, %rd346; + st.global.f32 [%rd383], %f1090; BB4_71: - ld.const.u64 %rd37, [params+184]; - setp.eq.s64 %p40, %rd37, 0; + ld.const.u64 %rd38, [params+184]; + setp.eq.s64 %p40, %rd38, 0; @%p40 bra BB4_73; - cvta.to.global.u64 %rd383, %rd37; - add.s64 %rd385, %rd383, %rd345; - st.global.f32 [%rd385], %f1247; - ld.const.u64 %rd386, [params+192]; - cvta.to.global.u64 %rd387, %rd386; - add.s64 %rd388, %rd387, %rd345; - st.global.f32 [%rd388], %f1248; - ld.const.u64 %rd389, [params+200]; - cvta.to.global.u64 %rd390, %rd389; - add.s64 %rd391, %rd390, %rd345; - st.global.f32 [%rd391], %f1249; - ld.const.u64 %rd392, [params+208]; - cvta.to.global.u64 %rd393, %rd392; - add.s64 %rd394, %rd393, %rd345; - st.global.f32 [%rd394], %f1244; - ld.const.u64 %rd395, [params+216]; - cvta.to.global.u64 %rd396, %rd395; - add.s64 %rd397, %rd396, %rd345; - st.global.f32 [%rd397], %f1245; - ld.const.u64 %rd398, [params+224]; - cvta.to.global.u64 %rd399, %rd398; - add.s64 %rd400, %rd399, %rd345; - st.global.f32 [%rd400], %f1246; + cvta.to.global.u64 %rd384, %rd38; + add.s64 %rd386, %rd384, %rd346; + st.global.f32 [%rd386], %f1247; + ld.const.u64 %rd387, [params+192]; + cvta.to.global.u64 %rd388, %rd387; + add.s64 %rd389, %rd388, %rd346; + st.global.f32 [%rd389], %f1248; + ld.const.u64 %rd390, [params+200]; + cvta.to.global.u64 %rd391, %rd390; + add.s64 %rd392, %rd391, %rd346; + st.global.f32 [%rd392], %f1249; + ld.const.u64 %rd393, [params+208]; + cvta.to.global.u64 %rd394, %rd393; + add.s64 %rd395, %rd394, %rd346; + st.global.f32 [%rd395], %f1244; + ld.const.u64 %rd396, [params+216]; + cvta.to.global.u64 %rd397, %rd396; + add.s64 %rd398, %rd397, %rd346; + st.global.f32 [%rd398], %f1245; + ld.const.u64 %rd399, [params+224]; + cvta.to.global.u64 %rd400, %rd399; + add.s64 %rd401, %rd400, %rd346; + st.global.f32 [%rd401], %f1246; BB4_73: - ld.const.u64 %rd38, [params+232]; - setp.eq.s64 %p41, %rd38, 0; + ld.const.u64 %rd39, [params+232]; + setp.eq.s64 %p41, %rd39, 0; @%p41 bra BB4_75; - cvta.to.global.u64 %rd401, %rd38; - add.s64 %rd403, %rd401, %rd345; - st.global.f32 [%rd403], %f1087; - ld.const.u64 %rd404, [params+240]; - cvta.to.global.u64 %rd405, %rd404; - add.s64 %rd406, %rd405, %rd345; - st.global.f32 [%rd406], %f1088; - ld.const.u64 %rd407, [params+248]; - cvta.to.global.u64 %rd408, %rd407; - add.s64 %rd409, %rd408, 
%rd345; - st.global.f32 [%rd409], %f1089; - ld.const.u64 %rd410, [params+256]; - cvta.to.global.u64 %rd411, %rd410; - add.s64 %rd412, %rd411, %rd345; - st.global.f32 [%rd412], %f1084; - ld.const.u64 %rd413, [params+264]; - cvta.to.global.u64 %rd414, %rd413; - add.s64 %rd415, %rd414, %rd345; - st.global.f32 [%rd415], %f1085; - ld.const.u64 %rd416, [params+272]; - cvta.to.global.u64 %rd417, %rd416; - add.s64 %rd418, %rd417, %rd345; - st.global.f32 [%rd418], %f1086; + cvta.to.global.u64 %rd402, %rd39; + add.s64 %rd404, %rd402, %rd346; + st.global.f32 [%rd404], %f1087; + ld.const.u64 %rd405, [params+240]; + cvta.to.global.u64 %rd406, %rd405; + add.s64 %rd407, %rd406, %rd346; + st.global.f32 [%rd407], %f1088; + ld.const.u64 %rd408, [params+248]; + cvta.to.global.u64 %rd409, %rd408; + add.s64 %rd410, %rd409, %rd346; + st.global.f32 [%rd410], %f1089; + ld.const.u64 %rd411, [params+256]; + cvta.to.global.u64 %rd412, %rd411; + add.s64 %rd413, %rd412, %rd346; + st.global.f32 [%rd413], %f1084; + ld.const.u64 %rd414, [params+264]; + cvta.to.global.u64 %rd415, %rd414; + add.s64 %rd416, %rd415, %rd346; + st.global.f32 [%rd416], %f1085; + ld.const.u64 %rd417, [params+272]; + cvta.to.global.u64 %rd418, %rd417; + add.s64 %rd419, %rd418, %rd346; + st.global.f32 [%rd419], %f1086; BB4_75: - ld.const.u64 %rd39, [params+280]; - setp.eq.s64 %p42, %rd39, 0; + ld.const.u64 %rd40, [params+280]; + setp.eq.s64 %p42, %rd40, 0; @%p42 bra BB4_77; - cvta.to.global.u64 %rd419, %rd39; - add.s64 %rd421, %rd419, %rd345; - st.global.f32 [%rd421], %f1087; - ld.const.u64 %rd422, [params+288]; - cvta.to.global.u64 %rd423, %rd422; - add.s64 %rd424, %rd423, %rd345; - st.global.f32 [%rd424], %f1088; - ld.const.u64 %rd425, [params+296]; - cvta.to.global.u64 %rd426, %rd425; - add.s64 %rd427, %rd426, %rd345; - st.global.f32 [%rd427], %f1089; - ld.const.u64 %rd428, [params+304]; - cvta.to.global.u64 %rd429, %rd428; - add.s64 %rd430, %rd429, %rd345; - st.global.f32 [%rd430], %f1084; - ld.const.u64 %rd431, [params+312]; - cvta.to.global.u64 %rd432, %rd431; - add.s64 %rd433, %rd432, %rd345; - st.global.f32 [%rd433], %f1085; - ld.const.u64 %rd434, [params+320]; - cvta.to.global.u64 %rd435, %rd434; - add.s64 %rd436, %rd435, %rd345; - st.global.f32 [%rd436], %f1086; + cvta.to.global.u64 %rd420, %rd40; + add.s64 %rd422, %rd420, %rd346; + st.global.f32 [%rd422], %f1087; + ld.const.u64 %rd423, [params+288]; + cvta.to.global.u64 %rd424, %rd423; + add.s64 %rd425, %rd424, %rd346; + st.global.f32 [%rd425], %f1088; + ld.const.u64 %rd426, [params+296]; + cvta.to.global.u64 %rd427, %rd426; + add.s64 %rd428, %rd427, %rd346; + st.global.f32 [%rd428], %f1089; + ld.const.u64 %rd429, [params+304]; + cvta.to.global.u64 %rd430, %rd429; + add.s64 %rd431, %rd430, %rd346; + st.global.f32 [%rd431], %f1084; + ld.const.u64 %rd432, [params+312]; + cvta.to.global.u64 %rd433, %rd432; + add.s64 %rd434, %rd433, %rd346; + st.global.f32 [%rd434], %f1085; + ld.const.u64 %rd435, [params+320]; + cvta.to.global.u64 %rd436, %rd435; + add.s64 %rd437, %rd436, %rd346; + st.global.f32 [%rd437], %f1086; BB4_77: ret; @@ -21136,7 +21137,7 @@ BB10_3: add.u64 %rd6, %SPL, 0; st.local.u32 [%rd6], %r1; st.local.u64 [%rd6+8], %rd4; - mov.u64 %rd7, $str6; + mov.u64 %rd7, $str$6; cvta.global.u64 %rd8, %rd7; // Callseq Start 0 {
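Note on the vector.cuh change above: inside a namespace, unqualified name lookup stops at the first enclosing scope that declares the name fmaf, so any fmaf overload visible at namespace scope hides the global-scope CUDA intrinsic entirely; writing ::fmaf bypasses that hiding and always selects the single-precision intrinsic, regardless of what overloads the CUDA 11.1 headers (or surrounding code) bring into scope. The standalone CUDA sketch below illustrates this failure mode under an assumed setup; the `example` namespace, the double-precision fmaf overload, and the dot3 helper are hypothetical illustrations and are not part of this patch or of Mitsuba.

#include <cuda_runtime.h>  // CUDA headers declare the global-scope intrinsic ::fmaf(float, float, float)

#define DEVICE __device__ __forceinline__

namespace example {  // hypothetical namespace, only to demonstrate name hiding

// A namespace-scope overload like this one hides ::fmaf from unqualified
// lookup, even though it is a worse match for float arguments.
DEVICE double fmaf(double a, double b, double c) { return a * b + c; }

DEVICE float dot3(const float (&a)[3], const float (&b)[3]) {
    float result = a[0] * b[0];
    for (int i = 1; i < 3; ++i)
        // An unqualified fmaf(...) here would resolve to example::fmaf and
        // silently round-trip through double; the '::' qualifier forces the
        // global single-precision CUDA intrinsic, matching the patched code.
        result = ::fmaf(a[i], b[i], result);
    return result;
}

}  // namespace example

The rest of the diff (resources/ptx/optix_rt.ptx) is simply the kernel recompiled with the newer toolchain (CUDA 10.2 -> 11.1, PTX ISA .version 6.5 -> 7.1), which accounts for the renamed $str$N globals and the wholesale register renumbering.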