From e325059bce0928c128145db3696ee51fcbb2c018 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Tue, 12 Apr 2022 18:11:52 +0200 Subject: [PATCH] WIP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` check_args /* Autogenerated: 'src/ExtractionOCaml/word_by_word_montgomery' p256 64 '2^256 - 2^224 + 2^192 + 2^96 - 1' mul --no-wide-int --shiftr-avoid-uint1 --hints-file 'fiat-amd64/boringssl_intel_manual_mul_p256.asm' */ /* curve description: p256 */ /* machine_wordsize = 64 (from "64") */ /* requested operations: mul */ /* m = 0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff (from "2^256 - 2^224 + 2^192 + 2^96 - 1") */ /* */ /* NOTE: In addition to the bounds specified above each function, all */ /* functions synthesized for this Montgomery arithmetic require the */ /* input to be strictly less than the prime modulus (m), and also */ /* require the input to be in the unique saturated representation. */ /* All functions also ensure that these two properties are true of */ /* return values. */ /* */ /* Computed values: */ /* eval z = z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) */ /* bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) */ /* twos_complement_eval z = let x1 := z[0] + (z[1] << 64) + (z[2] << 128) + (z[3] << 192) in */ /* if x1 & (2^256-1) < 2^255 then x1 & (2^256-1) else (x1 & (2^256-1)) - 2^256 */ In fiat_p256_mul: Error while checking for equivalence of syntax tree and assembly: The syntax tree: (λ x1 x2, let x3 := x1[1] (* : uint64_t *) in let x4 := x1[2] (* : uint64_t *) in let x5 := x1[3] (* : uint64_t *) in let x6 := x1[0] (* : uint64_t *) in let x7 := Z.mul_split(2^64, None, (x6, Some [0x0 ~> 0xffffffffffffffff], (x2[3], Some [0x0 ~> 0xffffffffffffffff]))) in let x8 := Z.mul_split(2^64, None, (x6, Some [0x0 ~> 0xffffffffffffffff], (x2[2], Some [0x0 ~> 0xffffffffffffffff]))) in let x9 := Z.mul_split(2^64, None, (x6, Some [0x0 ~> 0xffffffffffffffff], (x2[1], Some [0x0 ~> 0xffffffffffffffff]))) in let x10 := Z.mul_split(2^64, None, (x6, Some [0x0 ~> 0xffffffffffffffff], (x2[0], Some [0x0 ~> 0xffffffffffffffff]))) in let x11 := Z.add_get_carry(2^64, None, (x10₂, Some [0x0 ~> 0xffffffffffffffff], (x9₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x12 := Z.add_with_get_carry(2^64, None, (x11₂, Some [0x0 ~> 0x1], (x9₂, Some [0x0 ~> 0xffffffffffffffff], (x8₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x13 := Z.add_with_get_carry(2^64, None, (x12₂, Some [0x0 ~> 0x1], (x8₂, Some [0x0 ~> 0xffffffffffffffff], (x7₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x14 := x13₂ + x7₂ (* : uint64_t *) in let x15 := Z.mul_split(2^64, None, (x10₁, Some [0x0 ~> 0xffffffffffffffff], (0xffffffff00000001, None))) in let x16 := Z.mul_split(2^64, None, (x10₁, Some [0x0 ~> 0xffffffffffffffff], (2^32-1, None))) in let x17 := Z.mul_split(2^64, None, (x10₁, Some [0x0 ~> 0xffffffffffffffff], (2^64-1, None))) in let x18 := Z.add_get_carry(2^64, None, (x17₂, Some [0x0 ~> 0xffffffffffffffff], (x16₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x19 := x18₂ + x16₂ (* : uint64_t *) in let x20 := Z.add_get_carry(2^64, None, (x10₁, Some [0x0 ~> 0xffffffffffffffff], (x17₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x21 := Z.add_with_get_carry(2^64, None, (x20₂, Some [0x0 ~> 0x1], (x11₁, Some [0x0 ~> 0xffffffffffffffff], (x18₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x22 := Z.add_with_get_carry(2^64, None, (x21₂, Some [0x0 ~> 0x1], (x12₁, Some [0x0 ~> 0xffffffffffffffff], (x19, Some [0x0 ~> 0xffffffffffffffff])))) in let x23 := Z.add_with_get_carry(2^64, None, (x22₂, Some [0x0 ~> 0x1], (x13₁, Some [0x0 ~> 0xffffffffffffffff], (x15₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x24 := Z.add_with_get_carry(2^64, None, (x23₂, Some [0x0 ~> 0x1], (x14, Some [0x0 ~> 0xffffffffffffffff], (x15₂, Some [0x0 ~> 0xffffffffffffffff])))) in let x25 := Z.mul_split(2^64, None, (x3, Some [0x0 ~> 0xffffffffffffffff], (x2[3], Some [0x0 ~> 0xffffffffffffffff]))) in let x26 := Z.mul_split(2^64, None, (x3, Some [0x0 ~> 0xffffffffffffffff], (x2[2], Some [0x0 ~> 0xffffffffffffffff]))) in let x27 := Z.mul_split(2^64, None, (x3, Some [0x0 ~> 0xffffffffffffffff], (x2[1], Some [0x0 ~> 0xffffffffffffffff]))) in let x28 := Z.mul_split(2^64, None, (x3, Some [0x0 ~> 0xffffffffffffffff], (x2[0], Some [0x0 ~> 0xffffffffffffffff]))) in let x29 := Z.add_get_carry(2^64, None, (x28₂, Some [0x0 ~> 0xffffffffffffffff], (x27₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x30 := Z.add_with_get_carry(2^64, None, (x29₂, Some [0x0 ~> 0x1], (x27₂, Some [0x0 ~> 0xffffffffffffffff], (x26₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x31 := Z.add_with_get_carry(2^64, None, (x30₂, Some [0x0 ~> 0x1], (x26₂, Some [0x0 ~> 0xffffffffffffffff], (x25₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x32 := x31₂ + x25₂ (* : uint64_t *) in let x33 := Z.add_get_carry(2^64, None, (x21₁, Some [0x0 ~> 0xffffffffffffffff], (x28₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x34 := Z.add_with_get_carry(2^64, None, (x33₂, Some [0x0 ~> 0x1], (x22₁, Some [0x0 ~> 0xffffffffffffffff], (x29₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x35 := Z.add_with_get_carry(2^64, None, (x34₂, Some [0x0 ~> 0x1], (x23₁, Some [0x0 ~> 0xffffffffffffffff], (x30₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x36 := Z.add_with_get_carry(2^64, None, (x35₂, Some [0x0 ~> 0x1], (x24₁, Some [0x0 ~> 0xffffffffffffffff], (x31₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x37 := Z.add_with_get_carry(2^64, None, (x36₂, Some [0x0 ~> 0x1], (x24₂, Some [0x0 ~> 0x1], (x32, Some [0x0 ~> 0xffffffffffffffff])))) in let x38 := Z.mul_split(2^64, None, (x33₁, Some [0x0 ~> 0xffffffffffffffff], (0xffffffff00000001, None))) in let x39 := Z.mul_split(2^64, None, (x33₁, Some [0x0 ~> 0xffffffffffffffff], (2^32-1, None))) in let x40 := Z.mul_split(2^64, None, (x33₁, Some [0x0 ~> 0xffffffffffffffff], (2^64-1, None))) in let x41 := Z.add_get_carry(2^64, None, (x40₂, Some [0x0 ~> 0xffffffffffffffff], (x39₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x42 := x41₂ + x39₂ (* : uint64_t *) in let x43 := Z.add_get_carry(2^64, None, (x33₁, Some [0x0 ~> 0xffffffffffffffff], (x40₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x44 := Z.add_with_get_carry(2^64, None, (x43₂, Some [0x0 ~> 0x1], (x34₁, Some [0x0 ~> 0xffffffffffffffff], (x41₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x45 := Z.add_with_get_carry(2^64, None, (x44₂, Some [0x0 ~> 0x1], (x35₁, Some [0x0 ~> 0xffffffffffffffff], (x42, Some [0x0 ~> 0xffffffffffffffff])))) in let x46 := Z.add_with_get_carry(2^64, None, (x45₂, Some [0x0 ~> 0x1], (x36₁, Some [0x0 ~> 0xffffffffffffffff], (x38₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x47 := Z.add_with_get_carry(2^64, None, (x46₂, Some [0x0 ~> 0x1], (x37₁, Some [0x0 ~> 0xffffffffffffffff], (x38₂, Some [0x0 ~> 0xffffffffffffffff])))) in let x48 := x47₂ + x37₂ (* : uint64_t *) in let x49 := Z.mul_split(2^64, None, (x4, Some [0x0 ~> 0xffffffffffffffff], (x2[3], Some [0x0 ~> 0xffffffffffffffff]))) in let x50 := Z.mul_split(2^64, None, (x4, Some [0x0 ~> 0xffffffffffffffff], (x2[2], Some [0x0 ~> 0xffffffffffffffff]))) in let x51 := Z.mul_split(2^64, None, (x4, Some [0x0 ~> 0xffffffffffffffff], (x2[1], Some [0x0 ~> 0xffffffffffffffff]))) in let x52 := Z.mul_split(2^64, None, (x4, Some [0x0 ~> 0xffffffffffffffff], (x2[0], Some [0x0 ~> 0xffffffffffffffff]))) in let x53 := Z.add_get_carry(2^64, None, (x52₂, Some [0x0 ~> 0xffffffffffffffff], (x51₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x54 := Z.add_with_get_carry(2^64, None, (x53₂, Some [0x0 ~> 0x1], (x51₂, Some [0x0 ~> 0xffffffffffffffff], (x50₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x55 := Z.add_with_get_carry(2^64, None, (x54₂, Some [0x0 ~> 0x1], (x50₂, Some [0x0 ~> 0xffffffffffffffff], (x49₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x56 := x55₂ + x49₂ (* : uint64_t *) in let x57 := Z.add_get_carry(2^64, None, (x44₁, Some [0x0 ~> 0xffffffffffffffff], (x52₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x58 := Z.add_with_get_carry(2^64, None, (x57₂, Some [0x0 ~> 0x1], (x45₁, Some [0x0 ~> 0xffffffffffffffff], (x53₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x59 := Z.add_with_get_carry(2^64, None, (x58₂, Some [0x0 ~> 0x1], (x46₁, Some [0x0 ~> 0xffffffffffffffff], (x54₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x60 := Z.add_with_get_carry(2^64, None, (x59₂, Some [0x0 ~> 0x1], (x47₁, Some [0x0 ~> 0xffffffffffffffff], (x55₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x61 := Z.add_with_get_carry(2^64, None, (x60₂, Some [0x0 ~> 0x1], (x48, Some [0x0 ~> 0xffffffffffffffff], (x56, Some [0x0 ~> 0xffffffffffffffff])))) in let x62 := Z.mul_split(2^64, None, (x57₁, Some [0x0 ~> 0xffffffffffffffff], (0xffffffff00000001, None))) in let x63 := Z.mul_split(2^64, None, (x57₁, Some [0x0 ~> 0xffffffffffffffff], (2^32-1, None))) in let x64 := Z.mul_split(2^64, None, (x57₁, Some [0x0 ~> 0xffffffffffffffff], (2^64-1, None))) in let x65 := Z.add_get_carry(2^64, None, (x64₂, Some [0x0 ~> 0xffffffffffffffff], (x63₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x66 := x65₂ + x63₂ (* : uint64_t *) in let x67 := Z.add_get_carry(2^64, None, (x57₁, Some [0x0 ~> 0xffffffffffffffff], (x64₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x68 := Z.add_with_get_carry(2^64, None, (x67₂, Some [0x0 ~> 0x1], (x58₁, Some [0x0 ~> 0xffffffffffffffff], (x65₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x69 := Z.add_with_get_carry(2^64, None, (x68₂, Some [0x0 ~> 0x1], (x59₁, Some [0x0 ~> 0xffffffffffffffff], (x66, Some [0x0 ~> 0xffffffffffffffff])))) in let x70 := Z.add_with_get_carry(2^64, None, (x69₂, Some [0x0 ~> 0x1], (x60₁, Some [0x0 ~> 0xffffffffffffffff], (x62₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x71 := Z.add_with_get_carry(2^64, None, (x70₂, Some [0x0 ~> 0x1], (x61₁, Some [0x0 ~> 0xffffffffffffffff], (x62₂, Some [0x0 ~> 0xffffffffffffffff])))) in let x72 := x71₂ + x61₂ (* : uint64_t *) in let x73 := Z.mul_split(2^64, None, (x5, Some [0x0 ~> 0xffffffffffffffff], (x2[3], Some [0x0 ~> 0xffffffffffffffff]))) in let x74 := Z.mul_split(2^64, None, (x5, Some [0x0 ~> 0xffffffffffffffff], (x2[2], Some [0x0 ~> 0xffffffffffffffff]))) in let x75 := Z.mul_split(2^64, None, (x5, Some [0x0 ~> 0xffffffffffffffff], (x2[1], Some [0x0 ~> 0xffffffffffffffff]))) in let x76 := Z.mul_split(2^64, None, (x5, Some [0x0 ~> 0xffffffffffffffff], (x2[0], Some [0x0 ~> 0xffffffffffffffff]))) in let x77 := Z.add_get_carry(2^64, None, (x76₂, Some [0x0 ~> 0xffffffffffffffff], (x75₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x78 := Z.add_with_get_carry(2^64, None, (x77₂, Some [0x0 ~> 0x1], (x75₂, Some [0x0 ~> 0xffffffffffffffff], (x74₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x79 := Z.add_with_get_carry(2^64, None, (x78₂, Some [0x0 ~> 0x1], (x74₂, Some [0x0 ~> 0xffffffffffffffff], (x73₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x80 := x79₂ + x73₂ (* : uint64_t *) in let x81 := Z.add_get_carry(2^64, None, (x68₁, Some [0x0 ~> 0xffffffffffffffff], (x76₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x82 := Z.add_with_get_carry(2^64, None, (x81₂, Some [0x0 ~> 0x1], (x69₁, Some [0x0 ~> 0xffffffffffffffff], (x77₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x83 := Z.add_with_get_carry(2^64, None, (x82₂, Some [0x0 ~> 0x1], (x70₁, Some [0x0 ~> 0xffffffffffffffff], (x78₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x84 := Z.add_with_get_carry(2^64, None, (x83₂, Some [0x0 ~> 0x1], (x71₁, Some [0x0 ~> 0xffffffffffffffff], (x79₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x85 := Z.add_with_get_carry(2^64, None, (x84₂, Some [0x0 ~> 0x1], (x72, Some [0x0 ~> 0xffffffffffffffff], (x80, Some [0x0 ~> 0xffffffffffffffff])))) in let x86 := Z.mul_split(2^64, None, (x81₁, Some [0x0 ~> 0xffffffffffffffff], (0xffffffff00000001, None))) in let x87 := Z.mul_split(2^64, None, (x81₁, Some [0x0 ~> 0xffffffffffffffff], (2^32-1, None))) in let x88 := Z.mul_split(2^64, None, (x81₁, Some [0x0 ~> 0xffffffffffffffff], (2^64-1, None))) in let x89 := Z.add_get_carry(2^64, None, (x88₂, Some [0x0 ~> 0xffffffffffffffff], (x87₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x90 := x89₂ + x87₂ (* : uint64_t *) in let x91 := Z.add_get_carry(2^64, None, (x81₁, Some [0x0 ~> 0xffffffffffffffff], (x88₁, Some [0x0 ~> 0xffffffffffffffff]))) in let x92 := Z.add_with_get_carry(2^64, None, (x91₂, Some [0x0 ~> 0x1], (x82₁, Some [0x0 ~> 0xffffffffffffffff], (x89₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x93 := Z.add_with_get_carry(2^64, None, (x92₂, Some [0x0 ~> 0x1], (x83₁, Some [0x0 ~> 0xffffffffffffffff], (x90, Some [0x0 ~> 0xffffffffffffffff])))) in let x94 := Z.add_with_get_carry(2^64, None, (x93₂, Some [0x0 ~> 0x1], (x84₁, Some [0x0 ~> 0xffffffffffffffff], (x86₁, Some [0x0 ~> 0xffffffffffffffff])))) in let x95 := Z.add_with_get_carry(2^64, None, (x94₂, Some [0x0 ~> 0x1], (x85₁, Some [0x0 ~> 0xffffffffffffffff], (x86₂, Some [0x0 ~> 0xffffffffffffffff])))) in let x96 := x95₂ + x85₂ (* : uint64_t *) in let x97 := Z.sub_with_get_borrow(2^64, None, (0, None, (x92₁, Some [0x0 ~> 0xffffffffffffffff], (2^64-1, None)))) in let x98 := Z.sub_with_get_borrow(2^64, None, (x97₂, Some [0x0 ~> 0x1], (x93₁, Some [0x0 ~> 0xffffffffffffffff], (2^32-1, None)))) in let x99 := Z.sub_with_get_borrow(2^64, None, (x98₂, Some [0x0 ~> 0x1], (x94₁, Some [0x0 ~> 0xffffffffffffffff], (0, None)))) in let x100 := Z.sub_with_get_borrow(2^64, None, (x99₂, Some [0x0 ~> 0x1], (x95₁, Some [0x0 ~> 0xffffffffffffffff], (0xffffffff00000001, None)))) in let x101 := Z.sub_with_get_borrow(2^64, None, (x100₂, Some [0x0 ~> 0x1], (x96, Some [0x0 ~> 0xffffffffffffffff], (0, None)))) in let x102 := Z.zselect(x101₂, Some [0x0 ~> 0x1], (x97₁, Some [0x0 ~> 0xffffffffffffffff], (x92₁, Some [0x0 ~> 0xffffffffffffffff]))) (* : uint64_t *) in let x103 := Z.zselect(x101₂, Some [0x0 ~> 0x1], (x98₁, Some [0x0 ~> 0xffffffffffffffff], (x93₁, Some [0x0 ~> 0xffffffffffffffff]))) (* : uint64_t *) in let x104 := Z.zselect(x101₂, Some [0x0 ~> 0x1], (x99₁, Some [0x0 ~> 0xffffffffffffffff], (x94₁, Some [0x0 ~> 0xffffffffffffffff]))) (* : uint64_t *) in let x105 := Z.zselect(x101₂, Some [0x0 ~> 0x1], (x100₁, Some [0x0 ~> 0xffffffffffffffff], (x95₁, Some [0x0 ~> 0xffffffffffffffff]))) (* : uint64_t *) in x102 :: x103 :: x104 :: x105 :: [] ) which can be pretty-printed as: /* * Input Bounds: * arg1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] * arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]] * Output Bounds: * out1: None */ void f(uint64_t out1[4], const uint64_t arg1[4], const uint64_t arg2[4]) { uint64_t x1; uint64_t x2; uint64_t x3; uint64_t x4; uint64_t x5; uint64_t x6; uint64_t x7; uint64_t x8; uint64_t x9; uint64_t x10; uint64_t x11; uint64_t x12; uint64_t x13; uint1 x14; uint64_t x15; uint1 x16; uint64_t x17; uint1 x18; uint64_t x19; uint64_t x20; uint64_t x21; uint64_t x22; uint64_t x23; uint64_t x24; uint64_t x25; uint64_t x26; uint1 x27; uint64_t x28; uint64_t x29; uint1 x30; uint64_t x31; uint1 x32; uint64_t x33; uint1 x34; uint64_t x35; uint1 x36; uint64_t x37; uint1 x38; uint64_t x39; uint64_t x40; uint64_t x41; uint64_t x42; uint64_t x43; uint64_t x44; uint64_t x45; uint64_t x46; uint64_t x47; uint1 x48; uint64_t x49; uint1 x50; uint64_t x51; uint1 x52; uint64_t x53; uint64_t x54; uint1 x55; uint64_t x56; uint1 x57; uint64_t x58; uint1 x59; uint64_t x60; uint1 x61; uint64_t x62; uint1 x63; uint64_t x64; uint64_t x65; uint64_t x66; uint64_t x67; uint64_t x68; uint64_t x69; uint64_t x70; uint1 x71; uint64_t x72; uint64_t x73; uint1 x74; uint64_t x75; uint1 x76; uint64_t x77; uint1 x78; uint64_t x79; uint1 x80; uint64_t x81; uint1 x82; uint64_t x83; uint64_t x84; uint64_t x85; uint64_t x86; uint64_t x87; uint64_t x88; uint64_t x89; uint64_t x90; uint64_t x91; uint64_t x92; uint1 x93; uint64_t x94; uint1 x95; uint64_t x96; uint1 x97; uint64_t x98; uint64_t x99; uint1 x100; uint64_t x101; uint1 x102; uint64_t x103; uint1 x104; uint64_t x105; uint1 x106; uint64_t x107; uint1 x108; uint64_t x109; uint64_t x110; uint64_t x111; uint64_t x112; uint64_t x113; uint64_t x114; uint64_t x115; uint1 x116; uint64_t x117; uint64_t x118; uint1 x119; uint64_t x120; uint1 x121; uint64_t x122; uint1 x123; uint64_t x124; uint1 x125; uint64_t x126; uint1 x127; uint64_t x128; uint64_t x129; uint64_t x130; uint64_t x131; uint64_t x132; uint64_t x133; uint64_t x134; uint64_t x135; uint64_t x136; uint64_t x137; uint1 x138; uint64_t x139; uint1 x140; uint64_t x141; uint1 x142; uint64_t x143; uint64_t x144; uint1 x145; uint64_t x146; uint1 x147; uint64_t x148; uint1 x149; uint64_t x150; uint1 x151; uint64_t x152; uint1 x153; uint64_t x154; uint64_t x155; uint64_t x156; uint64_t x157; uint64_t x158; uint64_t x159; uint64_t x160; uint1 x161; uint64_t x162; uint64_t x163; uint1 x164; uint64_t x165; uint1 x166; uint64_t x167; uint1 x168; uint64_t x169; uint1 x170; uint64_t x171; uint1 x172; uint64_t x173; uint64_t x174; uint1 x175; uint64_t x176; uint1 x177; uint64_t x178; uint1 x179; uint64_t x180; uint1 x181; uint64_t x182; uint1 x183; uint64_t x184; uint64_t x185; uint64_t x186; uint64_t x187; x1 = (arg1[1]); x2 = (arg1[2]); x3 = (arg1[3]); x4 = (arg1[0]); mulx_u64(&x5, &x6, x4, (arg2[3])); mulx_u64(&x7, &x8, x4, (arg2[2])); mulx_u64(&x9, &x10, x4, (arg2[1])); mulx_u64(&x11, &x12, x4, (arg2[0])); addcarryx_u64(&x13, &x14, 0x0, x12, x9); addcarryx_u64(&x15, &x16, x14, x10, x7); addcarryx_u64(&x17, &x18, x16, x8, x5); x19 = (x18 + x6); mulx_u64(&x20, &x21, x11, UINT64_C(0xffffffff00000001)); mulx_u64(&x22, &x23, x11, UINT32_C(0xffffffff)); mulx_u64(&x24, &x25, x11, UINT64_C(0xffffffffffffffff)); addcarryx_u64(&x26, &x27, 0x0, x25, x22); x28 = (x27 + x23); addcarryx_u64(&x29, &x30, 0x0, x11, x24); addcarryx_u64(&x31, &x32, x30, x13, x26); addcarryx_u64(&x33, &x34, x32, x15, x28); addcarryx_u64(&x35, &x36, x34, x17, x20); addcarryx_u64(&x37, &x38, x36, x19, x21); mulx_u64(&x39, &x40, x1, (arg2[3])); mulx_u64(&x41, &x42, x1, (arg2[2])); mulx_u64(&x43, &x44, x1, (arg2[1])); mulx_u64(&x45, &x46, x1, (arg2[0])); addcarryx_u64(&x47, &x48, 0x0, x46, x43); addcarryx_u64(&x49, &x50, x48, x44, x41); addcarryx_u64(&x51, &x52, x50, x42, x39); x53 = (x52 + x40); addcarryx_u64(&x54, &x55, 0x0, x31, x45); addcarryx_u64(&x56, &x57, x55, x33, x47); addcarryx_u64(&x58, &x59, x57, x35, x49); addcarryx_u64(&x60, &x61, x59, x37, x51); addcarryx_u64(&x62, &x63, x61, x38, x53); mulx_u64(&x64, &x65, x54, UINT64_C(0xffffffff00000001)); mulx_u64(&x66, &x67, x54, UINT32_C(0xffffffff)); mulx_u64(&x68, &x69, x54, UINT64_C(0xffffffffffffffff)); addcarryx_u64(&x70, &x71, 0x0, x69, x66); x72 = (x71 + x67); addcarryx_u64(&x73, &x74, 0x0, x54, x68); addcarryx_u64(&x75, &x76, x74, x56, x70); addcarryx_u64(&x77, &x78, x76, x58, x72); addcarryx_u64(&x79, &x80, x78, x60, x64); addcarryx_u64(&x81, &x82, x80, x62, x65); x83 = ((uint64_t)x82 + x63); mulx_u64(&x84, &x85, x2, (arg2[3])); mulx_u64(&x86, &x87, x2, (arg2[2])); mulx_u64(&x88, &x89, x2, (arg2[1])); mulx_u64(&x90, &x91, x2, (arg2[0])); addcarryx_u64(&x92, &x93, 0x0, x91, x88); addcarryx_u64(&x94, &x95, x93, x89, x86); addcarryx_u64(&x96, &x97, x95, x87, x84); x98 = (x97 + x85); addcarryx_u64(&x99, &x100, 0x0, x75, x90); addcarryx_u64(&x101, &x102, x100, x77, x92); addcarryx_u64(&x103, &x104, x102, x79, x94); addcarryx_u64(&x105, &x106, x104, x81, x96); addcarryx_u64(&x107, &x108, x106, x83, x98); mulx_u64(&x109, &x110, x99, UINT64_C(0xffffffff00000001)); mulx_u64(&x111, &x112, x99, UINT32_C(0xffffffff)); mulx_u64(&x113, &x114, x99, UINT64_C(0xffffffffffffffff)); addcarryx_u64(&x115, &x116, 0x0, x114, x111); x117 = (x116 + x112); addcarryx_u64(&x118, &x119, 0x0, x99, x113); addcarryx_u64(&x120, &x121, x119, x101, x115); addcarryx_u64(&x122, &x123, x121, x103, x117); addcarryx_u64(&x124, &x125, x123, x105, x109); addcarryx_u64(&x126, &x127, x125, x107, x110); x128 = ((uint64_t)x127 + x108); mulx_u64(&x129, &x130, x3, (arg2[3])); mulx_u64(&x131, &x132, x3, (arg2[2])); mulx_u64(&x133, &x134, x3, (arg2[1])); mulx_u64(&x135, &x136, x3, (arg2[0])); addcarryx_u64(&x137, &x138, 0x0, x136, x133); addcarryx_u64(&x139, &x140, x138, x134, x131); addcarryx_u64(&x141, &x142, x140, x132, x129); x143 = (x142 + x130); addcarryx_u64(&x144, &x145, 0x0, x120, x135); addcarryx_u64(&x146, &x147, x145, x122, x137); addcarryx_u64(&x148, &x149, x147, x124, x139); addcarryx_u64(&x150, &x151, x149, x126, x141); addcarryx_u64(&x152, &x153, x151, x128, x143); mulx_u64(&x154, &x155, x144, UINT64_C(0xffffffff00000001)); mulx_u64(&x156, &x157, x144, UINT32_C(0xffffffff)); mulx_u64(&x158, &x159, x144, UINT64_C(0xffffffffffffffff)); addcarryx_u64(&x160, &x161, 0x0, x159, x156); x162 = (x161 + x157); addcarryx_u64(&x163, &x164, 0x0, x144, x158); addcarryx_u64(&x165, &x166, x164, x146, x160); addcarryx_u64(&x167, &x168, x166, x148, x162); addcarryx_u64(&x169, &x170, x168, x150, x154); addcarryx_u64(&x171, &x172, x170, x152, x155); x173 = ((uint64_t)x172 + x153); subborrowx_u64(&x174, &x175, 0x0, x165, UINT64_C(0xffffffffffffffff)); subborrowx_u64(&x176, &x177, x175, x167, UINT32_C(0xffffffff)); subborrowx_u64(&x178, &x179, x177, x169, 0x0); subborrowx_u64(&x180, &x181, x179, x171, UINT64_C(0xffffffff00000001)); subborrowx_u64(&x182, &x183, x181, x173, 0x0); cmovznz_u64(&x184, x183, x174, x165); cmovznz_u64(&x185, x183, x176, x167); cmovznz_u64(&x186, x183, x178, x169); cmovznz_u64(&x187, x183, x180, x171); out1[0] = x184; out1[1] = x185; out1[2] = x186; out1[3] = x187; } Assembly: ecp_nistz256_mul_mont: ;push rbp ;push rbx ;push r12 ;push r13 ;push r14 ;push r15 mov rbx, rdx mov rax, QWORD PTR [rbx] mov r9, QWORD PTR [rsi] mov r10, QWORD PTR [rsi + 0x08 * 1] mov r11, QWORD PTR [rsi + 0x08 * 2] mov r12, QWORD PTR [rsi + 0x08 * 3] mov rbp, rax mul r9 mov r14, 4294967295 mov r8, rax mov rax, rbp mov r9, rdx mul r10 mov r15, 18446744069414584321 add r9, rax mov rax, rbp adc rdx, 0 mov r10, rdx mul r11 add r10, rax mov rax, rbp adc rdx, 0 mov r11, rdx mul r12 add r11, rax mov rax, r8 adc rdx, 0 xor r13, r13 mov r12, rdx mov rbp, r8 shl r8, 32 mul r15 shr rbp, 32 add r9, r8 adc r10, rbp adc r11, rax mov rax, QWORD PTR [rbx + 0x08 * 1] adc r12, rdx adc r13, 0 xor r8, r8 mov rbp, rax mul QWORD PTR [rsi] add r9, rax mov rax, rbp adc rdx, 0 mov rcx, rdx mul QWORD PTR [rsi + 0x08 * 1] add r10, rcx adc rdx, 0 add r10, rax mov rax, rbp adc rdx, 0 mov rcx, rdx mul QWORD PTR [rsi + 0x08 * 2] add r11, rcx adc rdx, 0 add r11, rax mov rax, rbp adc rdx, 0 mov rcx, rdx mul QWORD PTR [rsi + 0x08 * 3] add r12, rcx adc rdx, 0 add r12, rax mov rax, r9 adc r13, rdx adc r8, 0 mov rbp, r9 shl r9, 32 mul r15 shr rbp, 32 add r10, r9 adc r11, rbp adc r12, rax mov rax, QWORD PTR [rbx + 0x08 * 2] adc r13, rdx adc r8, 0 xor r9, r9 mov rbp, rax mul QWORD PTR [rsi] add r10, rax mov rax, rbp adc rdx, 0 mov rcx, rdx mul QWORD PTR [rsi + 0x08 * 1] add r11, rcx adc rdx, 0 add r11, rax mov rax, rbp adc rdx, 0 mov rcx, rdx mul QWORD PTR [rsi + 0x08 * 2] add r12, rcx adc rdx, 0 add r12, rax mov rax, rbp adc rdx, 0 mov rcx, rdx mul QWORD PTR [rsi + 0x08 * 3] add r13, rcx adc rdx, 0 add r13, rax mov rax, r10 adc r8, rdx adc r9, 0 mov rbp, r10 shl r10, 32 mul r15 shr rbp, 32 add r11, r10 adc r12, rbp adc r13, rax mov rax, QWORD PTR [rbx + 0x08 * 3] adc r8, rdx adc r9, 0 xor r10, r10 mov rbp, rax mul QWORD PTR [rsi] add r11, rax mov rax, rbp adc rdx, 0 mov rcx, rdx mul QWORD PTR [rsi + 0x08 * 1] add r12, rcx adc rdx, 0 add r12, rax mov rax, rbp adc rdx, 0 mov rcx, rdx mul QWORD PTR [rsi + 0x08 * 2] add r13, rcx adc rdx, 0 add r13, rax mov rax, rbp adc rdx, 0 mov rcx, rdx mul QWORD PTR [rsi + 0x08 * 3] add r8, rcx adc rdx, 0 add r8, rax mov rax, r11 adc r9, rdx adc r10, 0 mov rbp, r11 shl r11, 32 mul r15 shr rbp, 32 add r12, r11 adc r13, rbp mov rcx, r12 adc r8, rax adc r9, rdx mov rbp, r13 adc r10, 0 sub r12, 18446744073709551615 mov rbx, r8 sbb r13, r14 sbb r8, 0 mov rdx, r9 sbb r9, r15 sbb r10, 0 cmovb r12, rcx cmovb r13, rbp mov QWORD PTR [rdi], r12 cmovb r8, rbx mov QWORD PTR [rdi + 0x08 * 1], r13 cmovb r9, rdx mov QWORD PTR [rdi + 0x08 * 2], r8 mov QWORD PTR [rdi + 0x08 * 3], r9 ;mov r15,QWORD PTR [rsp] ;mov r14,QWORD PTR [rsp+0x8] ;mov r13,QWORD PTR [rsp+0x10] ;mov r12,QWORD PTR [rsp+0x18] ;mov rbx,QWORD PTR [rsp+0x20] ;mov rbp,QWORD PTR [rsp+0x28] ;lea rsp,[rsp+0x30] ret Equivalence checking error: Unable to unify: In environment: (*symbolic_state*) {| dag_state := (*dag*)[ (*0*) (old 64 0, []); (*1*) (old 64 1, []); (*2*) (old 64 2, []); (*3*) (old 64 3, []); (*4*) (old 64 4, []); (*5*) (old 64 5, []); (*6*) (old 64 6, []); (*7*) (old 64 7, []); (*8*) (const 0, []); (*9*) (const 18446744073709551616, []); (*10*) (const 64, []); (*11*) (mulZ, [3, 4]); (*12*) (mul 64, [3, 4]); (*13*) (shrZ, [11, 10]); (*14*) (shr 64, [11, 10]); (*15*) (mulZ, [3, 5]); (*16*) (mul 64, [3, 5]); (*17*) (shrZ, [15, 10]); (*18*) (shr 64, [15, 10]); (*19*) (mulZ, [3, 6]); (*20*) (mul 64, [3, 6]); (*21*) (shrZ, [19, 10]); (*22*) (shr 64, [19, 10]); (*23*) (mulZ, [3, 7]); (*24*) (mul 64, [3, 7]); (*25*) (shrZ, [23, 10]); (*26*) (shr 64, [23, 10]); (*27*) (add 64, [20, 26]); (*28*) (addcarryZ 64, [26, 20]); (*29*) (addcarry 64, [20, 26]); (*30*) (add 64, [16, 22, 29]); (*31*) (addcarryZ 64, [29, 22, 16]); (*32*) (addcarry 64, [16, 22, 29]); (*33*) (add 64, [12, 18, 32]); (*34*) (addcarryZ 64, [32, 18, 12]); (*35*) (addcarry 64, [12, 18, 32]); (*36*) (addZ, [35, 14]); (*37*) (add 64, [14, 35]); (*38*) (const 18446744069414584321, []); (*39*) (mulZ, [24, 38]); (*40*) (mul 64, [3, 7, 38]); (*41*) (shrZ, [39, 10]); (*42*) (shr 64, [39, 10]); (*43*) (const 4294967295, []); (*44*) (mulZ, [24, 43]); (*45*) (mul 64, [3, 7, 43]); (*46*) (shrZ, [44, 10]); (*47*) (shr 64, [44, 10]); (*48*) (const 18446744073709551615, []); (*49*) (mulZ, [24, 48]); (*50*) (mul 64, [3, 7, 48]); (*51*) (shrZ, [49, 10]); (*52*) (shr 64, [49, 10]); (*53*) (add 64, [45, 52]); (*54*) (addcarryZ 64, [52, 45]); (*55*) (addcarry 64, [45, 52]); (*56*) (addZ, [55, 47]); (*57*) (add 64, [47, 55]); (*58*) (mul 64, [3, 7, 8]); (*59*) (add 64, [58]); (*60*) (addcarryZ 64, [24, 50]); (*61*) (addcarry 64, [24, 50]); (*62*) (add 64, [20, 26, 45, 52, 61]); (*63*) (addcarryZ 64, [61, 27, 53]); (*64*) (addcarry 64, [27, 53, 61]); (*65*) (add 64, [16, 22, 29, 47, 55, 64]); (*66*) (addcarryZ 64, [64, 30, 57]); (*67*) (addcarry 64, [30, 57, 64]); (*68*) (add 64, [12, 18, 32, 40, 67]); (*69*) (addcarryZ 64, [67, 33, 40]); (*70*) (addcarry 64, [33, 40, 67]); (*71*) (add 64, [14, 35, 42, 70]); (*72*) (addcarryZ 64, [70, 37, 42]); (*73*) (addcarry 64, [37, 42, 70]); (*74*) (mulZ, [2, 4]); (*75*) (mul 64, [2, 4]); (*76*) (shrZ, [74, 10]); (*77*) (shr 64, [74, 10]); (*78*) (mulZ, [2, 5]); (*79*) (mul 64, [2, 5]); (*80*) (shrZ, [78, 10]); (*81*) (shr 64, [78, 10]); (*82*) (mulZ, [2, 6]); (*83*) (mul 64, [2, 6]); (*84*) (shrZ, [82, 10]); (*85*) (shr 64, [82, 10]); (*86*) (mulZ, [2, 7]); (*87*) (mul 64, [2, 7]); (*88*) (shrZ, [86, 10]); (*89*) (shr 64, [86, 10]); (*90*) (add 64, [83, 89]); (*91*) (addcarryZ 64, [89, 83]); (*92*) (addcarry 64, [83, 89]); (*93*) (add 64, [79, 85, 92]); (*94*) (addcarryZ 64, [92, 85, 79]); (*95*) (addcarry 64, [79, 85, 92]); (*96*) (add 64, [75, 81, 95]); (*97*) (addcarryZ 64, [95, 81, 75]); (*98*) (addcarry 64, [75, 81, 95]); (*99*) (addZ, [98, 77]); (*100*) (add 64, [77, 98]); (*101*) (add 64, [20, 26, 45, 52, 61, 87]); (*102*) (addcarryZ 64, [62, 87]); (*103*) (addcarry 64, [62, 87]); (*104*) (add 64, [16, 22, 29, 47, 55, 64, 83, 89, 103]); (*105*) (addcarryZ 64, [103, 65, 90]); (*106*) (addcarry 64, [65, 90, 103]); (*107*) (add 64, [12, 18, 32, 40, 67, 79, 85, 92, 106]); (*108*) (addcarryZ 64, [106, 68, 93]); (*109*) (addcarry 64, [68, 93, 106]); (*110*) (add 64, [14, 35, 42, 70, 75, 81, 95, 109]); (*111*) (addcarryZ 64, [109, 71, 96]); (*112*) (addcarry 64, [71, 96, 109]); (*113*) (add 64, [73, 77, 98, 112]); (*114*) (addcarryZ 64, [112, 73, 100]); (*115*) (addcarry 64, [73, 100, 112]); (*116*) (mulZ, [38, 101]); (*117*) (mul 64, [38, 101]); (*118*) (shrZ, [116, 10]); (*119*) (shr 64, [116, 10]); (*120*) (mulZ, [43, 101]); (*121*) (mul 64, [43, 101]); (*122*) (shrZ, [120, 10]); (*123*) (shr 64, [120, 10]); (*124*) (mulZ, [48, 101]); (*125*) (mul 64, [48, 101]); (*126*) (shrZ, [124, 10]); (*127*) (shr 64, [124, 10]); (*128*) (add 64, [121, 127]); (*129*) (addcarryZ 64, [127, 121]); (*130*) (addcarry 64, [121, 127]); (*131*) (addZ, [130, 123]); (*132*) (add 64, [123, 130]); (*133*) (add 64, [20, 26, 45, 52, 61, 87, 125]); (*134*) (addcarryZ 64, [101, 125]); (*135*) (addcarry 64, [101, 125]); (*136*) (add 64, [16, 22, 29, 47, 55, 64, 83, 89, 103, 121, 127, 135]); (*137*) (addcarryZ 64, [135, 104, 128]); (*138*) (addcarry 64, [104, 128, 135]); (*139*) (add 64, [12, 18, 32, 40, 67, 79, 85, 92, 106, 123, 130, 138]); (*140*) (addcarryZ 64, [138, 107, 132]); (*141*) (addcarry 64, [107, 132, 138]); (*142*) (add 64, [14, 35, 42, 70, 75, 81, 95, 109, 117, 141]); (*143*) (addcarryZ 64, [141, 110, 117]); (*144*) (addcarry 64, [110, 117, 141]); (*145*) (add 64, [73, 77, 98, 112, 119, 144]); (*146*) (addcarryZ 64, [144, 113, 119]); (*147*) (addcarry 64, [113, 119, 144]); (*148*) (addZ, [147, 115]); (*149*) (add 64, [115, 147]); (*150*) (mulZ, [1, 4]); (*151*) (mul 64, [1, 4]); (*152*) (shrZ, [150, 10]); (*153*) (shr 64, [150, 10]); (*154*) (mulZ, [1, 5]); (*155*) (mul 64, [1, 5]); (*156*) (shrZ, [154, 10]); (*157*) (shr 64, [154, 10]); (*158*) (mulZ, [1, 6]); (*159*) (mul 64, [1, 6]); (*160*) (shrZ, [158, 10]); (*161*) (shr 64, [158, 10]); (*162*) (mulZ, [1, 7]); (*163*) (mul 64, [1, 7]); (*164*) (shrZ, [162, 10]); (*165*) (shr 64, [162, 10]); (*166*) (add 64, [159, 165]); (*167*) (addcarryZ 64, [165, 159]); (*168*) (addcarry 64, [159, 165]); (*169*) (add 64, [155, 161, 168]); (*170*) (addcarryZ 64, [168, 161, 155]); (*171*) (addcarry 64, [155, 161, 168]); (*172*) (add 64, [151, 157, 171]); (*173*) (addcarryZ 64, [171, 157, 151]); (*174*) (addcarry 64, [151, 157, 171]); (*175*) (addZ, [174, 153]); (*176*) (add 64, [153, 174]); (*177*) (add 64, [16, 22, 29, 47, 55, 64, 83, 89, 103, 121, 127, 135, 163]); (*178*) (addcarryZ 64, [136, 163]); (*179*) (addcarry 64, [136, 163]); (*180*) (add 64, [12, 18, 32, 40, 67, 79, 85, 92, 106, 123, 130, 138, 159, 165, 179]); (*181*) (addcarryZ 64, [179, 139, 166]); (*182*) (addcarry 64, [139, 166, 179]); (*183*) (add 64, [14, 35, 42, 70, 75, 81, 95, 109, 117, 141, 155, 161, 168, 182]); (*184*) (addcarryZ 64, [182, 142, 169]); (*185*) (addcarry 64, [142, 169, 182]); (*186*) (add 64, [73, 77, 98, 112, 119, 144, 151, 157, 171, 185]); (*187*) (addcarryZ 64, [185, 145, 172]); (*188*) (addcarry 64, [145, 172, 185]); (*189*) (add 64, [115, 147, 153, 174, 188]); (*190*) (addcarryZ 64, [188, 149, 176]); (*191*) (addcarry 64, [149, 176, 188]); (*192*) (mulZ, [38, 177]); (*193*) (mul 64, [38, 177]); (*194*) (shrZ, [192, 10]); (*195*) (shr 64, [192, 10]); (*196*) (mulZ, [43, 177]); (*197*) (mul 64, [43, 177]); (*198*) (shrZ, [196, 10]); (*199*) (shr 64, [196, 10]); (*200*) (mulZ, [48, 177]); (*201*) (mul 64, [48, 177]); (*202*) (shrZ, [200, 10]); (*203*) (shr 64, [200, 10]); (*204*) (add 64, [197, 203]); (*205*) (addcarryZ 64, [203, 197]); (*206*) (addcarry 64, [197, 203]); (*207*) (addZ, [206, 199]); (*208*) (add 64, [199, 206]); (*209*) (add 64, [16, 22, 29, 47, 55, 64, 83, 89, 103, 121, 127, 135, 163, 201]); (*210*) (addcarryZ 64, [177, 201]); (*211*) (addcarry 64, [177, 201]); (*212*) (add 64, [12, 18, 32, 40, 67, 79, 85, 92, 106, 123, 130, 138, 159, 165, 179, 197, 203, 211]); (*213*) (addcarryZ 64, [211, 180, 204]); (*214*) (addcarry 64, [180, 204, 211]); (*215*) (add 64, [14, 35, 42, 70, 75, 81, 95, 109, 117, 141, 155, 161, 168, 182, 199, 206, 214]); (*216*) (addcarryZ 64, [214, 183, 208]); (*217*) (addcarry 64, [183, 208, 214]); (*218*) (add 64, [73, 77, 98, 112, 119, 144, 151, 157, 171, 185, 193, 217]); (*219*) (addcarryZ 64, [217, 186, 193]); (*220*) (addcarry 64, [186, 193, 217]); (*221*) (add 64, [115, 147, 153, 174, 188, 195, 220]); (*222*) (addcarryZ 64, [220, 189, 195]); (*223*) (addcarry 64, [189, 195, 220]); (*224*) (addZ, [223, 191]); (*225*) (add 64, [191, 223]); (*226*) (mulZ, [0, 4]); (*227*) (mul 64, [0, 4]); (*228*) (shrZ, [226, 10]); (*229*) (shr 64, [226, 10]); (*230*) (mulZ, [0, 5]); (*231*) (mul 64, [0, 5]); (*232*) (shrZ, [230, 10]); (*233*) (shr 64, [230, 10]); (*234*) (mulZ, [0, 6]); (*235*) (mul 64, [0, 6]); (*236*) (shrZ, [234, 10]); (*237*) (shr 64, [234, 10]); (*238*) (mulZ, [0, 7]); (*239*) (mul 64, [0, 7]); (*240*) (shrZ, [238, 10]); (*241*) (shr 64, [238, 10]); (*242*) (add 64, [235, 241]); (*243*) (addcarryZ 64, [241, 235]); (*244*) (addcarry 64, [235, 241]); (*245*) (add 64, [231, 237, 244]); (*246*) (addcarryZ 64, [244, 237, 231]); (*247*) (addcarry 64, [231, 237, 244]); (*248*) (add 64, [227, 233, 247]); (*249*) (addcarryZ 64, [247, 233, 227]); (*250*) (addcarry 64, [227, 233, 247]); (*251*) (addZ, [250, 229]); (*252*) (add 64, [229, 250]); (*253*) (add 64, [12, 18, 32, 40, 67, 79, 85, 92, 106, 123, 130, 138, 159, 165, 179, 197, 203, 211, 239]); (*254*) (addcarryZ 64, [212, 239]); (*255*) (addcarry 64, [212, 239]); (*256*) (add 64, [14, 35, 42, 70, 75, 81, 95, 109, 117, 141, 155, 161, 168, 182, 199, 206, 214, 235, 241, 255]); (*257*) (addcarryZ 64, [255, 215, 242]); (*258*) (addcarry 64, [215, 242, 255]); (*259*) (add 64, [73, 77, 98, 112, 119, 144, 151, 157, 171, 185, 193, 217, 231, 237, 244, 258]); (*260*) (addcarryZ 64, [258, 218, 245]); (*261*) (addcarry 64, [218, 245, 258]); (*262*) (add 64, [115, 147, 153, 174, 188, 195, 220, 227, 233, 247, 261]); (*263*) (addcarryZ 64, [261, 221, 248]); (*264*) (addcarry 64, [221, 248, 261]); (*265*) (add 64, [191, 223, 229, 250, 264]); (*266*) (addcarryZ 64, [264, 225, 252]); (*267*) (addcarry 64, [225, 252, 264]); (*268*) (mulZ, [38, 253]); (*269*) (mul 64, [38, 253]); (*270*) (shrZ, [268, 10]); (*271*) (shr 64, [268, 10]); (*272*) (mulZ, [43, 253]); (*273*) (mul 64, [43, 253]); (*274*) (shrZ, [272, 10]); (*275*) (shr 64, [272, 10]); (*276*) (mulZ, [48, 253]); (*277*) (mul 64, [48, 253]); (*278*) (shrZ, [276, 10]); (*279*) (shr 64, [276, 10]); (*280*) (add 64, [273, 279]); (*281*) (addcarryZ 64, [279, 273]); (*282*) (addcarry 64, [273, 279]); (*283*) (addZ, [282, 275]); (*284*) (add 64, [275, 282]); (*285*) (add 64, [12, 18, 32, 40, 67, 79, 85, 92, 106, 123, 130, 138, 159, 165, 179, 197, 203, 211, 239, 277]); (*286*) (addcarryZ 64, [253, 277]); (*287*) (addcarry 64, [253, 277]); (*288*) (add 64, [14, 35, 42, 70, 75, 81, 95, 109, 117, 141, 155, 161, 168, 182, 199, 206, 214, 235, 241, 255, 273, 279, 287]); (*289*) (addcarryZ 64, [287, 256, 280]); (*290*) (addcarry 64, [256, 280, 287]); (*291*) (add 64, [73, 77, 98, 112, 119, 144, 151, 157, 171, 185, 193, 217, 231, 237, 244, 258, 275, 282, 290]); (*292*) (addcarryZ 64, [290, 259, 284]); (*293*) (addcarry 64, [259, 284, 290]); (*294*) (add 64, [115, 147, 153, 174, 188, 195, 220, 227, 233, 247, 261, 269, 293]); (*295*) (addcarryZ 64, [293, 262, 269]); (*296*) (addcarry 64, [262, 269, 293]); (*297*) (add 64, [191, 223, 229, 250, 264, 271, 296]); (*298*) (addcarryZ 64, [296, 265, 271]); (*299*) (addcarry 64, [265, 271, 296]); (*300*) (addZ, [299, 267]); (*301*) (add 64, [267, 299]); (*302*) (const 1, []); (*303*) (add 64, [14, 35, 42, 70, 75, 81, 95, 109, 117, 141, 155, 161, 168, 182, 199, 206, 214, 235, 241, 255, 273, 279, 287, 302]); (*304*) (subborrowZ 64, [288, 48]); (*305*) (subborrow 64, [288, 48]); (*306*) (neg 64, [305]); (*307*) (add 64, [38, 73, 77, 98, 112, 119, 144, 151, 157, 171, 185, 193, 217, 231, 237, 244, 258, 275, 282, 290, 306]); (*308*) (subborrowZ 64, [291, 43, 305]); (*309*) (subborrow 64, [291, 43, 305]); (*310*) (neg 64, [309]); (*311*) (add 64, [115, 147, 153, 174, 188, 195, 220, 227, 233, 247, 261, 269, 293, 310]); (*312*) (subborrowZ 64, [294, 309]); (*313*) (subborrow 64, [294, 309]); (*314*) (neg 64, [313]); (*315*) (add 64, [43, 191, 223, 229, 250, 264, 271, 296, 314]); (*316*) (subborrowZ 64, [297, 38, 313]); (*317*) (subborrow 64, [297, 38, 313]); (*318*) (neg 64, [317]); (*319*) (add 64, [267, 299, 318]); (*320*) (subborrowZ 64, [301, 317]); (*321*) (subborrow 64, [301, 317]); (*322*) (selectznz, [321, 303, 288]); (*323*) (selectznz, [321, 307, 291]); (*324*) (selectznz, [321, 311, 294]); (*325*) (selectznz, [321, 315, 297]); (*326*) (old 64 326, []); (*327*) (old 64 327, []); (*328*) (old 64 328, []); (*329*) (old 64 329, []); (*330*) (old 64 330, []); (*331*) (old 64 331, []); (*332*) (old 64 332, []); (*333*) (old 64 333, []); (*334*) (old 64 334, []); (*335*) (old 64 335, []); (*336*) (old 64 336, []); (*337*) (old 64 337, []); (*338*) (old 64 338, []); (*339*) (old 64 339, []); (*340*) (old 64 340, []); (*341*) (old 64 341, []); (*342*) (old 64 342, []); (*343*) (old 64 343, []); (*344*) (old 64 344, []); (*345*) (old 64 345, []); (*346*) (old 64 346, []); (*347*) (const 8, []); (*348*) (add 64, [346, 347]); (*349*) (const 16, []); (*350*) (add 64, [346, 349]); (*351*) (const 24, []); (*352*) (add 64, [346, 351]); (*353*) (old 64 353, []); (*354*) (add 64, [347, 353]); (*355*) (add 64, [349, 353]); (*356*) (add 64, [351, 353]); (*357*) (old 64 357, []); (*358*) (add 64, [347, 357]); (*359*) (add 64, [349, 357]); (*360*) (add 64, [351, 357]); (*361*) (old 64 361, []); (*362*) (add 64, [26, 87]); (*363*) (addcarry 64, [26, 87]); (*364*) (addoverflow 64, [26, 87]); (*365*) (add 64, [89, 363]); (*366*) (addcarry 64, [89, 363]); (*367*) (addoverflow 64, [89, 363]); (*368*) (add 64, [89, 163, 363]); (*369*) (addcarry 64, [163, 365]); (*370*) (addoverflow 64, [163, 365]); (*371*) (add 64, [165, 369]); (*372*) (addcarry 64, [165, 369]); (*373*) (addoverflow 64, [165, 369]); (*374*) (add 64, [165, 239, 369]); (*375*) (addcarry 64, [239, 371]); (*376*) (addoverflow 64, [239, 371]); (*377*) (add 64, [241, 375]); (*378*) (addcarry 64, [241, 375]); (*379*) (addoverflow 64, [241, 375]); (*380*) (xorZ, [339, 339]); (*381*) (const 32, []); (*382*) (const 63, []); (*383*) (const 4294967296, []); (*384*) (mul 64, [3, 7, 383]); (*385*) (shr 64, [24, 381]); (*386*) (add 64, [26, 87, 384]); (*387*) (addcarry 64, [362, 384]); (*388*) (addoverflow 64, [362, 384]); (*389*) (add 64, [89, 163, 363, 385, 387]); (*390*) (addcarry 64, [368, 385, 387]); (*391*) (addoverflow 64, [368, 385, 387]); (*392*) (add 64, [40, 165, 239, 369, 390]); (*393*) (addcarry 64, [40, 374, 390]); (*394*) (addoverflow 64, [40, 374, 390]); (*395*) (add 64, [42, 241, 375, 393]); (*396*) (addcarry 64, [42, 377, 393]); (*397*) (addoverflow 64, [42, 377, 393]); (*398*) (xorZ, [384, 384]); (*399*) (add 64, [20, 26, 87, 384]); (*400*) (addcarry 64, [20, 386]); (*401*) (addoverflow 64, [20, 386]); (*402*) (add 64, [22, 400]); (*403*) (addcarry 64, [22, 400]); (*404*) (addoverflow 64, [22, 400]); (*405*) (add 64, [22, 89, 163, 363, 385, 387, 400]); (*406*) (addcarry 64, [389, 402]); (*407*) (addoverflow 64, [389, 402]); (*408*) (add 64, [85, 406]); (*409*) (addcarry 64, [85, 406]); (*410*) (addoverflow 64, [85, 406]); (*411*) (add 64, [22, 83, 89, 163, 363, 385, 387, 400]); (*412*) (addcarry 64, [83, 405]); (*413*) (addoverflow 64, [83, 405]); (*414*) (add 64, [85, 406, 412]); (*415*) (addcarry 64, [408, 412]); (*416*) (addoverflow 64, [408, 412]); (*417*) (add 64, [40, 85, 165, 239, 369, 390, 406, 412]); (*418*) (addcarry 64, [392, 414]); (*419*) (addoverflow 64, [392, 414]); (*420*) (add 64, [161, 418]); (*421*) (addcarry 64, [161, 418]); (*422*) (addoverflow 64, [161, 418]); (*423*) (add 64, [40, 85, 159, 165, 239, 369, 390, 406, 412]); (*424*) (addcarry 64, [159, 417]); (*425*) (addoverflow 64, [159, 417]); (*426*) (add 64, [161, 418, 424]); (*427*) (addcarry 64, [420, 424]); (*428*) (addoverflow 64, [420, 424]); (*429*) (add 64, [42, 161, 241, 375, 393, 418, 424]); (*430*) (addcarry 64, [395, 426]); (*431*) (addoverflow 64, [395, 426]); (*432*) (add 64, [237, 430]); (*433*) (addcarry 64, [237, 430]); (*434*) (addoverflow 64, [237, 430]); (*435*) (add 64, [42, 161, 235, 241, 375, 393, 418, 424]); (*436*) (addcarry 64, [235, 429]); (*437*) (addoverflow 64, [235, 429]); (*438*) (add 64, [237, 396, 430, 436]); (*439*) (addcarry 64, [396, 432, 436]); (*440*) (addoverflow 64, [396, 432, 436]); (*441*) (mul 64, [383, 399]); (*442*) (mulZ, [38, 399]); (*443*) (shrZ, [442, 10]); (*444*) (mul 64, [38, 399]); (*445*) (shr 64, [442, 10]); (*446*) (shr 64, [399, 381]); (*447*) (add 64, [22, 83, 89, 163, 363, 385, 387, 400, 441]); (*448*) (addcarry 64, [411, 441]); (*449*) (addoverflow 64, [411, 441]); (*450*) (add 64, [40, 85, 159, 165, 239, 369, 390, 406, 412, 446, 448]); (*451*) (addcarry 64, [423, 446, 448]); (*452*) (addoverflow 64, [423, 446, 448]); (*453*) (add 64, [42, 161, 235, 241, 375, 393, 418, 424, 444, 451]); (*454*) (addcarry 64, [435, 444, 451]); (*455*) (addoverflow 64, [435, 444, 451]); (*456*) (add 64, [237, 396, 430, 436, 445, 454]); (*457*) (addcarry 64, [438, 445, 454]); (*458*) (addoverflow 64, [438, 445, 454]); (*459*) (add 64, [439, 457]); (*460*) (xorZ, [441, 441]); (*461*) (add 64, [16, 22, 83, 89, 163, 363, 385, 387, 400, 441]); (*462*) (addcarry 64, [16, 447]); (*463*) (addoverflow 64, [16, 447]); (*464*) (add 64, [18, 462]); (*465*) (addcarry 64, [18, 462]); (*466*) (addoverflow 64, [18, 462]); (*467*) (add 64, [18, 40, 85, 159, 165, 239, 369, 390, 406, 412, 446, 448, 462]); (*468*) (addcarry 64, [450, 464]); (*469*) (addoverflow 64, [450, 464]); (*470*) (add 64, [81, 468]); (*471*) (addcarry 64, [81, 468]); (*472*) (addoverflow 64, [81, 468]); (*473*) (add 64, [18, 40, 79, 85, 159, 165, 239, 369, 390, 406, 412, 446, 448, 462]); (*474*) (addcarry 64, [79, 467]); (*475*) (addoverflow 64, [79, 467]); (*476*) (add 64, [81, 468, 474]); (*477*) (addcarry 64, [470, 474]); (*478*) (addoverflow 64, [470, 474]); (*479*) (add 64, [42, 81, 161, 235, 241, 375, 393, 418, 424, 444, 451, 468, 474]); (*480*) (addcarry 64, [453, 476]); (*481*) (addoverflow 64, [453, 476]); (*482*) (add 64, [157, 480]); (*483*) (addcarry 64, [157, 480]); (*484*) (addoverflow 64, [157, 480]); (*485*) (add 64, [42, 81, 155, 161, 235, 241, 375, 393, 418, 424, 444, 451, 468, 474]); (*486*) (addcarry 64, [155, 479]); (*487*) (addoverflow 64, [155, 479]); (*488*) (add 64, [157, 480, 486]); (*489*) (addcarry 64, [482, 486]); (*490*) (addoverflow 64, [482, 486]); (*491*) (add 64, [157, 237, 396, 430, 436, 445, 454, 480, 486]); (*492*) (addcarry 64, [456, 488]); (*493*) (addoverflow 64, [456, 488]); (*494*) (add 64, [233, 492]); (*495*) (addcarry 64, [233, 492]); (*496*) (addoverflow 64, [233, 492]); (*497*) (add 64, [157, 231, 237, 396, 430, 436, 445, 454, 480, 486]); (*498*) (addcarry 64, [231, 491]); (*499*) (addoverflow 64, [231, 491]); (*500*) (add 64, [233, 439, 457, 492, 498]); (*501*) (addcarry 64, [459, 494, 498]); (*502*) (addoverflow 64, [459, 494, 498]); (*503*) (mul 64, [383, 461]); (*504*) (mulZ, [38, 461]); (*505*) (shrZ, [504, 10]); (*506*) (mul 64, [38, 461]); (*507*) (shr 64, [504, 10]); (*508*) (shr 64, [461, 381]); (*509*) (add 64, [18, 40, 79, 85, 159, 165, 239, 369, 390, 406, 412, 446, 448, 462, 503]); (*510*) (addcarry 64, [473, 503]); (*511*) (addoverflow 64, [473, 503]); (*512*) (add 64, [42, 81, 155, 161, 235, 241, 375, 393, 418, 424, 444, 451, 468, 474, 508, 510]); (*513*) (addcarry 64, [485, 508, 510]); (*514*) (addoverflow 64, [485, 508, 510]); (*515*) (add 64, [157, 231, 237, 396, 430, 436, 445, 454, 480, 486, 506, 513]); (*516*) (addcarry 64, [497, 506, 513]); (*517*) (addoverflow 64, [497, 506, 513]); (*518*) (add 64, [233, 439, 457, 492, 498, 507, 516]); (*519*) (addcarry 64, [500, 507, 516]); (*520*) (addoverflow 64, [500, 507, 516]); (*521*) (add 64, [501, 519]); (*522*) (xorZ, [503, 503]); (*523*) (add 64, [12, 18, 40, 79, 85, 159, 165, 239, 369, 390, 406, 412, 446, 448, 462, 503]); (*524*) (addcarry 64, [12, 509]); (*525*) (addoverflow 64, [12, 509]); (*526*) (add 64, [14, 524]); (*527*) (addcarry 64, [14, 524]); (*528*) (addoverflow 64, [14, 524]); (*529*) (add 64, [14, 42, 81, 155, 161, 235, 241, 375, 393, 418, 424, 444, 451, 468, 474, 508, 510, 524]); (*530*) (addcarry 64, [512, 526]); (*531*) (addoverflow 64, [512, 526]); (*532*) (add 64, [77, 530]); (*533*) (addcarry 64, [77, 530]); (*534*) (addoverflow 64, [77, 530]); (*535*) (add 64, [14, 42, 75, 81, 155, 161, 235, 241, 375, 393, 418, 424, 444, 451, 468, 474, 508, 510, 524]); (*536*) (addcarry 64, [75, 529]); (*537*) (addoverflow 64, [75, 529]); (*538*) (add 64, [77, 530, 536]); (*539*) (addcarry 64, [532, 536]); (*540*) (addoverflow 64, [532, 536]); (*541*) (add 64, [77, 157, 231, 237, 396, 430, 436, 445, 454, 480, 486, 506, 513, 530, 536]); (*542*) (addcarry 64, [515, 538]); (*543*) (addoverflow 64, [515, 538]); (*544*) (add 64, [153, 542]); (*545*) (addcarry 64, [153, 542]); (*546*) (addoverflow 64, [153, 542]); (*547*) (add 64, [77, 151, 157, 231, 237, 396, 430, 436, 445, 454, 480, 486, 506, 513, 530, 536]); (*548*) (addcarry 64, [151, 541]); (*549*) (addoverflow 64, [151, 541]); (*550*) (add 64, [153, 542, 548]); (*551*) (addcarry 64, [544, 548]); (*552*) (addoverflow 64, [544, 548]); (*553*) (add 64, [153, 233, 439, 457, 492, 498, 507, 516, 542, 548]); (*554*) (addcarry 64, [518, 550]); (*555*) (addoverflow 64, [518, 550]); (*556*) (add 64, [229, 554]); (*557*) (addcarry 64, [229, 554]); (*558*) (addoverflow 64, [229, 554]); (*559*) (add 64, [153, 227, 233, 439, 457, 492, 498, 507, 516, 542, 548]); (*560*) (addcarry 64, [227, 553]); (*561*) (addoverflow 64, [227, 553]); (*562*) (add 64, [229, 501, 519, 554, 560]); (*563*) (addcarry 64, [521, 556, 560]); (*564*) (addoverflow 64, [521, 556, 560]); (*565*) (mul 64, [383, 523]); (*566*) (mulZ, [38, 523]); (*567*) (shrZ, [566, 10]); (*568*) (mul 64, [38, 523]); (*569*) (shr 64, [566, 10]); (*570*) (shr 64, [523, 381]); (*571*) (add 64, [14, 42, 75, 81, 155, 161, 235, 241, 375, 393, 418, 424, 444, 451, 468, 474, 508, 510, 524, 565]); (*572*) (addcarry 64, [535, 565]); (*573*) (addoverflow 64, [535, 565]); (*574*) (add 64, [77, 151, 157, 231, 237, 396, 430, 436, 445, 454, 480, 486, 506, 513, 530, 536, 570, 572]); (*575*) (addcarry 64, [547, 570, 572]); (*576*) (addoverflow 64, [547, 570, 572]); (*577*) (add 64, [153, 227, 233, 439, 457, 492, 498, 507, 516, 542, 548, 568, 575]); (*578*) (addcarry 64, [559, 568, 575]); (*579*) (addoverflow 64, [559, 568, 575]); (*580*) (add 64, [229, 501, 519, 554, 560, 569, 578]); (*581*) (addcarry 64, [562, 569, 578]); (*582*) (addoverflow 64, [562, 569, 578]); (*583*) (add 64, [563, 581]); (*584*) (add 64, [14, 42, 75, 81, 155, 161, 235, 241, 302, 375, 393, 418, 424, 444, 451, 468, 474, 508, 510, 524, 565]); (*585*) (subborrow 64, [571, 48]); (*586*) (neg 64, [585]); (*587*) (add 64, [38, 77, 151, 157, 231, 237, 396, 430, 436, 445, 454, 480, 486, 506, 513, 530, 536, 570, 572, 586]); (*588*) (subborrow 64, [574, 43, 585]); (*589*) (neg 64, [588]); (*590*) (add 64, [153, 227, 233, 439, 457, 492, 498, 507, 516, 542, 548, 568, 575, 589]); (*591*) (subborrow 64, [577, 588]); (*592*) (neg 64, [591]); (*593*) (add 64, [43, 229, 501, 519, 554, 560, 569, 578, 592]); (*594*) (subborrow 64, [580, 38, 591]); (*595*) (neg 64, [594]); (*596*) (add 64, [563, 581, 595]); (*597*) (subborrow 64, [583, 594]); (*598*) (selectznz, [597, 584, 571]); (*599*) (selectznz, [597, 587, 574]); (*600*) (selectznz, [597, 590, 577]); (*601*) (selectznz, [597, 593, 580]); ] ; symbolic_reg_state := [(rax, 568), (rcx, 571), (rdx, 580), (rbx, 577), (rsp, 361), (rbp, 574), (rsi, 346), (rdi, 357), (r8, 600), (r9, 601), (r10, 596), (r11, 565), (r12, 598), (r13, 599), (r14, 43), (r15, 38)]; symbolic_flag_state := (*flag_state*)(CF=Some 597 PF=None AF=None ZF=None SF=None ZF=None OF=None); symbolic_mem_state := [] ; |} Unable to unify: [inr [598, 599, 600, 601]] == [inr [322, 323, 324, 325]] Could not unify the values at index 0: [#598, #599, #600, #601] ≠ [#322, #323, #324, #325] index 0: #598 ≠ #322 (selectznz, [#597, #584, #571]) ≠ (selectznz, [#321, #303, #288]) index 0: #597 ≠ #321 (subborrow 64, [#583, #594]) ≠ (subborrow 64, [#301, #317]) index 0: #583 ≠ #301 (add 64, [#563, #581]) ≠ (add 64, [#267, #299]) index 0: #563 ≠ #267 (addcarry 64, [#521, #556, #560]) ≠ (addcarry 64, [#225, #252, #264]) index 0: #521 ≠ #225 (add 64, [#501, #519]) ≠ (add 64, [#191, #223]) index 0: #501 ≠ #191 (addcarry 64, [#459, #494, #498]) ≠ (addcarry 64, [#149, #176, #188]) index 0: #459 ≠ #149 (add 64, [#439, #457]) ≠ (add 64, [#115, #147]) index 0: #439 ≠ #115 (addcarry 64, [#396, #432, #436]) ≠ (addcarry 64, [#73, #100, #112]) index 0: #396 ≠ #73 (addcarry 64, [#42, #377, #393]) ≠ (addcarry 64, [#37, #42, #70]) index 1: #377 ≠ #37 (add 64, [#241, #375]) ≠ (add 64, [#14, #35]) index 0: #241 ≠ #14 (shr 64, [#238, #10]) ≠ (shr 64, [#11, #10]) index 0: #238 ≠ #11 (mulZ, [#0, #7]) ≠ (mulZ, [#3, #4]) index 0: #0 ≠ #3 (old 64 0, []) ≠ (old 64 3, []) (old 64 0, []) ≠ (old 64 3, []) Operation mismatch: old 64 0 ≠ old 64 3 0 is a special value no longer present in the symbolic machine state at the end of execution. 3 is a special value no longer present in the symbolic machine state at the end of execution. Fatal error: exception Failure("Synthesis failed") ``` --- .../boringssl_intel_manual_mul_p256.asm | 25 ++- fiat-amd64/boringssl_nasm_full_mul_p256.asm | 167 ++++++++++++++++++ 2 files changed, 190 insertions(+), 2 deletions(-) diff --git a/fiat-amd64/boringssl_intel_manual_mul_p256.asm b/fiat-amd64/boringssl_intel_manual_mul_p256.asm index 5316c4c6a51..7c5c6e5256a 100644 --- a/fiat-amd64/boringssl_intel_manual_mul_p256.asm +++ b/fiat-amd64/boringssl_intel_manual_mul_p256.asm @@ -1,4 +1,18 @@ -__ecp_nistz256_mul_montq: +SECTION .text + GLOBAL ecp_nistz256_mul_mont +ecp_nistz256_mul_mont: +;push rbp +;push rbx +;push r12 +;push r13 +;push r14 +;push r15 +mov rbx,rdx +mov rax,QWORD PTR [rbx] +mov r9,QWORD PTR [rsi] +mov r10,QWORD PTR [rsi+0x8] +mov r11,QWORD PTR [rsi+0x10] +mov r12,QWORD PTR [rsi+0x18] mov rbp,rax mul r9 mov r14,0x00000000ffffffff @@ -162,4 +176,11 @@ mov QWORD PTR [rdi+0x8],r13 cmovb r9,rdx mov QWORD PTR [rdi+0x10],r8 mov QWORD PTR [rdi+0x18],r9 -repz ret +;mov r15,QWORD PTR [rsp] +;mov r14,QWORD PTR [rsp+0x8] +;mov r13,QWORD PTR [rsp+0x10] +;mov r12,QWORD PTR [rsp+0x18] +;mov rbx,QWORD PTR [rsp+0x20] +;mov rbp,QWORD PTR [rsp+0x28] +;lea rsp,[rsp+0x30] +ret diff --git a/fiat-amd64/boringssl_nasm_full_mul_p256.asm b/fiat-amd64/boringssl_nasm_full_mul_p256.asm index 13de3447a4f..8a2104a8f23 100644 --- a/fiat-amd64/boringssl_nasm_full_mul_p256.asm +++ b/fiat-amd64/boringssl_nasm_full_mul_p256.asm @@ -298,3 +298,170 @@ __ecp_nistz256_mul_montq: mov QWORD[24+rdi],r9 DB 0F3h,0C3h ;repret + + +ALIGN 32 +__ecp_nistz256_mul_montx: + + + + mulx r9,r8,r9 + mulx r10,rcx,r10 + mov r14,32 + xor r13,r13 + mulx r11,rbp,r11 + mov r15,QWORD[(($L$poly+24))] + adc r9,rcx + mulx r12,rcx,r12 + mov rdx,r8 + adc r10,rbp + shlx rbp,r8,r14 + adc r11,rcx + shrx rcx,r8,r14 + adc r12,0 + + + + add r9,rbp + adc r10,rcx + + mulx rbp,rcx,r15 + mov rdx,QWORD[8+rbx] + adc r11,rcx + adc r12,rbp + adc r13,0 + xor r8,r8 + + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r9,rcx + adox r10,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r9 + adcx r12,rcx + shlx rcx,r9,r14 + adox r13,rbp + shrx rbp,r9,r14 + + adcx r13,r8 + adox r8,r8 + adc r8,0 + + + + add r10,rcx + adc r11,rbp + + mulx rbp,rcx,r15 + mov rdx,QWORD[16+rbx] + adc r12,rcx + adc r13,rbp + adc r8,0 + xor r9,r9 + + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r10,rcx + adox r11,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r10 + adcx r13,rcx + shlx rcx,r10,r14 + adox r8,rbp + shrx rbp,r10,r14 + + adcx r8,r9 + adox r9,r9 + adc r9,0 + + + + add r11,rcx + adc r12,rbp + + mulx rbp,rcx,r15 + mov rdx,QWORD[24+rbx] + adc r13,rcx + adc r8,rbp + adc r9,0 + xor r10,r10 + + + + mulx rbp,rcx,QWORD[((0+128))+rsi] + adcx r11,rcx + adox r12,rbp + + mulx rbp,rcx,QWORD[((8+128))+rsi] + adcx r12,rcx + adox r13,rbp + + mulx rbp,rcx,QWORD[((16+128))+rsi] + adcx r13,rcx + adox r8,rbp + + mulx rbp,rcx,QWORD[((24+128))+rsi] + mov rdx,r11 + adcx r8,rcx + shlx rcx,r11,r14 + adox r9,rbp + shrx rbp,r11,r14 + + adcx r9,r10 + adox r10,r10 + adc r10,0 + + + + add r12,rcx + adc r13,rbp + + mulx rbp,rcx,r15 + mov rbx,r12 + mov r14,QWORD[(($L$poly+8))] + adc r8,rcx + mov rdx,r13 + adc r9,rbp + adc r10,0 + + + + xor eax,eax + mov rcx,r8 + sbb r12,-1 + sbb r13,r14 + sbb r8,0 + mov rbp,r9 + sbb r9,r15 + sbb r10,0 + + cmovc r12,rbx + cmovc r13,rdx + mov QWORD[rdi],r12 + cmovc r8,rcx + mov QWORD[8+rdi],r13 + cmovc r9,rbp + mov QWORD[16+rdi],r8 + mov QWORD[24+rdi],r9 + + DB 0F3h,0C3h ;repret