Support Nim 2.0.2 (#323)

* nim 2.0.0 & 2.0.2: param should not use _ anymore
* nim 2.0.0 & 2.0.2: emit explicit dereferencing of var param unneeded
* x86 assembler: generate quoted constraint
* nim-v2: rename to noExplicitVarDeref
* nim-v2 asm var deref: workaround nim-lang/Nim#23114
* GMP destructors breaking downstream, workaround to double-free: subsetpark/nim-gmp#1
* nim v2: quoted constraints for Intel syntax
* nim v2: static-for, enums don't lose their type anymore in macros
* threadpool needs an explicit guarantee that zero exceptions can leak in spawn
* enable nim 2.0 branch in CI
* nim v2: compatible GMP has not been tagged yet
* Readme: remove notice about nim v2 being incompatible with Constantine
* nim v2: fix double-and-add type mismatch on 32-bit platforms
* nim v2 - windows: don't use the parallel test runner in CI nim-lang/Nim#23118
* Partly revert commit 9243a78, upstream GMP destructors reverted, also ensure all mpz_init are followed by mpz_clear
mratsim · Dec 23, 2023 · 634063d · 634063d
1 parent 777cf55
commit 634063d
Show file tree

Hide file tree

Showing 27 changed files with 325 additions and 179 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -7,7 +7,7 @@ jobs:
       fail-fast: false
       max-parallel: 20
       matrix:
-        nim_version: [version-1-6] # [version-1-4, devel]
+        nim_version: [version-1-6, version-2-0] # devel
         rust_toolchain: [stable] # [beta, nightly]
         go_toolchain: [stable]
         target:
@@ -261,7 +261,7 @@ jobs:
         run: |
           pacman -S --needed --noconfirm mingw-w64-x86_64-gmp mingw-w64-x86_64-llvm
           nimble refresh --verbose -y
-          nimble install --verbose -y gmp jsony asynctools yaml@1.1.0
+          nimble install --verbose -y gmp@#head jsony asynctools yaml@1.1.0
 
           cd constantine
           go mod download -modfile=go_test.mod
@@ -271,7 +271,7 @@ jobs:
         shell: bash
         run: |
           nimble refresh --verbose -y
-          nimble install --verbose -y gmp jsony asynctools yaml@1.1.0
+          nimble install --verbose -y gmp@#head jsony asynctools yaml@1.1.0
 
           cd constantine
           go mod download -modfile=go_test.mod
@@ -352,16 +352,26 @@ jobs:
       - name: Run Constantine in-depth tests (Windows - no GMP, with Assembly)
         # So "test_bindings" uses C and can find GMP
         # but nim-gmp cannot find GMP on Windows CI
+        # Also need to work around asynctools not being able to create pipes, see https://github.com/nim-lang/Nim/issues/23118
         if: runner.os == 'Windows' && matrix.target.BACKEND == 'ASM'
         shell: msys2 {0}
         run: |
           cd constantine
-          nimble test_parallel_no_gmp --verbose
+          if [[ '${{  matrix.nim_version }}' == 'version-2-0' ]]; then
+            nimble test_no_gmp --verbose
+          else
+            nimble test_parallel_no_gmp --verbose
+          fi
       - name: Run Constantine in-depth tests (Windows - no GMP, no Assembly)
         # So "test_bindings" uses C and can find GMP
         # but nim-gmp cannot find GMP on Windows CI
+        # Also need to work around asynctools not being able to create pipes, see https://github.com/nim-lang/Nim/issues/23118
         if: runner.os == 'Windows' && matrix.target.BACKEND == 'NO_ASM'
         shell: msys2 {0}
         run: |
           cd constantine
-          CTT_ASM=0 nimble test_parallel_no_gmp --verbose
+          if [[ '${{  matrix.nim_version }}' == 'version-2-0' ]]; then
+            CTT_ASM=0 nimble test_no_gmp --verbose
+          else
+            CTT_ASM=0 nimble test_parallel_no_gmp --verbose
+          fi
diff --git a/README.md b/README.md
@@ -130,11 +130,9 @@ See the following documents on the threadpool performance details, design and re
 
 ## Installation
 
-|                                                                                                                                                           |
-|:---------------------------------------------------------------------------------------------------------------------------------------------------------:|
-| :exclamation: At the moment Nim v2.0 cannot compile Constantine.<br />Use nim v1.6.16 instead or Nim devel.<br />The upcoming Nim v2.0.2 will have a fix. |
-
-
+|                                                                                                                               |
+|:-----------------------------------------------------------------------------------------------------------------------------:|
+| :exclamation: Constantine can be compiled by Nim v1.6.x or v2.0.2 but not Nim v2.0.0 due to a compile-time integer regression |
 
 ### From Rust
 
@@ -151,10 +149,6 @@ See the following documents on the threadpool performance details, design and re
     - Debian/Ubuntu `sudo apt install nim`
     - Archlinux `pacman -S nim`
 
-    |                                                                                                                                       |
-    |---------------------------------------------------------------------------------------------------------------------------------------|
-    | :exclamation:  Until the Nim v2 situation is solved, you can use https://github.com/dom96/choosenim to install nim v1.6.16 toolchain. |
-
 3. Test both:
     - the experimental ZK Accel API (ZAL) for Halo2-KZG
     - Ethereum EIP4844 KZG polynomial commitments
@@ -212,10 +206,6 @@ and modify Constantine's [`build.rs`](https://github.com/mratsim/constantine/blo
     - Debian/Ubuntu `sudo apt install nim`
     - Archlinux `pacman -S nim`
 
-    |                                                                                                                                       |
-    |---------------------------------------------------------------------------------------------------------------------------------------|
-    | :exclamation:  Until the Nim v2 situation is solved, you can use https://github.com/dom96/choosenim to install nim v1.6.16 toolchain. |
-
 3. Compile Constantine as a static (and shared) library in `./include`
     ```
     cd constantine
@@ -241,10 +231,6 @@ and modify Constantine's [`build.rs`](https://github.com/mratsim/constantine/blo
     - Debian/Ubuntu `sudo apt install nim`
     - Archlinux `pacman -S nim`
 
-    |                                                                                                                                       |
-    |---------------------------------------------------------------------------------------------------------------------------------------|
-    | :exclamation:  Until the Nim v2 situation is solved, you can use https://github.com/dom96/choosenim to install nim v1.6.16 toolchain. |
-
 3. Compile the dynamic and static library.
     - Recommended: \
       `CC=clang nimble make_lib`

diff --git a/benchmarks-threadpool/dfs/threadpool_dfs.nim b/benchmarks-threadpool/dfs/threadpool_dfs.nim
@@ -17,7 +17,7 @@ when not defined(windows):
 
 var tp: Threadpool
 
-proc dfs(depth, breadth: int): uint32 {.gcsafe.} =
+proc dfs(depth, breadth: int): uint32 {.gcsafe, raises: [].} =
   if depth == 0:
     return 1
 

diff --git a/benchmarks-threadpool/fibonacci/threadpool_fib.nim b/benchmarks-threadpool/fibonacci/threadpool_fib.nim
@@ -10,7 +10,7 @@ when not defined(windows):
 
 var tp: Threadpool
 
-proc fib(n: int): int =
+proc fib(n: int): int {.gcsafe, raises: [].} =
   # int64 on x86-64
   if n < 2:
     return n

diff --git a/benchmarks-threadpool/heat/threadpool_heat.nim b/benchmarks-threadpool/heat/threadpool_heat.nim
@@ -132,7 +132,7 @@ var
   odd: Matrix[float64]
   even: Matrix[float64]
 
-proc heat(m: Matrix[float64], il, iu: int32): bool {.discardable, gcsafe.}=
+proc heat(m: Matrix[float64], il, iu: int32): bool {.discardable, gcsafe, raises: [].}=
   # TODO to allow awaiting `heat` we return a dummy bool
   # The parallel spawns are updating the same matrix cells otherwise
   if iu - il > 1:
@@ -159,7 +159,7 @@ proc heat(m: Matrix[float64], il, iu: int32): bool {.discardable, gcsafe.}=
       row[j] = f(xu + i*dx, yu + j*dy)
     row[ny - 1] = randb(xu + i*dx, 0)
 
-proc diffuse(output: Matrix[float64], input: Matrix[float64], il, iu: int32, t: float64): bool {.discardable, gcsafe.} =
+proc diffuse(output: Matrix[float64], input: Matrix[float64], il, iu: int32, t: float64): bool {.discardable, gcsafe, raises:[].} =
   # TODO to allow awaiting `diffuse` we return a dummy bool
   # The parallel spawns are updating the same matrix cells otherwise
   if iu - il > 1:

diff --git a/benchmarks-threadpool/nqueens/threadpool_nqueens.nim b/benchmarks-threadpool/nqueens/threadpool_nqueens.nim
@@ -111,7 +111,7 @@ proc nqueens_ser(n, j: int32, a: CharArray): int32 =
     if isValid(j+1, a):
       result += nqueens_ser(n, j+1, a)
 
-proc nqueens_par(n, j: int32, a: CharArray): int32 {.gcsafe.} =
+proc nqueens_par(n, j: int32, a: CharArray): int32 {.gcsafe, raises:[].} =
 
   if n == j:
     # Good solution, count it

diff --git a/benchmarks/bench_gmp_modexp.nim b/benchmarks/bench_gmp_modexp.nim
@@ -103,6 +103,11 @@ for i in 0 ..< 5:
     echo "  r GMP:               ", r.toHex()
     echo "  elapsed GMP:         ", elapsedGMP, " ns"
 
+    mpz_clear(rr)
+    mpz_clear(mm)
+    mpz_clear(ee)
+    mpz_clear(aa)
+
   # echo &"\n  ratio Stint/Constantine: {float64(elapsedStint)/float64(elapsedCtt):.3f}x"
   echo &"  ratio GMP/Constantine: {float64(elapsedGMP)/float64(elapsedCtt):.3f}x"
   echo "---------------------------------------------------------"
diff --git a/benchmarks/bench_gmp_modmul.nim b/benchmarks/bench_gmp_modmul.nim
@@ -61,6 +61,11 @@ proc main() =
   mpz_init(rMod)
   mpz_init(a)
   mpz_init(b)
+  defer:
+    mpz_clear(b)
+    mpz_clear(a)
+    mpz_clear(rMod)
+    mpz_clear(r)
 
   testSizes(rBits, aBits, bBits):
     # echo "--------------------------------------------------------------------------------"

diff --git a/constantine.nimble b/constantine.nimble
@@ -876,6 +876,12 @@ task test_nvidia, "Run all tests for Nvidia GPUs":
 task bench_powmod, "Run modular exponentiation benchmark with your CC compiler":
   runBench("bench_powmod")
 
+task bench_gmp_modmul, "Run modular multiplication benchmarks vs GMP":
+  runBench("bench_gmp_modmul")
+
+task bench_gmp_modexp, "Run modular exponentiation benchmarks vs GMP":
+  runBench("bench_gmp_modexp")
+
 # Finite field 𝔽p
 # ------------------------------------------
 

diff --git a/constantine/mac/mac_poly1305.nim b/constantine/mac/mac_poly1305.nim
@@ -322,7 +322,7 @@ func clear*(ctx: var Poly1305_CTX) =
   ctx.bufIdx = 0
 
 func mac*(
-       _: type poly1305,
+       T: type poly1305,
        tag: var array[16, byte],
        message: openArray[byte],
        nonReusedKey: array[32, byte],
@@ -339,7 +339,7 @@ func mac*(
     ctx.clear()
 
 func mac*(
-       _: type poly1305,
+       T: type poly1305,
        message: openArray[byte],
        nonReusedKey: array[32, byte],
        clearMem = false): array[16, byte]{.noInit, genCharAPI.}=

diff --git a/constantine/math/arithmetic/limbs_montgomery.nim b/constantine/math/arithmetic/limbs_montgomery.nim
@@ -212,7 +212,7 @@ func mulMont_CIOS_sparebit(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipF
     discard t.csub(M, not(t < M))
   r = t
 
-func mulMont_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) {.used.} =
+func mulMont_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub: static bool = false) {.used.} =
   ## Montgomery Multiplication using Coarse Grained Operand Scanning (CIOS)
   # - Analyzing and Comparing Montgomery Multiplication Algorithms
   #   Cetin Kaya Koc and Tolga Acar and Burton S. Kaliski Jr.
@@ -257,7 +257,8 @@ func mulMont_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) {.used.} =
   # t[N+1] can only be non-zero in the intermediate computation
   # since it is immediately reduce to t[N] at the end of each "i" iteration
   # However if t[N] is non-zero we have t > M
-  discard t.csub(M, tN.isNonZero() or not(t < M)) # TODO: (t >= M) is unnecessary for prime in the form (2^64)ʷ
+  when not skipFinalSub:
+    discard t.csub(M, tN.isNonZero() or not(t < M)) # TODO: (t >= M) is unnecessary for prime in the form (2^64)ʷ
   r = t
 
 func mulMont_FIPS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub: static bool = false) =
@@ -721,7 +722,7 @@ func powMontSquarings(
     else: # Drained all exponent bits
       k = acc_len
 
-  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
+  let bits = (acc shr (acc_len - k)) and ((1'u shl k) - 1)
   acc_len -= k
 
   # We have k bits and can do k squaring

diff --git a/constantine/math/elliptic/ec_scalar_mul.nim b/constantine/math/elliptic/ec_scalar_mul.nim
@@ -117,7 +117,7 @@ func scalarMulDoubling[EC](
     else: # Drained all exponent bits
       k = acc_len
 
-  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
+  let bits = (acc shr (acc_len - k)) and ((1'u shl k) - 1)
   acc_len -= k
 
   # We have k bits and can do k doublings

diff --git a/constantine/math/extension_fields/exponentiations.nim b/constantine/math/extension_fields/exponentiations.nim
@@ -92,7 +92,7 @@ func powSquarings[F](
     else: # Drained all exponent bits
       k = acc_len
 
-  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
+  let bits = (acc shr (acc_len - k)) and ((1'u shl k) - 1)
   acc_len -= k
 
   # We have k bits and can do k squaring

diff --git a/constantine/math_arbitrary_precision/arithmetic/limbs_montgomery.nim b/constantine/math_arbitrary_precision/arithmetic/limbs_montgomery.nim
@@ -282,7 +282,7 @@ func powMontSquarings(
     else: # Drained all exponent bits
       k = acc_len
 
-  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
+  let bits = (acc shr (acc_len - k)) and ((1'u shl k) - 1)
   acc_len -= k
 
   # We have k bits and can do k squaring

diff --git a/constantine/platforms/constant_time/multiplexers.nim b/constantine/platforms/constant_time/multiplexers.nim
@@ -53,7 +53,7 @@ func ccopy_fallback[T](ctl: CTBool[T], x: var T, y: T) {.inline.}=
 
 const
   nim_v2 = (NimMajor, NimMinor) > (1, 6)
-  noExplicitPtrDeref = defined(cpp) or nim_v2
+  noExplicitVarDeref = defined(cpp) or nim_v2
 
 template mux_x86_impl() {.dirty.} =
   static: doAssert(X86)
@@ -111,58 +111,73 @@ func ccopy_x86[T](ctl: CTBool[T], x: var T, y: T) {.inline.}=
   static: doAssert(X86)
   static: doAssert(GCC_Compatible)
 
+  # Due to https://github.com/nim-lang/Nim/issues/23114
+  # We don't use asm statement with `var` param
+
   when UseAsmSyntaxIntel:
-    when noExplicitPtrDeref:
-      asm """
-        test %[ctl], %[ctl]
-        cmovnz %[x], %[y]
-        : [x] "+r" (`x`)
-        : [ctl] "r" (`ctl`), [y] "r" (`y`)
-        : "cc"
-      """
+    when noExplicitVarDeref:
+      {.emit:[
+        """
+        asm volatile(
+          "test %[ctl], %[ctl]\n"
+          "cmovnz %[x], %[y]\n"
+          : [x] "+r" (""", x, """)
+          : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+          : "cc"
+        );"""].}
     else:
-      asm """
-        test %[ctl], %[ctl]
-        cmovnz %[x], %[y]
-        : [x] "+r" (*`x`)
-        : [ctl] "r" (`ctl`), [y] "r" (`y`)
-        : "cc"
-      """
+      {.emit:[
+        """
+        asm volatile(
+          "test %[ctl], %[ctl]\n"
+          "cmovnz %[x], %[y]\n"
+          : [x] "+r" (*""", x, """)
+          : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+          : "cc"
+        );"""].}
   else:
     when sizeof(T) == 8:
-      when noExplicitPtrDeref:
-        asm """
-          testq %[ctl], %[ctl]
-          cmovnzq %[y], %[x]
-          : [x] "+r" (`x`)
-          : [ctl] "r" (`ctl`), [y] "r" (`y`)
-          : "cc"
-        """
+      when noExplicitVarDeref:
+        {.emit:[
+          """
+          asm volatile(
+            "testq %[ctl], %[ctl]\n"
+            "cmovnzq %[y], %[x]\n"
+            : [x] "+r" (""", x, """)
+            : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+            : "cc"
+          );"""].}
       else:
-        asm """
-          testq %[ctl], %[ctl]
-          cmovnzq %[y], %[x]
-          : [x] "+r" (*`x`)
-          : [ctl] "r" (`ctl`), [y] "r" (`y`)
-          : "cc"
-        """
+        {.emit:[
+          """
+          asm volatile(
+            "testq %[ctl], %[ctl]\n"
+            "cmovnzq %[y], %[x]\n"
+            : [x] "+r" (*""", x, """)
+            : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+            : "cc"
+          );"""].}
     else:
-      when noExplicitPtrDeref:
-        asm """
-          testl %[ctl], %[ctl]
-          cmovnzl %[y], %[x]
-          : [x] "+r" (`x`)
-          : [ctl] "r" (`ctl`), [y] "r" (`y`)
-          : "cc"
-        """
+      when noExplicitVarDeref:
+        {.emit:[
+          """
+          asm volatile(
+            "testl %[ctl], %[ctl]\n"
+            "cmovnzl %[y], %[x]\n"
+            : [x] "+r" (""", x, """)
+            : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+            : "cc"
+          );"""].}
       else:
-        asm """
-          testl %[ctl], %[ctl]
-          cmovnzl %[y], %[x]
-          : [x] "+r" (*`x`)
-          : [ctl] "r" (`ctl`), [y] "r" (`y`)
-          : "cc"
-        """
+        {.emit:[
+          """
+          asm volatile(
+            "testl %[ctl], %[ctl]\n"
+            "cmovnzl %[y], %[x]\n"
+            : [x] "+r" (*""", x, """)
+            : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+            : "cc"
+          );"""].}
 
 # Public functions
 # ------------------------------------------------------------