From 634063d88c0dd37cc45030740dc6cf5216a6ea3e Mon Sep 17 00:00:00 2001
From: Mamy Ratsimbazafy <mamy_github@numforge.co>
Date: Sat, 23 Dec 2023 19:33:25 +0100
Subject: [PATCH] Support Nim 2.0.2 (#323)

* nim 2.0.0 & 2.0.2: param should not use _ anymore

* nim 2.0.0 & 2.0.2: explicit dereferencing of var params in emit is no longer needed

* x86 assembler: generate quoted constraint

* nim-v2: rename to noExplicitVarDeref

* nim-v2 asm var deref: workaround https://github.com/nim-lang/Nim/issues/23114

* GMP destructors breaking downstream, workaround to double-free: https://github.com/subsetpark/nim-gmp/pull/1

* nim v2: quoted constraints for Intel syntax

* nim v2: static-for, enums don't lose their type anymore in macros

* threadpool: spawned procs need an explicit `raises: []`, no exception may leak in spawn

* enable nim 2.0 branch in CI

* nim v2: compatible GMP has not been tagged yet

* Readme: remove notice about nim v2 being incompatible with Constantine

* nim v2: fix double-and-add type mismatch on 32-bit platforms

* nim v2 - windows: don't use the parallel test runner in CI https://github.com/nim-lang/Nim/issues/23118

* Partly revert commit 9243a784554f20cb338e2b71484fd3d3cbaf88e2, upstream GMP destructors reverted, also ensure all mpz_init are followed by mpz_clear
---
 .github/workflows/ci.yml                      |  20 ++-
 README.md                                     |  20 +--
 benchmarks-threadpool/dfs/threadpool_dfs.nim  |   2 +-
 .../fibonacci/threadpool_fib.nim              |   2 +-
 .../heat/threadpool_heat.nim                  |   4 +-
 .../nqueens/threadpool_nqueens.nim            |   2 +-
 benchmarks/bench_gmp_modexp.nim               |   5 +
 benchmarks/bench_gmp_modmul.nim               |   5 +
 constantine.nimble                            |   6 +
 constantine/mac/mac_poly1305.nim              |   4 +-
 .../math/arithmetic/limbs_montgomery.nim      |   7 +-
 constantine/math/elliptic/ec_scalar_mul.nim   |   2 +-
 .../math/extension_fields/exponentiations.nim |   2 +-
 .../arithmetic/limbs_montgomery.nim           |   2 +-
 .../platforms/constant_time/multiplexers.nim  | 107 +++++++++-------
 .../intrinsics/addcarry_subborrow.nim         |  17 ++-
 .../extended_precision_64bit_uint128.nim      |  17 +--
 .../intrinsics/extended_precision_vartime.nim |   4 +-
 .../platforms/isa/macro_assembler_x86_att.nim | 114 ++++++++++++------
 .../isa/macro_assembler_x86_intel.nim         | 114 ++++++++++++------
 constantine/platforms/static_for.nim          |  10 +-
 examples-c/t_libctt_bls12_381.c               |   5 +
 examples-threadpool/e01_simple_tasks.nim      |   2 +-
 .../t_bigints_mod_vs_gmp.nim                  |   4 +-
 .../t_bigints_powmod_vs_gmp.nim               |   6 +-
 .../math_fields/t_finite_fields_mulsquare.nim |  16 ++-
 tests/math_fields/t_finite_fields_vs_gmp.nim  |   5 +
 27 files changed, 325 insertions(+), 179 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 041c55c9..e502f8df 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,7 +7,7 @@ jobs:
       fail-fast: false
       max-parallel: 20
       matrix:
-        nim_version: [version-1-6] # [version-1-4, devel]
+        nim_version: [version-1-6, version-2-0] # devel
         rust_toolchain: [stable] # [beta, nightly]
         go_toolchain: [stable]
         target:
@@ -261,7 +261,7 @@ jobs:
         run: |
           pacman -S --needed --noconfirm mingw-w64-x86_64-gmp mingw-w64-x86_64-llvm
           nimble refresh --verbose -y
-          nimble install --verbose -y gmp jsony asynctools yaml@1.1.0
+          nimble install --verbose -y gmp@#head jsony asynctools yaml@1.1.0
 
           cd constantine
           go mod download -modfile=go_test.mod
@@ -271,7 +271,7 @@ jobs:
         shell: bash
         run: |
           nimble refresh --verbose -y
-          nimble install --verbose -y gmp jsony asynctools yaml@1.1.0
+          nimble install --verbose -y gmp@#head jsony asynctools yaml@1.1.0
 
           cd constantine
           go mod download -modfile=go_test.mod
@@ -352,16 +352,26 @@ jobs:
       - name: Run Constantine in-depth tests (Windows - no GMP, with Assembly)
         # So "test_bindings" uses C and can find GMP
         # but nim-gmp cannot find GMP on Windows CI
+        # Also need to work around asynctools not being able to create pipes https://github.com/nim-lang/Nim/issues/23118
         if: runner.os == 'Windows' && matrix.target.BACKEND == 'ASM'
         shell: msys2 {0}
         run: |
           cd constantine
-          nimble test_parallel_no_gmp --verbose
+          if [[ '${{  matrix.nim_version }}' == 'version-2-0' ]]; then
+            nimble test_no_gmp --verbose
+          else
+            nimble test_parallel_no_gmp --verbose
+          fi
       - name: Run Constantine in-depth tests (Windows - no GMP, no Assembly)
         # So "test_bindings" uses C and can find GMP
         # but nim-gmp cannot find GMP on Windows CI
+        # Also need to work around asynctools not being able to create pipes https://github.com/nim-lang/Nim/issues/23118
         if: runner.os == 'Windows' && matrix.target.BACKEND == 'NO_ASM'
         shell: msys2 {0}
         run: |
           cd constantine
-          CTT_ASM=0 nimble test_parallel_no_gmp --verbose
+          if [[ '${{  matrix.nim_version }}' == 'version-2-0' ]]; then
+            CTT_ASM=0 nimble test_no_gmp --verbose
+          else
+            CTT_ASM=0 nimble test_parallel_no_gmp --verbose
+          fi
\ No newline at end of file
diff --git a/README.md b/README.md
index 647c4367..16b57edf 100644
--- a/README.md
+++ b/README.md
@@ -130,11 +130,9 @@ See the following documents on the threadpool performance details, design and re
 
 ## Installation
 
-|                                                                                                                                                           |
-|:---------------------------------------------------------------------------------------------------------------------------------------------------------:|
-| :exclamation: At the moment Nim v2.0 cannot compile Constantine.<br />Use nim v1.6.16 instead or Nim devel.<br />The upcoming Nim v2.0.2 will have a fix. |
-
-
+|                                                                                                                               |
+|:-----------------------------------------------------------------------------------------------------------------------------:|
+| :exclamation: Constantine can be compiled by Nim v1.6.x or v2.0.2 but not Nim v2.0.0 due to a compile-time integer regression |
 
 ### From Rust
 
@@ -151,10 +149,6 @@ See the following documents on the threadpool performance details, design and re
     - Debian/Ubuntu `sudo apt install nim`
     - Archlinux `pacman -S nim`
 
-    |                                                                                                                                       |
-    |---------------------------------------------------------------------------------------------------------------------------------------|
-    | :exclamation:  Until the Nim v2 situation is solved, you can use https://github.com/dom96/choosenim to install nim v1.6.16 toolchain. |
-
 3. Test both:
     - the experimental ZK Accel API (ZAL) for Halo2-KZG
     - Ethereum EIP4844 KZG polynomial commitments
@@ -212,10 +206,6 @@ and modify Constantine's [`build.rs`](https://github.com/mratsim/constantine/blo
     - Debian/Ubuntu `sudo apt install nim`
     - Archlinux `pacman -S nim`
 
-    |                                                                                                                                       |
-    |---------------------------------------------------------------------------------------------------------------------------------------|
-    | :exclamation:  Until the Nim v2 situation is solved, you can use https://github.com/dom96/choosenim to install nim v1.6.16 toolchain. |
-
 3. Compile Constantine as a static (and shared) library in `./include`
     ```
     cd constantine
@@ -241,10 +231,6 @@ and modify Constantine's [`build.rs`](https://github.com/mratsim/constantine/blo
     - Debian/Ubuntu `sudo apt install nim`
     - Archlinux `pacman -S nim`
 
-    |                                                                                                                                       |
-    |---------------------------------------------------------------------------------------------------------------------------------------|
-    | :exclamation:  Until the Nim v2 situation is solved, you can use https://github.com/dom96/choosenim to install nim v1.6.16 toolchain. |
-
 3. Compile the dynamic and static library.
     - Recommended: \
       `CC=clang nimble make_lib`
diff --git a/benchmarks-threadpool/dfs/threadpool_dfs.nim b/benchmarks-threadpool/dfs/threadpool_dfs.nim
index 2c49933c..173ecc2b 100644
--- a/benchmarks-threadpool/dfs/threadpool_dfs.nim
+++ b/benchmarks-threadpool/dfs/threadpool_dfs.nim
@@ -17,7 +17,7 @@ when not defined(windows):
 
 var tp: Threadpool
 
-proc dfs(depth, breadth: int): uint32 {.gcsafe.} =
+proc dfs(depth, breadth: int): uint32 {.gcsafe, raises: [].} =
   if depth == 0:
     return 1
 
diff --git a/benchmarks-threadpool/fibonacci/threadpool_fib.nim b/benchmarks-threadpool/fibonacci/threadpool_fib.nim
index 5c9c7b75..3592a934 100644
--- a/benchmarks-threadpool/fibonacci/threadpool_fib.nim
+++ b/benchmarks-threadpool/fibonacci/threadpool_fib.nim
@@ -10,7 +10,7 @@ when not defined(windows):
 
 var tp: Threadpool
 
-proc fib(n: int): int =
+proc fib(n: int): int {.gcsafe, raises: [].} =
   # int64 on x86-64
   if n < 2:
     return n
diff --git a/benchmarks-threadpool/heat/threadpool_heat.nim b/benchmarks-threadpool/heat/threadpool_heat.nim
index 686ee9e0..d32a9187 100644
--- a/benchmarks-threadpool/heat/threadpool_heat.nim
+++ b/benchmarks-threadpool/heat/threadpool_heat.nim
@@ -132,7 +132,7 @@ var
   odd: Matrix[float64]
   even: Matrix[float64]
 
-proc heat(m: Matrix[float64], il, iu: int32): bool {.discardable, gcsafe.}=
+proc heat(m: Matrix[float64], il, iu: int32): bool {.discardable, gcsafe, raises: [].}=
   # TODO to allow awaiting `heat` we return a dummy bool
   # The parallel spawns are updating the same matrix cells otherwise
   if iu - il > 1:
@@ -159,7 +159,7 @@ proc heat(m: Matrix[float64], il, iu: int32): bool {.discardable, gcsafe.}=
       row[j] = f(xu + i*dx, yu + j*dy)
     row[ny - 1] = randb(xu + i*dx, 0)
 
-proc diffuse(output: Matrix[float64], input: Matrix[float64], il, iu: int32, t: float64): bool {.discardable, gcsafe.} =
+proc diffuse(output: Matrix[float64], input: Matrix[float64], il, iu: int32, t: float64): bool {.discardable, gcsafe, raises:[].} =
   # TODO to allow awaiting `diffuse` we return a dummy bool
   # The parallel spawns are updating the same matrix cells otherwise
   if iu - il > 1:
diff --git a/benchmarks-threadpool/nqueens/threadpool_nqueens.nim b/benchmarks-threadpool/nqueens/threadpool_nqueens.nim
index e4fabad8..f975872a 100644
--- a/benchmarks-threadpool/nqueens/threadpool_nqueens.nim
+++ b/benchmarks-threadpool/nqueens/threadpool_nqueens.nim
@@ -111,7 +111,7 @@ proc nqueens_ser(n, j: int32, a: CharArray): int32 =
     if isValid(j+1, a):
       result += nqueens_ser(n, j+1, a)
 
-proc nqueens_par(n, j: int32, a: CharArray): int32 {.gcsafe.} =
+proc nqueens_par(n, j: int32, a: CharArray): int32 {.gcsafe, raises:[].} =
 
   if n == j:
     # Good solution, count it
diff --git a/benchmarks/bench_gmp_modexp.nim b/benchmarks/bench_gmp_modexp.nim
index 341f81a6..880ebcd5 100644
--- a/benchmarks/bench_gmp_modexp.nim
+++ b/benchmarks/bench_gmp_modexp.nim
@@ -103,6 +103,11 @@ for i in 0 ..< 5:
     echo "  r GMP:               ", r.toHex()
     echo "  elapsed GMP:         ", elapsedGMP, " ns"
 
+    mpz_clear(rr)
+    mpz_clear(mm)
+    mpz_clear(ee)
+    mpz_clear(aa)
+
   # echo &"\n  ratio Stint/Constantine: {float64(elapsedStint)/float64(elapsedCtt):.3f}x"
   echo &"  ratio GMP/Constantine: {float64(elapsedGMP)/float64(elapsedCtt):.3f}x"
   echo "---------------------------------------------------------"
\ No newline at end of file
diff --git a/benchmarks/bench_gmp_modmul.nim b/benchmarks/bench_gmp_modmul.nim
index a4617f75..73e61665 100644
--- a/benchmarks/bench_gmp_modmul.nim
+++ b/benchmarks/bench_gmp_modmul.nim
@@ -61,6 +61,11 @@ proc main() =
   mpz_init(rMod)
   mpz_init(a)
   mpz_init(b)
+  defer:
+    mpz_clear(b)
+    mpz_clear(a)
+    mpz_clear(rMod)
+    mpz_clear(r)
 
   testSizes(rBits, aBits, bBits):
     # echo "--------------------------------------------------------------------------------"
diff --git a/constantine.nimble b/constantine.nimble
index 820daa5f..fd007509 100644
--- a/constantine.nimble
+++ b/constantine.nimble
@@ -876,6 +876,12 @@ task test_nvidia, "Run all tests for Nvidia GPUs":
 task bench_powmod, "Run modular exponentiation benchmark with your CC compiler":
   runBench("bench_powmod")
 
+task bench_gmp_modmul, "Run modular multiplication benchmarks vs GMP":
+  runBench("bench_gmp_modmul")
+
+task bench_gmp_modexp, "Run modular exponentiation benchmarks vs GMP":
+  runBench("bench_gmp_modexp")
+
 # Finite field 𝔽p
 # ------------------------------------------
 
diff --git a/constantine/mac/mac_poly1305.nim b/constantine/mac/mac_poly1305.nim
index 39ba542b..4839b6c0 100644
--- a/constantine/mac/mac_poly1305.nim
+++ b/constantine/mac/mac_poly1305.nim
@@ -322,7 +322,7 @@ func clear*(ctx: var Poly1305_CTX) =
   ctx.bufIdx = 0
 
 func mac*(
-       _: type poly1305,
+       T: type poly1305,
        tag: var array[16, byte],
        message: openArray[byte],
        nonReusedKey: array[32, byte],
@@ -339,7 +339,7 @@ func mac*(
     ctx.clear()
 
 func mac*(
-       _: type poly1305,
+       T: type poly1305,
        message: openArray[byte],
        nonReusedKey: array[32, byte],
        clearMem = false): array[16, byte]{.noInit, genCharAPI.}=
diff --git a/constantine/math/arithmetic/limbs_montgomery.nim b/constantine/math/arithmetic/limbs_montgomery.nim
index 9a651466..15049589 100644
--- a/constantine/math/arithmetic/limbs_montgomery.nim
+++ b/constantine/math/arithmetic/limbs_montgomery.nim
@@ -212,7 +212,7 @@ func mulMont_CIOS_sparebit(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipF
     discard t.csub(M, not(t < M))
   r = t
 
-func mulMont_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) {.used.} =
+func mulMont_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub: static bool = false) {.used.} =
   ## Montgomery Multiplication using Coarse Grained Operand Scanning (CIOS)
   # - Analyzing and Comparing Montgomery Multiplication Algorithms
   #   Cetin Kaya Koc and Tolga Acar and Burton S. Kaliski Jr.
@@ -257,7 +257,8 @@ func mulMont_CIOS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType) {.used.} =
   # t[N+1] can only be non-zero in the intermediate computation
   # since it is immediately reduce to t[N] at the end of each "i" iteration
   # However if t[N] is non-zero we have t > M
-  discard t.csub(M, tN.isNonZero() or not(t < M)) # TODO: (t >= M) is unnecessary for prime in the form (2^64)ʷ
+  when not skipFinalSub:
+    discard t.csub(M, tN.isNonZero() or not(t < M)) # TODO: (t >= M) is unnecessary for prime in the form (2^64)ʷ
   r = t
 
 func mulMont_FIPS(r: var Limbs, a, b, M: Limbs, m0ninv: BaseType, skipFinalSub: static bool = false) =
@@ -721,7 +722,7 @@ func powMontSquarings(
     else: # Drained all exponent bits
       k = acc_len
 
-  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
+  let bits = (acc shr (acc_len - k)) and ((1'u shl k) - 1)
   acc_len -= k
 
   # We have k bits and can do k squaring
diff --git a/constantine/math/elliptic/ec_scalar_mul.nim b/constantine/math/elliptic/ec_scalar_mul.nim
index de8fddee..a13e2822 100644
--- a/constantine/math/elliptic/ec_scalar_mul.nim
+++ b/constantine/math/elliptic/ec_scalar_mul.nim
@@ -117,7 +117,7 @@ func scalarMulDoubling[EC](
     else: # Drained all exponent bits
       k = acc_len
 
-  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
+  let bits = (acc shr (acc_len - k)) and ((1'u shl k) - 1)
   acc_len -= k
 
   # We have k bits and can do k doublings
diff --git a/constantine/math/extension_fields/exponentiations.nim b/constantine/math/extension_fields/exponentiations.nim
index be9dfbc1..a9986b3b 100644
--- a/constantine/math/extension_fields/exponentiations.nim
+++ b/constantine/math/extension_fields/exponentiations.nim
@@ -92,7 +92,7 @@ func powSquarings[F](
     else: # Drained all exponent bits
       k = acc_len
 
-  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
+  let bits = (acc shr (acc_len - k)) and ((1'u shl k) - 1)
   acc_len -= k
 
   # We have k bits and can do k squaring
diff --git a/constantine/math_arbitrary_precision/arithmetic/limbs_montgomery.nim b/constantine/math_arbitrary_precision/arithmetic/limbs_montgomery.nim
index 4e9d47a4..05f85fab 100644
--- a/constantine/math_arbitrary_precision/arithmetic/limbs_montgomery.nim
+++ b/constantine/math_arbitrary_precision/arithmetic/limbs_montgomery.nim
@@ -282,7 +282,7 @@ func powMontSquarings(
     else: # Drained all exponent bits
       k = acc_len
 
-  let bits = (acc shr (acc_len - k)) and ((1'u32 shl k) - 1)
+  let bits = (acc shr (acc_len - k)) and ((1'u shl k) - 1)
   acc_len -= k
 
   # We have k bits and can do k squaring
diff --git a/constantine/platforms/constant_time/multiplexers.nim b/constantine/platforms/constant_time/multiplexers.nim
index c90a1c0f..27640ea3 100644
--- a/constantine/platforms/constant_time/multiplexers.nim
+++ b/constantine/platforms/constant_time/multiplexers.nim
@@ -53,7 +53,7 @@ func ccopy_fallback[T](ctl: CTBool[T], x: var T, y: T) {.inline.}=
 
 const
   nim_v2 = (NimMajor, NimMinor) > (1, 6)
-  noExplicitPtrDeref = defined(cpp) or nim_v2
+  noExplicitVarDeref = defined(cpp) or nim_v2
 
 template mux_x86_impl() {.dirty.} =
   static: doAssert(X86)
@@ -111,58 +111,73 @@ func ccopy_x86[T](ctl: CTBool[T], x: var T, y: T) {.inline.}=
   static: doAssert(X86)
   static: doAssert(GCC_Compatible)
 
+  # Due to https://github.com/nim-lang/Nim/issues/23114,
+  # we use `emit` instead of the `asm` statement when a `var` param is involved.
+
   when UseAsmSyntaxIntel:
-    when noExplicitPtrDeref:
-      asm """
-        test %[ctl], %[ctl]
-        cmovnz %[x], %[y]
-        : [x] "+r" (`x`)
-        : [ctl] "r" (`ctl`), [y] "r" (`y`)
-        : "cc"
-      """
+    when noExplicitVarDeref:
+      {.emit:[
+        """
+        asm volatile(
+          "test %[ctl], %[ctl]\n"
+          "cmovnz %[x], %[y]\n"
+          : [x] "+r" (""", x, """)
+          : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+          : "cc"
+        );"""].}
     else:
-      asm """
-        test %[ctl], %[ctl]
-        cmovnz %[x], %[y]
-        : [x] "+r" (*`x`)
-        : [ctl] "r" (`ctl`), [y] "r" (`y`)
-        : "cc"
-      """
+      {.emit:[
+        """
+        asm volatile(
+          "test %[ctl], %[ctl]\n"
+          "cmovnz %[x], %[y]\n"
+          : [x] "+r" (*""", x, """)
+          : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+          : "cc"
+        );"""].}
   else:
     when sizeof(T) == 8:
-      when noExplicitPtrDeref:
-        asm """
-          testq %[ctl], %[ctl]
-          cmovnzq %[y], %[x]
-          : [x] "+r" (`x`)
-          : [ctl] "r" (`ctl`), [y] "r" (`y`)
-          : "cc"
-        """
+      when noExplicitVarDeref:
+        {.emit:[
+          """
+          asm volatile(
+            "testq %[ctl], %[ctl]\n"
+            "cmovnzq %[y], %[x]\n"
+            : [x] "+r" (""", x, """)
+            : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+            : "cc"
+          );"""].}
       else:
-        asm """
-          testq %[ctl], %[ctl]
-          cmovnzq %[y], %[x]
-          : [x] "+r" (*`x`)
-          : [ctl] "r" (`ctl`), [y] "r" (`y`)
-          : "cc"
-        """
+        {.emit:[
+          """
+          asm volatile(
+            "testq %[ctl], %[ctl]\n"
+            "cmovnzq %[y], %[x]\n"
+            : [x] "+r" (*""", x, """)
+            : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+            : "cc"
+          );"""].}
     else:
-      when noExplicitPtrDeref:
-        asm """
-          testl %[ctl], %[ctl]
-          cmovnzl %[y], %[x]
-          : [x] "+r" (`x`)
-          : [ctl] "r" (`ctl`), [y] "r" (`y`)
-          : "cc"
-        """
+      when noExplicitVarDeref:
+        {.emit:[
+          """
+          asm volatile(
+            "testl %[ctl], %[ctl]\n"
+            "cmovnzl %[y], %[x]\n"
+            : [x] "+r" (""", x, """)
+            : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+            : "cc"
+          );"""].}
       else:
-        asm """
-          testl %[ctl], %[ctl]
-          cmovnzl %[y], %[x]
-          : [x] "+r" (*`x`)
-          : [ctl] "r" (`ctl`), [y] "r" (`y`)
-          : "cc"
-        """
+        {.emit:[
+          """
+          asm volatile(
+            "testl %[ctl], %[ctl]\n"
+            "cmovnzl %[y], %[x]\n"
+            : [x] "+r" (*""", x, """)
+            : [ctl] "r" (""", ctl, """), [y] "r" (""", y, """)
+            : "cc"
+          );"""].}
 
 # Public functions
 # ------------------------------------------------------------
diff --git a/constantine/platforms/intrinsics/addcarry_subborrow.nim b/constantine/platforms/intrinsics/addcarry_subborrow.nim
index 8109550c..4feb8d1c 100644
--- a/constantine/platforms/intrinsics/addcarry_subborrow.nim
+++ b/constantine/platforms/intrinsics/addcarry_subborrow.nim
@@ -27,7 +27,10 @@ import
 # On other CPU architectures inline assembly might be desirable.
 # A compiler proof-of-concept is available in the "research" folder.
 #
-# See https://gcc.godbolt.org/z/2h768y
+# See https://gcc.godbolt.org/z/2h768y (Mar 2020 compilers)
+#     https://gcc.godbolt.org/z/WP38PzsMs
+#    (Dec 2023 compilers improved but __builtin_addcll leads to very poor GCC codegen)
+#
 # ```C
 # #include <stdint.h>
 # #include <x86intrin.h>
@@ -103,6 +106,11 @@ when X86:
 #
 # ############################################################
 
+const
+  nim_v2 = (NimMajor, NimMinor) > (1, 6)
+  noExplicitVarDeref {.used.} = defined(cpp) or nim_v2
+    ## In C++ mode or with Nim v2, `var` params passed to `emit` are automatically dereferenced by Nim.
+
 func addC*(cOut: var Carry, sum: var Ct[uint32], a, b: Ct[uint32], cIn: Carry) {.inline.} =
   ## Addition with carry
   ## (CarryOut, Sum) <- a + b + CarryIn
@@ -138,8 +146,7 @@ func addC*(cOut: var Carry, sum: var Ct[uint64], a, b: Ct[uint64], cIn: Carry) {
       var dblPrec {.noInit.}: uint128
       {.emit:[dblPrec, " = (unsigned __int128)", a," + (unsigned __int128)", b, " + (unsigned __int128)",cIn,";"].}
 
-      # Don't forget to dereference the var param in C mode
-      when defined(cpp):
+      when noExplicitVarDeref:
         {.emit:[cOut, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
         {.emit:[sum, " = (NU64)", dblPrec,";"].}
       else:
@@ -160,9 +167,7 @@ func subB*(bOut: var Borrow, diff: var Ct[uint64], a, b: Ct[uint64], bIn: Borrow
       var dblPrec {.noInit.}: uint128
       {.emit:[dblPrec, " = (unsigned __int128)", a," - (unsigned __int128)", b, " - (unsigned __int128)",bIn,";"].}
 
-      # Don't forget to dereference the var param in C mode
-      # On borrow the high word will be 0b1111...1111 and needs to be masked
-      when defined(cpp):
+      when noExplicitVarDeref:
         {.emit:[bOut, " = (NU64)(", dblPrec," >> ", 64'u64, ") & 1;"].}
         {.emit:[diff, " = (NU64)", dblPrec,";"].}
       else:
diff --git a/constantine/platforms/intrinsics/extended_precision_64bit_uint128.nim b/constantine/platforms/intrinsics/extended_precision_64bit_uint128.nim
index 53894be6..345acd8f 100644
--- a/constantine/platforms/intrinsics/extended_precision_64bit_uint128.nim
+++ b/constantine/platforms/intrinsics/extended_precision_64bit_uint128.nim
@@ -20,6 +20,11 @@ static:
   doAssert GCC_Compatible
   doAssert sizeof(int) == 8
 
+const
+  nim_v2 = (NimMajor, NimMinor) > (1, 6)
+  noExplicitVarDeref {.used.} = defined(cpp) or nim_v2
+    ## In C++ mode or with Nim v2, `var` params passed to `emit` are automatically dereferenced by Nim.
+
 func mul*(hi, lo: var Ct[uint64], a, b: Ct[uint64]) {.inline.} =
   ## Extended precision multiplication
   ## (hi, lo) <- a*b
@@ -30,8 +35,7 @@ func mul*(hi, lo: var Ct[uint64], a, b: Ct[uint64]) {.inline.} =
     var dblPrec {.noInit.}: uint128
     {.emit:[dblPrec, " = (unsigned __int128)", a," * (unsigned __int128)", b,";"].}
 
-    # Don't forget to dereference the var param in C mode
-    when defined(cpp):
+    when noExplicitVarDeref:
       {.emit:[hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
       {.emit:[lo, " = (NU64)", dblPrec,";"].}
     else:
@@ -51,8 +55,7 @@ func muladd1*(hi, lo: var Ct[uint64], a, b, c: Ct[uint64]) {.inline.} =
     var dblPrec {.noInit.}: uint128
     {.emit:[dblPrec, " = (unsigned __int128)", a," * (unsigned __int128)", b, " + (unsigned __int128)",c,";"].}
 
-    # Don't forget to dereference the var param in C mode
-    when defined(cpp):
+    when noExplicitVarDeref:
       {.emit:[hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
       {.emit:[lo, " = (NU64)", dblPrec,";"].}
     else:
@@ -74,8 +77,7 @@ func muladd2*(hi, lo: var Ct[uint64], a, b, c1, c2: Ct[uint64]) {.inline.}=
                " + (unsigned __int128)",c1," + (unsigned __int128)",c2,";"
     ].}
 
-    # Don't forget to dereference the var param in C mode
-    when defined(cpp):
+    when noExplicitVarDeref:
       {.emit:[hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
       {.emit:[lo, " = (NU64)", dblPrec,";"].}
     else:
@@ -96,8 +98,7 @@ func smul*(hi, lo: var Ct[uint64], a, b: Ct[uint64]) {.inline.} =
     # We need to cast to int64 then sign-extended to int128
     {.emit:[dblPrec, " = (__int128)", cast[int64](a)," * (__int128)", cast[int64](b),";"].}
 
-    # Don't forget to dereference the var param in C mode
-    when defined(cpp):
+    when noExplicitVarDeref:
       {.emit:[hi, " = (NU64)(", dblPrec," >> ", 64'u64, ");"].}
       {.emit:[lo, " = (NU64)", dblPrec,";"].}
     else:
diff --git a/constantine/platforms/intrinsics/extended_precision_vartime.nim b/constantine/platforms/intrinsics/extended_precision_vartime.nim
index 24a34251..e215836a 100644
--- a/constantine/platforms/intrinsics/extended_precision_vartime.nim
+++ b/constantine/platforms/intrinsics/extended_precision_vartime.nim
@@ -80,7 +80,7 @@ elif not(CTT_32) and GCC_Compatible:
 
   const
     newerNim = (NimMajor, NimMinor) > (1, 6)
-    noExplicitPtrDeref = defined(cpp) or newerNim
+    noExplicitVarDeref = defined(cpp) or newerNim
 
   func div2n1n_128_vartime(q, r: var uint64, n_hi, n_lo, d: uint64) {.inline, tags:[VarTime].}=
     ## Division uint128 by uint64
@@ -91,7 +91,7 @@ elif not(CTT_32) and GCC_Compatible:
     {.emit:[dblPrec, " = (unsigned __int128)", n_hi," << 64 | (unsigned __int128)",n_lo,";"].}
 
     # Don't forget to dereference the var param in C mode
-    when noExplicitPtrDeref:
+    when noExplicitVarDeref:
       {.emit:[q, " = (NU64)(", dblPrec," / ", d, ");"].}
       {.emit:[r, " = (NU64)(", dblPrec," % ", d, ");"].}
     else:
diff --git a/constantine/platforms/isa/macro_assembler_x86_att.nim b/constantine/platforms/isa/macro_assembler_x86_att.nim
index 9ade07dc..bc824e9c 100644
--- a/constantine/platforms/isa/macro_assembler_x86_att.nim
+++ b/constantine/platforms/isa/macro_assembler_x86_att.nim
@@ -7,7 +7,7 @@
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
 import
-  std/[macros, strutils, sets, hashes, algorithm],
+  std/[macros, strutils, sets, hashes, algorithm, sequtils],
   ../config
 
 # A compile-time inline assembler
@@ -103,8 +103,8 @@ type
     nimSymbol: NimNode     # a   - Nim nimSymbol
     rm: RM
     constraint: Constraint
-    constraintString: string          # C emit for example a->limbs
-    memClobbered: seq[(MemIndirectAccess, string)]
+    constraintDesc: seq[NimNode] # C emit for example `[a] "r" (a->limbs)`
+    memClobbered: seq[(MemIndirectAccess, seq[NimNode])]
 
   OperandArray* = object
     nimSymbol: NimNode
@@ -176,42 +176,78 @@ func init*(T: type Assembler_x86, Word: typedesc[SomeUnsignedInt]): Assembler_x8
   result.wordSize = sizeof(Word)
   result.wordBitWidth = result.wordSize * 8
 
-func setConstraintString(desc: OperandDesc, symbolString: string) =
+func escapeConstraint(asmDesc: string, symbol: NimNode): seq[NimNode] =
+  # Input:
+  # The assembly symbol + constraint + opening '(' + modifiers like reference/dereference/array-ranges
+  # The Nim symbol
+  #
+  # The closing ')' is automatically appended.
+  @[
+    newLit(asmDesc),
+    symbol,
+    newLit ")",
+  ]
+
+func setConstraintDesc(desc: OperandDesc, symbol: NimNode) =
+  # [a] "rbx" (`a`) for specific registers
+  # [a] "+r" (`*a_ptr`) for pointer to memory
+  # [a] "+r" (`a[0]`) for array cells
+  desc.constraintDesc = escapeConstraint(
+    desc.asmId & "\"" & $desc.constraint & $desc.rm & "\"" & " (",
+    symbol
+  )
+
+func setConstraintDesc(desc: OperandDesc, modifier: string, symbol: NimNode) =
   # [a] "rbx" (`a`) for specific registers
   # [a] "+r" (`*a_ptr`) for pointer to memory
   # [a] "+r" (`a[0]`) for array cells
-  desc.constraintString = desc.asmId & "\"" & $desc.constraint & $desc.rm & "\"" &
-          " (`" & symbolString & "`)"
+  desc.constraintDesc = escapeConstraint(
+    desc.asmId & "\"" & $desc.constraint & $desc.rm & "\"" & " (" & modifier,
+    symbol
+  )
 
-func genMemClobber(nimSymbol: NimNode, len: int, memIndirect: MemIndirectAccess): string =
+func genRawMemClobber(nimSymbol: NimNode, len: int, memIndirect: MemIndirectAccess): seq[NimNode] =
+  ## Create a raw memory clobber, for use in clobber list
   let baseType = nimSymbol.getTypeImpl()[2].getTypeImpl()[0]
   let cBaseType = if baseType.sameType(getType(uint64)): "NU64"
                   else: "NU32"
 
-  let symStr = nimSymbol.toString()
+  case memIndirect
+  of memRead:
+    return escapeConstraint("\"o\" (*(const " & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
+  of memWrite:
+    return escapeConstraint("\"=o\" (*(" & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
+  of memReadWrite:
+    return escapeConstraint("\"+o\" (*(" & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
+  else:
+    doAssert false, "Indirect access kind not specified"
+
+func genConstraintMemClobber(asmSymbol: string, nimSymbol: NimNode, len: int, memIndirect: MemIndirectAccess): seq[NimNode] =
+  ## Create a constraint memory clobber, for use in constraint list
+  let baseType = nimSymbol.getTypeImpl()[2].getTypeImpl()[0]
+  let cBaseType = if baseType.sameType(getType(uint64)): "NU64"
+                  else: "NU32"
 
   case memIndirect
   of memRead:
-    return "\"o\" (`*(const " & cBaseType & " (*)[" & $len & "]) " & symStr & "`)"
+    return escapeConstraint("[" & asmSymbol & "] \"o\" (*(const " & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
   of memWrite:
-    return "\"=o\" (`*(" & cBaseType & " (*)[" & $len & "]) " & symStr & "`)"
+    return escapeConstraint("[" & asmSymbol & "] \"=o\" (*(" & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
   of memReadWrite:
-    return "\"+o\" (`*(" & cBaseType & " (*)[" & $len & "]) " & symStr & "`)"
+    return escapeConstraint("[" & asmSymbol & "] \"+o\" (*(" & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
   else:
     doAssert false, "Indirect access kind not specified"
 
 func asmValue*(nimSymbol: NimNode, rm: RM, constraint: Constraint): Operand =
-  let symStr = $nimSymbol
-
   let desc = OperandDesc(
-        asmId: "[" & symStr & "]",
+        asmId: "[" & $nimSymbol & "]",
         nimSymbol: nimSymbol,
         rm: rm,
         constraint: constraint)
   if rm in {Mem, MemOffsettable}:
-    desc.setConstraintString("*&" & symStr)
+    desc.setConstraintDesc("*&", nimSymbol)
   else:
-    desc.setConstraintString(symStr)
+    desc.setConstraintDesc(nimSymbol)
   return Operand(desc: desc)
 
 func asmArray*(nimSymbol: NimNode, len: int, rm: RM, constraint: Constraint, memIndirect = memNoAccess): OperandArray =
@@ -229,8 +265,8 @@ func asmArray*(nimSymbol: NimNode, len: int, rm: RM, constraint: Constraint, mem
                   nimSymbol: nimSymbol,
                   rm: rm,
                   constraint: constraint,
-                  memClobbered: @[(memIndirect, genMemClobber(nimSymbol, len, memIndirect))])
-    desc.setConstraintString(symStr)
+                  memClobbered: @[(memIndirect, genRawMemClobber(nimSymbol, len, memIndirect))])
+    desc.setConstraintDesc(nimSymbol)
 
     for i in 0 ..< len:
       result.buf[i] = Operand(
@@ -256,7 +292,7 @@ func asmArray*(nimSymbol: NimNode, len: int, rm: RM, constraint: Constraint, mem
                   nimSymbol: nimSymbol,
                   rm: rm,
                   constraint: constraint,
-                  constraintString: "[" & symStr & "] " & genMemClobber(nimSymbol, len, memIndirect))
+                  constraintDesc: genConstraintMemClobber(symStr, nimSymbol, len, memIndirect))
 
     for i in 0 ..< len:
       # let desc = OperandDesc(
@@ -280,7 +316,7 @@ func asmArray*(nimSymbol: NimNode, len: int, rm: RM, constraint: Constraint, mem
                   nimSymbol: ident(symStr & $i),
                   rm: rm,
                   constraint: constraint)
-      desc.setConstraintString(symStr & "[" & $i & "]")
+      desc.setConstraintDesc(nnkBracketExpr.newTree(nimSymbol, newLit i))
       result.buf[i] = Operand(
         desc: desc,
         kind: kRegister)
@@ -293,7 +329,7 @@ func asArrayAddr*(op: Operand, memPointer: NimNode, len: int, memIndirect: MemIn
     desc: nil,
     buf: newSeq[Operand](len))
 
-  op.desc.memClobbered.add (memIndirect, genMemClobber(memPointer, len, memIndirect))
+  op.desc.memClobbered.add (memIndirect, genRawMemClobber(memPointer, len, memIndirect))
 
   for i in 0 ..< len:
     result.buf[i] = Operand(
@@ -313,7 +349,7 @@ func asArrayAddr*(op: Register, memPointer: NimNode, len: int, memIndirect: MemI
         rm: ClobberedReg,
         constraint: asmClobberedRegister)
 
-  desc.memClobbered = @[(memIndirect, genMemClobber(memPointer, len, memIndirect))]
+  desc.memClobbered = @[(memIndirect, genRawMemClobber(memPointer, len, memIndirect))]
 
   for i in 0 ..< len:
     result.buf[i] = Operand(
@@ -330,7 +366,7 @@ func as2dArrayAddr*(op: Operand, memPointer: NimNode, rows, cols: int, memIndire
     dims: [rows, cols],
     buf2d: newSeq[Operand](rows*cols))
 
-  op.desc.memClobbered.add (memIndirect, genMemClobber(memPointer, rows*cols, memIndirect))
+  op.desc.memClobbered.add (memIndirect, genRawMemClobber(memPointer, rows*cols, memIndirect))
 
   for i in 0 ..< rows*cols:
     result.buf2d[i] = Operand(
@@ -354,7 +390,7 @@ func setToCarryFlag*(a: var Assembler_x86, carry: NimNode) =
     nimSymbol: ident(symStr),
     rm: CarryFlag,
     constraint: asmOutputOverwrite)
-  desc.setConstraintString(symStr)
+  desc.setConstraintDesc(nimSymbol)
   a.operands.incl(desc)
 
 func generate*(a: Assembler_x86): NimNode =
@@ -362,15 +398,15 @@ func generate*(a: Assembler_x86): NimNode =
   ## the desired instruction
 
   var
-    outOperands: seq[string]
-    inOperands: seq[string]
+    outOperands: seq[seq[NimNode]]
+    inOperands: seq[seq[NimNode]]
     memClobbered = false
 
   for odesc in a.operands.items():
     if odesc.constraint in {asmInput, asmInputCommutative}:
-      inOperands.add odesc.constraintString
+      inOperands.add odesc.constraintDesc
     else:
-      outOperands.add odesc.constraintString
+      outOperands.add odesc.constraintDesc
 
     for (memIndirect, memDesc) in odesc.memClobbered:
       # TODO: precise clobbering.
@@ -387,9 +423,9 @@ func generate*(a: Assembler_x86): NimNode =
       # else:
       #   outOperands.add memDesc
 
-  var params: string
-  params.add ": " & outOperands.join(", ") & '\n'
-  params.add ": " & inOperands.join(", ") & '\n'
+  var params: seq[NimNode] # An empty operand list must still emit ": \n"; 2-argument foldl cannot fold an empty seq
+  params.add newLit(": ") & (if outOperands.len == 0: newSeq[NimNode]() else: outOperands.foldl(a & newLit(", ") & b)) & newLit("\n")
+  params.add newLit(": ") & (if  inOperands.len == 0: newSeq[NimNode]() else:  inOperands.foldl(a & newLit(", ") & b)) & newLit("\n")
 
   let clobbers = [(a.isStackClobbered, "sp"),
                   (a.areFlagsClobbered, "cc"),
@@ -408,7 +444,7 @@ func generate*(a: Assembler_x86): NimNode =
     else:
       clobberList.add ", \"" & $reg & '\"'
 
-  params.add clobberList
+  params.add newLit(clobberList)
 
   # GCC will optimize ASM away if there are no
   # memory operand or volatile + memory clobber
@@ -422,12 +458,20 @@ func generate*(a: Assembler_x86): NimNode =
   var asmStmt = "\"" & a.code.replace("\n", "\\n\"\n\"")
   asmStmt.setLen(asmStmt.len - 1) # drop the last quote
 
+  var emitStmt = nnkBracket.newTree(
+        newLit("\nasm volatile(\n"),
+        newLit(asmStmt),
+  )
+
+  for node in params:
+    emitStmt.add node
+
+  emitStmt.add newLit(");")
+
   result = nnkPragma.newTree(
     nnkExprColonExpr.newTree(
       ident"emit",
-      newLit(
-        "asm volatile(\n" & asmStmt & params & ");"
-      )
+      emitStmt
     )
   )
   result = nnkBlockStmt.newTree(
diff --git a/constantine/platforms/isa/macro_assembler_x86_intel.nim b/constantine/platforms/isa/macro_assembler_x86_intel.nim
index 31f9065d..f016daf1 100644
--- a/constantine/platforms/isa/macro_assembler_x86_intel.nim
+++ b/constantine/platforms/isa/macro_assembler_x86_intel.nim
@@ -7,7 +7,7 @@
 # at your option. This file may not be copied, modified, or distributed except according to those terms.
 
 import
-  std/[macros, strutils, sets, hashes, algorithm],
+  std/[macros, strutils, sets, hashes, algorithm, sequtils],
   ../config
 
 # A compile-time inline assembler
@@ -103,8 +103,8 @@ type
     nimSymbol: NimNode     # a   - Nim nimSymbol
     rm: RM
     constraint: Constraint
-    constraintString: string          # C emit for example a->limbs
-    memClobbered: seq[(MemIndirectAccess, string)]
+    constraintDesc: seq[NimNode] # C emit for example `[a] "r" (a->limbs)`
+    memClobbered: seq[(MemIndirectAccess, seq[NimNode])]
 
   OperandArray* = object
     nimSymbol: NimNode
@@ -176,42 +176,78 @@ func init*(T: type Assembler_x86, Word: typedesc[SomeUnsignedInt]): Assembler_x8
   result.wordSize = sizeof(Word)
   result.wordBitWidth = result.wordSize * 8
 
-func setConstraintString(desc: OperandDesc, symbolString: string) =
+func escapeConstraint(asmDesc: string, symbol: NimNode): seq[NimNode] =
+  # Builds one operand constraint as 3 nodes: the `asmDesc` literal, `symbol` itself, and a ")" literal.
+  # asmDesc: the assembly symbol + constraint + opening '(' + modifiers like reference/dereference/array-ranges
+  # symbol:  the Nim symbol, kept as a NimNode rather than being turned into a string
+  #
+  # The closing ')' is automatically appended, so `asmDesc` must leave its '(' open.
+  @[
+    newLit(asmDesc),
+    symbol,
+    newLit ")",
+  ]
+
+func setConstraintDesc(desc: OperandDesc, symbol: NimNode) =
+  # [a] "rbx" (`a`) for specific registers
+  # [a] "+r" (`*a_ptr`) for pointer to memory
+  # [a] "+r" (`a[0]`) for array cells
+  desc.constraintDesc = escapeConstraint(
+    desc.asmId & "\"" & $desc.constraint & $desc.rm & "\"" & " (",
+    symbol
+  )
+
+func setConstraintDesc(desc: OperandDesc, modifier: string, symbol: NimNode) =
   # [a] "rbx" (`a`) for specific registers
   # [a] "+r" (`*a_ptr`) for pointer to memory
   # [a] "+r" (`a[0]`) for array cells
-  desc.constraintString = desc.asmId & "\"" & $desc.constraint & $desc.rm & "\"" &
-          " (`" & symbolString & "`)"
+  desc.constraintDesc = escapeConstraint(
+    desc.asmId & "\"" & $desc.constraint & $desc.rm & "\"" & " (" & modifier,
+    symbol
+  )
 
-func genMemClobber(nimSymbol: NimNode, len: int, memIndirect: MemIndirectAccess): string =
+func genRawMemClobber(nimSymbol: NimNode, len: int, memIndirect: MemIndirectAccess): seq[NimNode] =
+  ## Create a raw memory clobber, for use in clobber list
   let baseType = nimSymbol.getTypeImpl()[2].getTypeImpl()[0]
   let cBaseType = if baseType.sameType(getType(uint64)): "NU64"
                   else: "NU32"
 
-  let symStr = nimSymbol.toString()
+  case memIndirect
+  of memRead:
+    return escapeConstraint("\"o\" (*(const " & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
+  of memWrite:
+    return escapeConstraint("\"=o\" (*(" & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
+  of memReadWrite:
+    return escapeConstraint("\"+o\" (*(" & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
+  else:
+    doAssert false, "Indirect access kind not specified"
+
+func genConstraintMemClobber(asmSymbol: string, nimSymbol: NimNode, len: int, memIndirect: MemIndirectAccess): seq[NimNode] =
+  ## Create a constraint memory clobber, for use in constraint list
+  let baseType = nimSymbol.getTypeImpl()[2].getTypeImpl()[0]
+  let cBaseType = if baseType.sameType(getType(uint64)): "NU64"
+                  else: "NU32"
 
   case memIndirect
   of memRead:
-    return "\"o\" (`*(const " & cBaseType & " (*)[" & $len & "]) " & symStr & "`)"
+    return escapeConstraint("[" & asmSymbol & "] \"o\" (*(const " & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
   of memWrite:
-    return "\"=o\" (`*(" & cBaseType & " (*)[" & $len & "]) " & symStr & "`)"
+    return escapeConstraint("[" & asmSymbol & "] \"=o\" (*(" & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
   of memReadWrite:
-    return "\"+o\" (`*(" & cBaseType & " (*)[" & $len & "]) " & symStr & "`)"
+    return escapeConstraint("[" & asmSymbol & "] \"+o\" (*(" & cBaseType & " (*)[" & $len & "]) ", nimSymbol)
   else:
     doAssert false, "Indirect access kind not specified"
 
 func asmValue*(nimSymbol: NimNode, rm: RM, constraint: Constraint): Operand =
-  let symStr = $nimSymbol
-
   let desc = OperandDesc(
-        asmId: "[" & symStr & "]",
+        asmId: "[" & $nimSymbol & "]",
         nimSymbol: nimSymbol,
         rm: rm,
         constraint: constraint)
   if rm in {Mem, MemOffsettable}:
-    desc.setConstraintString("*&" & symStr)
+    desc.setConstraintDesc("*&", nimSymbol)
   else:
-    desc.setConstraintString(symStr)
+    desc.setConstraintDesc(nimSymbol)
   return Operand(desc: desc)
 
 func asmArray*(nimSymbol: NimNode, len: int, rm: RM, constraint: Constraint, memIndirect = memNoAccess): OperandArray =
@@ -229,8 +265,8 @@ func asmArray*(nimSymbol: NimNode, len: int, rm: RM, constraint: Constraint, mem
                   nimSymbol: nimSymbol,
                   rm: rm,
                   constraint: constraint,
-                  memClobbered: @[(memIndirect, genMemClobber(nimSymbol, len, memIndirect))])
-    desc.setConstraintString(symStr)
+                  memClobbered: @[(memIndirect, genRawMemClobber(nimSymbol, len, memIndirect))])
+    desc.setConstraintDesc(nimSymbol)
 
     for i in 0 ..< len:
       result.buf[i] = Operand(
@@ -256,7 +292,7 @@ func asmArray*(nimSymbol: NimNode, len: int, rm: RM, constraint: Constraint, mem
                   nimSymbol: nimSymbol,
                   rm: rm,
                   constraint: constraint,
-                  constraintString: "[" & symStr & "] " & genMemClobber(nimSymbol, len, memIndirect))
+                  constraintDesc: genConstraintMemClobber(symStr, nimSymbol, len, memIndirect))
 
     for i in 0 ..< len:
       # let desc = OperandDesc(
@@ -280,7 +316,7 @@ func asmArray*(nimSymbol: NimNode, len: int, rm: RM, constraint: Constraint, mem
                   nimSymbol: ident(symStr & $i),
                   rm: rm,
                   constraint: constraint)
-      desc.setConstraintString(symStr & "[" & $i & "]")
+      desc.setConstraintDesc(nnkBracketExpr.newTree(nimSymbol, newLit i))
       result.buf[i] = Operand(
         desc: desc,
         kind: kRegister)
@@ -293,7 +329,7 @@ func asArrayAddr*(op: Operand, memPointer: NimNode, len: int, memIndirect: MemIn
     desc: nil,
     buf: newSeq[Operand](len))
 
-  op.desc.memClobbered.add (memIndirect, genMemClobber(memPointer, len, memIndirect))
+  op.desc.memClobbered.add (memIndirect, genRawMemClobber(memPointer, len, memIndirect))
 
   for i in 0 ..< len:
     result.buf[i] = Operand(
@@ -313,7 +349,7 @@ func asArrayAddr*(op: Register, memPointer: NimNode, len: int, memIndirect: MemI
         rm: ClobberedReg,
         constraint: asmClobberedRegister)
 
-  desc.memClobbered = @[(memIndirect, genMemClobber(memPointer, len, memIndirect))]
+  desc.memClobbered = @[(memIndirect, genRawMemClobber(memPointer, len, memIndirect))]
 
   for i in 0 ..< len:
     result.buf[i] = Operand(
@@ -330,7 +366,7 @@ func as2dArrayAddr*(op: Operand, memPointer: NimNode, rows, cols: int, memIndire
     dims: [rows, cols],
     buf2d: newSeq[Operand](rows*cols))
 
-  op.desc.memClobbered.add (memIndirect, genMemClobber(memPointer, rows*cols, memIndirect))
+  op.desc.memClobbered.add (memIndirect, genRawMemClobber(memPointer, rows*cols, memIndirect))
 
   for i in 0 ..< rows*cols:
     result.buf2d[i] = Operand(
@@ -354,7 +390,7 @@ func setToCarryFlag*(a: var Assembler_x86, carry: NimNode) =
     nimSymbol: ident(symStr),
     rm: CarryFlag,
     constraint: asmOutputOverwrite)
-  desc.setConstraintString(symStr)
+  desc.setConstraintDesc(nimSymbol)
   a.operands.incl(desc)
 
 func generate*(a: Assembler_x86): NimNode =
@@ -362,15 +398,15 @@ func generate*(a: Assembler_x86): NimNode =
   ## the desired instruction
 
   var
-    outOperands: seq[string]
-    inOperands: seq[string]
+    outOperands: seq[seq[NimNode]]
+    inOperands: seq[seq[NimNode]]
     memClobbered = false
 
   for odesc in a.operands.items():
     if odesc.constraint in {asmInput, asmInputCommutative}:
-      inOperands.add odesc.constraintString
+      inOperands.add odesc.constraintDesc
     else:
-      outOperands.add odesc.constraintString
+      outOperands.add odesc.constraintDesc
 
     for (memIndirect, memDesc) in odesc.memClobbered:
       # TODO: precise clobbering.
@@ -387,9 +423,9 @@ func generate*(a: Assembler_x86): NimNode =
       # else:
       #   outOperands.add memDesc
 
-  var params: string
-  params.add ": " & outOperands.join(", ") & '\n'
-  params.add ": " & inOperands.join(", ") & '\n'
+  var params: seq[NimNode] # An empty operand list must still emit ": \n"; 2-argument foldl cannot fold an empty seq
+  params.add newLit(": ") & (if outOperands.len == 0: newSeq[NimNode]() else: outOperands.foldl(a & newLit(", ") & b)) & newLit("\n")
+  params.add newLit(": ") & (if  inOperands.len == 0: newSeq[NimNode]() else:  inOperands.foldl(a & newLit(", ") & b)) & newLit("\n")
 
   let clobbers = [(a.isStackClobbered, "sp"),
                   (a.areFlagsClobbered, "cc"),
@@ -408,7 +444,7 @@ func generate*(a: Assembler_x86): NimNode =
     else:
       clobberList.add ", \"" & $reg & '\"'
 
-  params.add clobberList
+  params.add newLit(clobberList)
 
   # GCC will optimize ASM away if there are no
   # memory operand or volatile + memory clobber
@@ -422,12 +458,20 @@ func generate*(a: Assembler_x86): NimNode =
   var asmStmt = "\"" & a.code.replace("\n", "\\n\"\n\"")
   asmStmt.setLen(asmStmt.len - 1) # drop the last quote
 
+  var emitStmt = nnkBracket.newTree(
+        newLit("\nasm volatile(\n"),
+        newLit(asmStmt),
+  )
+
+  for node in params:
+    emitStmt.add node
+
+  emitStmt.add newLit(");")
+
   result = nnkPragma.newTree(
     nnkExprColonExpr.newTree(
       ident"emit",
-      newLit(
-        "asm volatile(\n" & asmStmt & params & ");"
-      )
+      emitStmt
     )
   )
   result = nnkBlockStmt.newTree(
diff --git a/constantine/platforms/static_for.nim b/constantine/platforms/static_for.nim
index 3c896f9a..7f5aa8f0 100644
--- a/constantine/platforms/static_for.nim
+++ b/constantine/platforms/static_for.nim
@@ -43,6 +43,8 @@ macro staticForCountdown*(idx: untyped{nkIdent}, start, stopIncl: static int, bo
 
 {.experimental: "dynamicBindSym".}
 
+const nim_v2 = (NimMajor, NimMinor) > (1, 6)
+
 macro staticFor*(ident: untyped{nkIdent}, choices: typed, body: untyped): untyped =
   ## matches
   ##   staticFor(curve, TestCurves):
@@ -51,7 +53,11 @@ macro staticFor*(ident: untyped{nkIdent}, choices: typed, body: untyped): untype
 
   let choices = if choices.kind == nnkSym:
                   # Unpack symbol
-                  choices.getImpl()
+                  let impl = choices.getImpl()
+                  when nim_v2: # Nim newer than 1.6
+                    impl[2] # impl is treated as an nnkConstDef here: keep child 2, its value
+                  else:
+                    impl
                 else:
                   choices.expectKind(nnkBracket)
                   choices
@@ -59,5 +65,5 @@ macro staticFor*(ident: untyped{nkIdent}, choices: typed, body: untyped): untype
   result = newStmtList()
   for choice in choices:
     result.add nnkBlockStmt.newTree(
-      ident($ident & "_" & $choice.intVal),
+      nnkAccQuoted.newTree(ident, ident("_"), choice),
       body.replaceNodes(ident, choice))
diff --git a/examples-c/t_libctt_bls12_381.c b/examples-c/t_libctt_bls12_381.c
index 612c1cc1..7b53692c 100644
--- a/examples-c/t_libctt_bls12_381.c
+++ b/examples-c/t_libctt_bls12_381.c
@@ -260,6 +260,11 @@ int main(){
   // }
   // printf(" SUCCESS square root\n");
 
+  mpz_clear(r);
+  mpz_clear(p);
+  mpz_clear(b);
+  mpz_clear(a);
+
   return 0;
 
 }
\ No newline at end of file
diff --git a/examples-threadpool/e01_simple_tasks.nim b/examples-threadpool/e01_simple_tasks.nim
index 731fe6a4..f7b580b7 100644
--- a/examples-threadpool/e01_simple_tasks.nim
+++ b/examples-threadpool/e01_simple_tasks.nim
@@ -26,7 +26,7 @@ block: # Async without result
 block: # Async/Await
   var tp: Threadpool
 
-  proc asyncFib(n: int): int =
+  proc asyncFib(n: int): int {.gcsafe, raises: [].} =
     if n < 2:
       return n
 
diff --git a/tests/math_arbitrary_precision/t_bigints_mod_vs_gmp.nim b/tests/math_arbitrary_precision/t_bigints_mod_vs_gmp.nim
index 906af033..8b1e7b82 100644
--- a/tests/math_arbitrary_precision/t_bigints_mod_vs_gmp.nim
+++ b/tests/math_arbitrary_precision/t_bigints_mod_vs_gmp.nim
@@ -173,8 +173,8 @@ proc main() =
       "  Constantine:    " & rCttVartime.toHex() & "\n" &
       "(Note that GMP aligns bytes left while constantine aligns bytes right)"
 
-  mpz_clear(a)
-  mpz_clear(m)
   mpz_clear(r)
+  mpz_clear(m)
+  mpz_clear(a)
 
 main()
diff --git a/tests/math_arbitrary_precision/t_bigints_powmod_vs_gmp.nim b/tests/math_arbitrary_precision/t_bigints_powmod_vs_gmp.nim
index 1e34a71d..cc5f20ea 100644
--- a/tests/math_arbitrary_precision/t_bigints_powmod_vs_gmp.nim
+++ b/tests/math_arbitrary_precision/t_bigints_powmod_vs_gmp.nim
@@ -80,10 +80,10 @@ proc test(rng: var RngState) =
   var rWritten: csize
   discard rGMP[0].addr.mpz_export(rWritten.addr, GMP_LeastSignificantWordFirst, sizeof(SecretWord), GMP_WordNativeEndian, 0, rr)
 
-  mpz_clear(aa)
-  mpz_clear(ee)
-  mpz_clear(mm)
   mpz_clear(rr)
+  mpz_clear(mm)
+  mpz_clear(ee)
+  mpz_clear(aa)
 
   let
     aBits = a.getBits_LE_vartime()
diff --git a/tests/math_fields/t_finite_fields_mulsquare.nim b/tests/math_fields/t_finite_fields_mulsquare.nim
index 0326c5f3..2c8f1ef9 100644
--- a/tests/math_fields/t_finite_fields_mulsquare.nim
+++ b/tests/math_fields/t_finite_fields_mulsquare.nim
@@ -107,7 +107,9 @@ proc mainSelectCases() =
         r_mul.prod(a, a)
         r_sqr.square(a)
 
-        doAssert bool(r_mul == r_sqr)
+        doAssert bool(r_mul == r_sqr), block:
+          "\nMul: " & r_mul.toHex() &
+          "\nSqr: " & r_sqr.toHex()
 
 mainSelectCases()
 
@@ -119,7 +121,9 @@ proc randomCurve(C: static Curve) =
   r_mul.prod(a, a)
   r_sqr.square(a)
 
-  doAssert bool(r_mul == r_sqr)
+  doAssert bool(r_mul == r_sqr), block:
+    "\nMul: " & r_mul.toHex() &
+    "\nSqr: " & r_sqr.toHex()
 
 proc randomHighHammingWeight(C: static Curve) =
   let a = rng.random_highHammingWeight(Fp[C])
@@ -129,7 +133,9 @@ proc randomHighHammingWeight(C: static Curve) =
   r_mul.prod(a, a)
   r_sqr.square(a)
 
-  doAssert bool(r_mul == r_sqr)
+  doAssert bool(r_mul == r_sqr), block:
+    "\nMul: " & r_mul.toHex() &
+    "\nSqr: " & r_sqr.toHex()
 
 proc random_long01Seq(C: static Curve) =
   let a = rng.random_long01Seq(Fp[C])
@@ -139,7 +145,9 @@ proc random_long01Seq(C: static Curve) =
   r_mul.prod(a, a)
   r_sqr.square(a)
 
-  doAssert bool(r_mul == r_sqr)
+  doAssert bool(r_mul == r_sqr), block:
+    "\nMul: " & r_mul.toHex() &
+    "\nSqr: " & r_sqr.toHex()
 
 suite "Random Modular Squaring is consistent with Modular Multiplication" & " [" & $WordBitWidth & "-bit words]":
   test "Random squaring mod P-224 [FastSquaring = " & $(Fp[P224].getSpareBits() >= 2) & "]":
diff --git a/tests/math_fields/t_finite_fields_vs_gmp.nim b/tests/math_fields/t_finite_fields_vs_gmp.nim
index 890fb68f..3d42e2be 100644
--- a/tests/math_fields/t_finite_fields_vs_gmp.nim
+++ b/tests/math_fields/t_finite_fields_vs_gmp.nim
@@ -240,6 +240,11 @@ template testSetup {.dirty.} =
   mpz_init(b)
   mpz_init(p)
   mpz_init(r)
+  defer:
+    mpz_clear(r)
+    mpz_clear(p)
+    mpz_clear(b)
+    mpz_clear(a)
 
 proc mainMul() =
   testSetup()