diff --git a/doc/safegcd_implementation.md b/doc/safegcd_implementation.md
index 5216231e5..5dbbb7bbd 100644
--- a/doc/safegcd_implementation.md
+++ b/doc/safegcd_implementation.md
@@ -1,7 +1,7 @@
 # The safegcd implementation in libsecp256k1 explained
 
-This document explains the modular inverse implementation in the `src/modinv*.h` files. It is based
-on the paper
+This document explains the modular inverse and Jacobi symbol implementations in the `src/modinv*.h` files.
+It is based on the paper
 ["Fast constant-time gcd computation and modular inversion"](https://gcd.cr.yp.to/papers.html#safegcd)
 by Daniel J. Bernstein and Bo-Yin Yang. The references below are for the Date: 2019.04.13 version.
 
@@ -769,3 +769,51 @@ def modinv_var(M, Mi, x):
         d, e = update_de(d, e, t, M, Mi)
     return normalize(f, d, Mi)
 ```
+
+## 8. From GCDs to Jacobi symbol
+
+We can also use a similar approach to calculate Jacobi symbol *(x | M)* by keeping track of an
+extra variable *j*, for which at every step *(x | M) = j (g | f)*. As we update *f* and *g*, we
+make corresponding updates to *j* using
+[properties of the Jacobi symbol](https://en.wikipedia.org/wiki/Jacobi_symbol#Properties):
+* *((g/2) | f)* is either *(g | f)* or *-(g | f)*, depending on the value of *f mod 8* (negating if it's *3* or *5*).
+* *(f | g)* is either *(g | f)* or *-(g | f)*, depending on *f mod 4* and *g mod 4* (negating if both are *3*).
+
+These updates depend only on the values of *f* and *g* modulo *4* or *8*, and can thus be applied
+very quickly, as long as we keep track of a few additional bits of *f* and *g*. Overall, this
+calculation is slightly simpler than the one for the modular inverse because we no longer need to
+keep track of *d* and *e*.
+
+However, one difficulty of this approach is that the Jacobi symbol *(a | n)* is only defined for
+positive odd integers *n*, whereas in the original safegcd algorithm, *f, g* can take negative
+values. We resolve this by using the following modified steps:
+
+```python
+        # Before
+        if delta > 0 and g & 1:
+            delta, f, g = 1 - delta, g, (g - f) // 2
+
+        # After
+        if delta > 0 and g & 1:
+            delta, f, g = 1 - delta, g, (g + f) // 2
+```
+
+The algorithm is still correct, since the changed divstep, called a "posdivstep" (see section 8.4
+and E.5 in the paper) preserves *gcd(f, g)*. However, there's no proof that the modified algorithm
+will converge. The justification for posdivsteps is completely empirical: in practice, it appears
+that the vast majority of nonzero inputs converge to *f=g=gcd(f<sub>0</sub>, g<sub>0</sub>)* in a
+number of steps proportional to their logarithm.
+
+Note that:
+- We require inputs to satisfy *gcd(x, M) = 1*, as otherwise *f=1* is not reached.
+- We require inputs *x &neq; 0*, because applying posdivstep with *g=0* has no effect.
+- We need to update the termination condition from *g=0* to *f=1*.
+
+We account for the possibility of nonconvergence by only performing a bounded number of
+posdivsteps, and then falling back to square-root based Jacobi calculation if a solution has not
+yet been found.
+
+The optimizations in sections 3-7 above are described in the context of the original divsteps, but
+in the C implementation we also adapt most of them (not including "avoiding modulus operations",
+since it's not necessary to track *d, e*, and "constant-time operation", since we never calculate
+Jacobi symbols for secret data) to the posdivsteps version.
diff --git a/src/bench_internal.c b/src/bench_internal.c
index e1ef14fdb..c248ab8eb 100644
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@@ -219,6 +219,19 @@ static void bench_field_sqrt(void* arg, int iters) {
     CHECK(j <= iters);
 }
 
+static void bench_field_is_square_var(void* arg, int iters) {
+    int i, j = 0;
+    bench_inv *data = (bench_inv*)arg;
+    secp256k1_fe t = data->fe[0];
+
+    for (i = 0; i < iters; i++) {
+        j += secp256k1_fe_is_square_var(&t);
+        secp256k1_fe_add(&t, &data->fe[1]);
+        secp256k1_fe_normalize_var(&t);
+    }
+    CHECK(j <= iters);
+}
+
 static void bench_group_double_var(void* arg, int iters) {
     int i;
     bench_inv *data = (bench_inv*)arg;
@@ -371,6 +384,7 @@ int main(int argc, char **argv) {
     if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "mul")) run_benchmark("field_mul", bench_field_mul, bench_setup, NULL, &data, 10, iters*10);
     if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse", bench_field_inverse, bench_setup, NULL, &data, 10, iters);
     if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "inverse")) run_benchmark("field_inverse_var", bench_field_inverse_var, bench_setup, NULL, &data, 10, iters);
+    if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "issquare")) run_benchmark("field_is_square_var", bench_field_is_square_var, bench_setup, NULL, &data, 10, iters);
     if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "sqrt")) run_benchmark("field_sqrt", bench_field_sqrt, bench_setup, NULL, &data, 10, iters);
 
     if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "double")) run_benchmark("group_double_var", bench_group_double_var, bench_setup, NULL, &data, 10, iters*10);
diff --git a/src/field.h b/src/field.h
index 4b57548be..ccb4327ee 100644
--- a/src/field.h
+++ b/src/field.h
@@ -135,4 +135,7 @@ static void secp256k1_fe_half(secp256k1_fe *r);
  *  magnitude set to 'm' and is normalized if (and only if) 'm' is zero. */
 static void secp256k1_fe_get_bounds(secp256k1_fe *r, int m);
 
+/** Determine whether a is a square (modulo p). */
+static int secp256k1_fe_is_square_var(const secp256k1_fe *a);
+
 #endif /* SECP256K1_FIELD_H */
diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index 5e6c63cf4..72c56df02 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -1365,4 +1365,31 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
     VERIFY_CHECK(secp256k1_fe_normalizes_to_zero(r) == secp256k1_fe_normalizes_to_zero(&tmp));
 }
 
+static int secp256k1_fe_is_square_var(const secp256k1_fe *x) {
+    secp256k1_fe tmp;
+    secp256k1_modinv32_signed30 s;
+    int jac, ret;
+
+    tmp = *x;
+    secp256k1_fe_normalize_var(&tmp);
+    /* secp256k1_jacobi32_maybe_var cannot deal with input 0. */
+    if (secp256k1_fe_is_zero(&tmp)) return 1;
+    secp256k1_fe_to_signed30(&s, &tmp);
+    jac = secp256k1_jacobi32_maybe_var(&s, &secp256k1_const_modinfo_fe);
+    if (jac == 0) {
+        /* secp256k1_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back
+         * to computing a square root. This should be extremely rare with random
+         * input (except in VERIFY mode, where a lower iteration count is used). */
+        secp256k1_fe dummy;
+        ret = secp256k1_fe_sqrt(&dummy, &tmp);
+    } else {
+#ifdef VERIFY
+        secp256k1_fe dummy;
+        VERIFY_CHECK(jac == 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1);
+#endif
+        ret = jac >= 0;
+    }
+    return ret;
+}
+
 #endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index 985c486ae..538991c7a 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -664,4 +664,31 @@ static void secp256k1_fe_inv_var(secp256k1_fe *r, const secp256k1_fe *x) {
 #endif
 }
 
+static int secp256k1_fe_is_square_var(const secp256k1_fe *x) {
+    secp256k1_fe tmp;
+    secp256k1_modinv64_signed62 s;
+    int jac, ret;
+
+    tmp = *x;
+    secp256k1_fe_normalize_var(&tmp);
+    /* secp256k1_jacobi64_maybe_var cannot deal with input 0. */
+    if (secp256k1_fe_is_zero(&tmp)) return 1;
+    secp256k1_fe_to_signed62(&s, &tmp);
+    jac = secp256k1_jacobi64_maybe_var(&s, &secp256k1_const_modinfo_fe);
+    if (jac == 0) {
+        /* secp256k1_jacobi64_maybe_var failed to compute the Jacobi symbol. Fall back
+         * to computing a square root. This should be extremely rare with random
+         * input (except in VERIFY mode, where a lower iteration count is used). */
+        secp256k1_fe dummy;
+        ret = secp256k1_fe_sqrt(&dummy, &tmp);
+    } else {
+#ifdef VERIFY
+        secp256k1_fe dummy;
+        VERIFY_CHECK(jac == 2*secp256k1_fe_sqrt(&dummy, &tmp) - 1);
+#endif
+        ret = jac >= 0;
+    }
+    return ret;
+}
+
 #endif /* SECP256K1_FIELD_REPR_IMPL_H */
diff --git a/src/int128.h b/src/int128.h
index 8c1c061a8..5355fbfae 100644
--- a/src/int128.h
+++ b/src/int128.h
@@ -80,10 +80,10 @@ static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_
 /* Compare two 128-bit values for equality. */
 static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b);
 
-/* Tests if r is equal to 2^n.
+/* Tests if r is equal to sign*2^n (sign must be 1 or -1).
  * n must be strictly less than 127.
  */
-static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n);
+static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n, int sign);
 
 #endif
 
diff --git a/src/int128_native_impl.h b/src/int128_native_impl.h
index 30c26f2d3..996e542cf 100644
--- a/src/int128_native_impl.h
+++ b/src/int128_native_impl.h
@@ -84,9 +84,10 @@ static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, con
    return *a == *b;
 }
 
-static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) {
+static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n, int sign) {
    VERIFY_CHECK(n < 127);
-   return (*r == (int128_t)1 << n);
+   VERIFY_CHECK(sign == 1 || sign == -1);
+   return (*r == (int128_t)((uint128_t)sign << n));
 }
 
 #endif
diff --git a/src/int128_struct_impl.h b/src/int128_struct_impl.h
index 298a7bb1a..2eb337cb5 100644
--- a/src/int128_struct_impl.h
+++ b/src/int128_struct_impl.h
@@ -189,10 +189,11 @@ static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, con
    return a->hi == b->hi && a->lo == b->lo;
 }
 
-static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) {
-   VERIFY_CHECK(n < 127);
-   return n >= 64 ? r->hi == (uint64_t)1 << (n - 64) && r->lo == 0
-                  : r->hi == 0 && r->lo == (uint64_t)1 << n;
+static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n, int sign) {
+    VERIFY_CHECK(n < 127);
+    VERIFY_CHECK(sign == 1 || sign == -1);
+    return n >= 64 ? r->hi == (uint64_t)sign << (n - 64) && r->lo == 0
+                   : r->hi == (uint64_t)((sign - 1) >> 1) && r->lo == (uint64_t)sign << n;
 }
 
 #endif
diff --git a/src/modinv32.h b/src/modinv32.h
index d1e6e23c1..846c642f8 100644
--- a/src/modinv32.h
+++ b/src/modinv32.h
@@ -35,4 +35,9 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
 /* Same as secp256k1_modinv32_var, but constant time in x (not in the modulus). */
 static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
 
+/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus
+ * cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result
+ * cannot be computed. */
+static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
+
 #endif /* SECP256K1_MODINV32_H */
diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h
index 661c5fc04..028a5701e 100644
--- a/src/modinv32_impl.h
+++ b/src/modinv32_impl.h
@@ -232,6 +232,21 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
     return zeta;
 }
 
+/* inv256[i] = -(2*i+1)^-1 (mod 256) */
+static const uint8_t secp256k1_modinv32_inv256[128] = {
+    0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
+    0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
+    0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
+    0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
+    0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
+    0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
+    0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
+    0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
+    0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
+    0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
+    0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
+};
+
 /* Compute the transition matrix and eta for 30 divsteps (variable time).
  *
  * Input:  eta: initial eta
@@ -243,21 +258,6 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
  * Implements the divsteps_n_matrix_var function from the explanation.
  */
 static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
-    /* inv256[i] = -(2*i+1)^-1 (mod 256) */
-    static const uint8_t inv256[128] = {
-        0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
-        0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
-        0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
-        0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
-        0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
-        0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
-        0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
-        0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
-        0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
-        0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
-        0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
-    };
-
     /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
     uint32_t u = 1, v = 0, q = 0, r = 1;
     uint32_t f = f0, g = g0, m;
@@ -297,7 +297,7 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint
         VERIFY_CHECK(limit > 0 && limit <= 30);
         m = (UINT32_MAX >> (32 - limit)) & 255U;
         /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
-        w = (g * inv256[(f >> 1) & 127]) & m;
+        w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
         /* Do so. */
         g += f * w;
         q += u * w;
@@ -317,6 +317,86 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint
     return eta;
 }
 
+/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
+ * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
+ * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
+ *
+ * Input:        eta: initial eta
+ *               f0:  bottom limb of initial f
+ *               g0:  bottom limb of initial g
+ * Output:       t: transition matrix
+ * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
+ *               by applying the returned transformation matrix to it. The other bits of *jacp may
+ *               change, but are meaningless.
+ * Return: final eta
+ */
+static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
+    /* Transformation matrix. */
+    uint32_t u = 1, v = 0, q = 0, r = 1;
+    uint32_t f = f0, g = g0, m;
+    uint16_t w;
+    int i = 30, limit, zeros;
+    int jac = *jacp;
+
+    for (;;) {
+        /* Use a sentinel bit to count zeros only up to i. */
+        zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
+        /* Perform zeros divsteps at once; they all just divide g by two. */
+        g >>= zeros;
+        u <<= zeros;
+        v <<= zeros;
+        eta -= zeros;
+        i -= zeros;
+        /* Update the bottom bit of jac: when dividing g by an odd power of 2,
+         * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
+        jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
+        /* We're done once we've done 30 posdivsteps. */
+        if (i == 0) break;
+        VERIFY_CHECK((f & 1) == 1);
+        VERIFY_CHECK((g & 1) == 1);
+        VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
+        VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
+        /* If eta is negative, negate it and replace f,g with g,f. */
+        if (eta < 0) {
+            uint32_t tmp;
+            eta = -eta;
+            /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
+             * if both f and g are 3 mod 4. */
+            jac ^= ((f & g) >> 1);
+            tmp = f; f = g; g = tmp;
+            tmp = u; u = q; q = tmp;
+            tmp = v; v = r; r = tmp;
+        }
+        /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
+         * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
+         * can be done as its sign will flip once that happens. */
+        limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+        /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
+        VERIFY_CHECK(limit > 0 && limit <= 30);
+        m = (UINT32_MAX >> (32 - limit)) & 255U;
+        /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
+        w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
+        /* Do so. */
+        g += f * w;
+        q += u * w;
+        r += v * w;
+        VERIFY_CHECK((g & m) == 0);
+    }
+    /* Return data in t and return value. */
+    t->u = (int32_t)u;
+    t->v = (int32_t)v;
+    t->q = (int32_t)q;
+    t->r = (int32_t)r;
+    /* The determinant of t must be a power of two. This guarantees that multiplication with t
+     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+     * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
+     * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
+    VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
+                 (int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
+    *jacp = jac;
+    return eta;
+}
+
 /* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
  *
  * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
@@ -584,4 +664,74 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
     *x = d;
 }
 
+/* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
+ * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
+#ifdef VERIFY
+#define JACOBI32_ITERATIONS 25
+#else
+#define JACOBI32_ITERATIONS 50
+#endif
+
+/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
+static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
+    /* Start with f=modulus, g=x, eta=-1. */
+    secp256k1_modinv32_signed30 f = modinfo->modulus;
+    secp256k1_modinv32_signed30 g = *x;
+    int j, len = 9;
+    int32_t eta = -1; /* eta = -delta; delta is initially 1 */
+    int32_t cond, fn, gn;
+    int jac = 0;
+    int count;
+
+    /* The input limbs must all be non-negative. */
+    VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
+
+    /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
+     * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
+     * time out). */
+    VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);
+
+    for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
+        /* Compute transition matrix and new eta after 30 posdivsteps. */
+        secp256k1_modinv32_trans2x2 t;
+        eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
+        /* Update f,g using that transition matrix. */
+#ifdef VERIFY
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
+#endif
+        secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
+        /* If the bottom limb of f is 1, there is a chance that f=1. */
+        if (f.v[0] == 1) {
+            cond = 0;
+            /* Check if the other limbs are also 0. */
+            for (j = 1; j < len; ++j) {
+                cond |= f.v[j];
+            }
+            /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
+            if (cond == 0) return 1 - 2*(jac & 1);
+        }
+
+        /* Determine if len>1 and limb (len-1) of both f and g is 0. */
+        fn = f.v[len - 1];
+        gn = g.v[len - 1];
+        cond = ((int32_t)len - 2) >> 31;
+        cond |= fn;
+        cond |= gn;
+        /* If so, reduce length. */
+        if (cond == 0) --len;
+#ifdef VERIFY
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
+        VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
+#endif
+    }
+
+    /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */
+    return 0;
+}
+
 #endif /* SECP256K1_MODINV32_IMPL_H */
diff --git a/src/modinv64.h b/src/modinv64.h
index 1d69f14f3..f4208e6c2 100644
--- a/src/modinv64.h
+++ b/src/modinv64.h
@@ -39,4 +39,9 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256
 /* Same as secp256k1_modinv64_var, but constant time in x (not in the modulus). */
 static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
 
+/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus
+ * cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result
+ * cannot be computed. */
+static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
+
 #endif /* SECP256K1_MODINV64_H */
diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h
index 2b5463a3f..df9aedff8 100644
--- a/src/modinv64_impl.h
+++ b/src/modinv64_impl.h
@@ -71,11 +71,13 @@ static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, i
     return 0;
 }
 
-/* Check if the determinant of t is equal to 1 << n. */
-static int secp256k1_modinv64_det_check_pow2(const secp256k1_modinv64_trans2x2 *t, unsigned int n) {
+/* Check if the determinant of t is equal to 1 << n. If abs, check if |det t| == 1 << n. */
+static int secp256k1_modinv64_det_check_pow2(const secp256k1_modinv64_trans2x2 *t, unsigned int n, int abs) {
     secp256k1_int128 a;
     secp256k1_i128_det(&a, t->u, t->v, t->q, t->r);
-    return secp256k1_i128_check_pow2(&a, n);
+    if (secp256k1_i128_check_pow2(&a, n, 1)) return 1;
+    if (abs && secp256k1_i128_check_pow2(&a, n, -1)) return 1;
+    return 0;
 }
 #endif
 
@@ -218,7 +220,7 @@ static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_
      * aggregate of 59 of them will have determinant 2^59. Multiplying with the initial
      * 8*identity (which has determinant 2^6) means the overall outputs has determinant
      * 2^65. */
-    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 65));
+    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 65, 0));
 #endif
     return zeta;
 }
@@ -266,7 +268,7 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint
             tmp = v; v = r; r = -tmp;
             /* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled
              * out (as we'd be done before that point), and no more than eta+1 can be done as its
-             * will flip again once that happens. */
+             * sign will flip again once that happens. */
             limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
             VERIFY_CHECK(limit > 0 && limit <= 62);
             /* m is a mask for the bottom min(limit, 6) bits. */
@@ -301,8 +303,100 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint
      * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
      * will be divided out again). As each divstep's individual matrix has determinant 2, the
      * aggregate of 62 of them will have determinant 2^62. */
-    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62));
+    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62, 0));
+#endif
+    return eta;
+}
+
+/* Compute the transition matrix and eta for 62 posdivsteps (variable time, eta=-delta), and keeps track
+ * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^64 rather than 2^62, because
+ * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
+ *
+ * Input:        eta: initial eta
+ *               f0:  bottom limb of initial f
+ *               g0:  bottom limb of initial g
+ * Output:       t: transition matrix
+ * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
+ *               by applying the returned transformation matrix to it. The other bits of *jacp may
+ *               change, but are meaningless.
+ * Return:       final eta
+ */
+static int64_t secp256k1_modinv64_posdivsteps_62_var(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t, int *jacp) {
+    /* Transformation matrix; see comments in secp256k1_modinv64_divsteps_62. */
+    uint64_t u = 1, v = 0, q = 0, r = 1;
+    uint64_t f = f0, g = g0, m;
+    uint32_t w;
+    int i = 62, limit, zeros;
+    int jac = *jacp;
+
+    for (;;) {
+        /* Use a sentinel bit to count zeros only up to i. */
+        zeros = secp256k1_ctz64_var(g | (UINT64_MAX << i));
+        /* Perform zeros divsteps at once; they all just divide g by two. */
+        g >>= zeros;
+        u <<= zeros;
+        v <<= zeros;
+        eta -= zeros;
+        i -= zeros;
+        /* Update the bottom bit of jac: when dividing g by an odd power of 2,
+         * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
+        jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
+        /* We're done once we've done 62 posdivsteps. */
+        if (i == 0) break;
+        VERIFY_CHECK((f & 1) == 1);
+        VERIFY_CHECK((g & 1) == 1);
+        VERIFY_CHECK((u * f0 + v * g0) == f << (62 - i));
+        VERIFY_CHECK((q * f0 + r * g0) == g << (62 - i));
+        /* If eta is negative, negate it and replace f,g with g,f. */
+        if (eta < 0) {
+            uint64_t tmp;
+            eta = -eta;
+            tmp = f; f = g; g = tmp;
+            tmp = u; u = q; q = tmp;
+            tmp = v; v = r; r = tmp;
+            /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
+             * if both f and g are 3 mod 4. */
+            jac ^= ((f & g) >> 1);
+            /* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled
+             * out (as we'd be done before that point), and no more than eta+1 can be done as its
+             * sign will flip again once that happens. */
+            limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+            VERIFY_CHECK(limit > 0 && limit <= 62);
+            /* m is a mask for the bottom min(limit, 6) bits. */
+            m = (UINT64_MAX >> (64 - limit)) & 63U;
+            /* Find what multiple of f must be added to g to cancel its bottom min(limit, 6)
+             * bits. */
+            w = (f * g * (f * f - 2)) & m;
+        } else {
+            /* In this branch, use a simpler formula that only lets us cancel up to 4 bits of g, as
+             * eta tends to be smaller here. */
+            limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
+            VERIFY_CHECK(limit > 0 && limit <= 62);
+            /* m is a mask for the bottom min(limit, 4) bits. */
+            m = (UINT64_MAX >> (64 - limit)) & 15U;
+            /* Find what multiple of f must be added to g to cancel its bottom min(limit, 4)
+             * bits. */
+            w = f + (((f + 1) & 4) << 1);
+            w = (-w * g) & m;
+        }
+        g += f * w;
+        q += u * w;
+        r += v * w;
+        VERIFY_CHECK((g & m) == 0);
+    }
+    /* Return data in t and return value. */
+    t->u = (int64_t)u;
+    t->v = (int64_t)v;
+    t->q = (int64_t)q;
+    t->r = (int64_t)r;
+#ifdef VERIFY
+    /* The determinant of t must be a power of two. This guarantees that multiplication with t
+     * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
+     * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
+     * the aggregate of 62 of them will have determinant 2^62 or -2^62. */
+    VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62, 1));
 #endif
+    *jacp = jac;
     return eta;
 }
 
@@ -626,4 +720,74 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256
     *x = d;
 }
 
+/* Do up to 25 iterations of 62 posdivsteps (up to 1550 steps; more is extremely rare) each until f=1.
+ * In VERIFY mode use a lower number of iterations (744, close to the median 756), so failure actually occurs. */
+#ifdef VERIFY
+#define JACOBI64_ITERATIONS 12
+#else
+#define JACOBI64_ITERATIONS 25
+#endif
+
+/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
+static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) {
+    /* Start with f=modulus, g=x, eta=-1. */
+    secp256k1_modinv64_signed62 f = modinfo->modulus;
+    secp256k1_modinv64_signed62 g = *x;
+    int j, len = 5;
+    int64_t eta = -1; /* eta = -delta; delta is initially 1 */
+    int64_t cond, fn, gn;
+    int jac = 0;
+    int count;
+
+    /* The input limbs must all be non-negative. */
+    VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0);
+
+    /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
+     * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
+     * time out). */
+    VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4]) != 0);
+
+    for (count = 0; count < JACOBI64_ITERATIONS; ++count) {
+        /* Compute transition matrix and new eta after 62 posdivsteps. */
+        secp256k1_modinv64_trans2x2 t;
+        eta = secp256k1_modinv64_posdivsteps_62_var(eta, f.v[0] | ((uint64_t)f.v[1] << 62), g.v[0] | ((uint64_t)g.v[1] << 62), &t, &jac);
+        /* Update f,g using that transition matrix. */
+#ifdef VERIFY
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
+#endif
+        secp256k1_modinv64_update_fg_62_var(len, &f, &g, &t);
+        /* If the bottom limb of f is 1, there is a chance that f=1. */
+        if (f.v[0] == 1) {
+            cond = 0;
+            /* Check if the other limbs are also 0. */
+            for (j = 1; j < len; ++j) {
+                cond |= f.v[j];
+            }
+            /* If so, we're done. When f=1, the Jacobi symbol (g | f)=1. */
+            if (cond == 0) return 1 - 2*(jac & 1);
+        }
+
+        /* Determine if len>1 and limb (len-1) of both f and g is 0. */
+        fn = f.v[len - 1];
+        gn = g.v[len - 1];
+        cond = ((int64_t)len - 2) >> 63;
+        cond |= fn;
+        cond |= gn;
+        /* If so, reduce length. */
+        if (cond == 0) --len;
+#ifdef VERIFY
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
+        VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(&g, len, &modinfo->modulus, 1) < 0);  /* g < modulus */
+#endif
+    }
+
+    /* The loop failed to converge to f=g after 1550 iterations. Return 0, indicating unknown result. */
+    return 0;
+}
+
 #endif /* SECP256K1_MODINV64_IMPL_H */
diff --git a/src/tests.c b/src/tests.c
index bc5b7cb1f..e80397cb9 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -1022,12 +1022,32 @@ static void test_modinv32_uint16(uint16_t* out, const uint16_t* in, const uint16
     uint16_to_signed30(&x, in);
     nonzero = (x.v[0] | x.v[1] | x.v[2] | x.v[3] | x.v[4] | x.v[5] | x.v[6] | x.v[7] | x.v[8]) != 0;
     uint16_to_signed30(&m.modulus, mod);
-    mutate_sign_signed30(&m.modulus);
 
     /* compute 1/modulus mod 2^30 */
     m.modulus_inv30 = modinv2p64(m.modulus.v[0]) & 0x3fffffff;
     CHECK(((m.modulus_inv30 * m.modulus.v[0]) & 0x3fffffff) == 1);
 
+    /* Test secp256k1_jacobi32_maybe_var. */
+    if (nonzero) {
+        int jac;
+        uint16_t sqr[16], negone[16];
+        mulmod256(sqr, in, in, mod);
+        uint16_to_signed30(&x, sqr);
+        /* Compute jacobi symbol of in^2, which must be 1 (or uncomputable). */
+        jac = secp256k1_jacobi32_maybe_var(&x, &m);
+        CHECK(jac == 0 || jac == 1);
+        /* Then compute the jacobi symbol of -(in^2). x and -x have opposite
+         * jacobi symbols if and only if (mod % 4) == 3. */
+        negone[0] = mod[0] - 1;
+        for (i = 1; i < 16; ++i) negone[i] = mod[i];
+        mulmod256(sqr, sqr, negone, mod);
+        uint16_to_signed30(&x, sqr);
+        jac = secp256k1_jacobi32_maybe_var(&x, &m);
+        CHECK(jac == 0 || jac == 1 - (mod[0] & 2));
+    }
+
+    uint16_to_signed30(&x, in);
+    mutate_sign_signed30(&m.modulus);
     for (vartime = 0; vartime < 2; ++vartime) {
         /* compute inverse */
         (vartime ? secp256k1_modinv32_var : secp256k1_modinv32)(&x, &m);
@@ -1095,12 +1115,32 @@ static void test_modinv64_uint16(uint16_t* out, const uint16_t* in, const uint16
     uint16_to_signed62(&x, in);
     nonzero = (x.v[0] | x.v[1] | x.v[2] | x.v[3] | x.v[4]) != 0;
     uint16_to_signed62(&m.modulus, mod);
-    mutate_sign_signed62(&m.modulus);
 
     /* compute 1/modulus mod 2^62 */
     m.modulus_inv62 = modinv2p64(m.modulus.v[0]) & M62;
     CHECK(((m.modulus_inv62 * m.modulus.v[0]) & M62) == 1);
 
+    /* Test secp256k1_jacobi64_maybe_var. */
+    if (nonzero) {
+        int jac;
+        uint16_t sqr[16], negone[16];
+        mulmod256(sqr, in, in, mod);
+        uint16_to_signed62(&x, sqr);
+        /* Compute jacobi symbol of in^2, which must be 1 (or uncomputable). */
+        jac = secp256k1_jacobi64_maybe_var(&x, &m);
+        CHECK(jac == 0 || jac == 1);
+        /* Then compute the jacobi symbol of -(in^2). x and -x have opposite
+         * jacobi symbols if and only if (mod % 4) == 3. */
+        negone[0] = mod[0] - 1;
+        for (i = 1; i < 16; ++i) negone[i] = mod[i];
+        mulmod256(sqr, sqr, negone, mod);
+        uint16_to_signed62(&x, sqr);
+        jac = secp256k1_jacobi64_maybe_var(&x, &m);
+        CHECK(jac == 0 || jac == 1 - (mod[0] & 2));
+    }
+
+    uint16_to_signed62(&x, in);
+    mutate_sign_signed62(&m.modulus);
     for (vartime = 0; vartime < 2; ++vartime) {
         /* compute inverse */
         (vartime ? secp256k1_modinv64_var : secp256k1_modinv64)(&x, &m);
@@ -2022,30 +2062,56 @@ static void run_int128_test_case(void) {
         }
         CHECK(secp256k1_i128_eq_var(&swa, &swz) == expect);
     }
-    /* test secp256k1_i128_check_pow2 */
+    /* test secp256k1_i128_check_pow2 (sign == 1) */
     {
         int expect = (uc & 1);
         int pos = ub % 127;
         if (expect) {
-            /* If expect==1, set swz to exactly (2 << pos). */
+            /* If expect==1, set swz to exactly 2^pos. */
             uint64_t hi = 0;
             uint64_t lo = 0;
-            if (pos & 64) {
+            if (pos >= 64) {
                 hi = (((uint64_t)1) << (pos & 63));
             } else {
                 lo = (((uint64_t)1) << (pos & 63));
             }
             secp256k1_i128_load(&swz, hi, lo);
         } else {
-            /* If expect==0, set swz = swa, but update expect=1 if swa happens to equal (2 << pos). */
-            if (pos & 64) {
+            /* If expect==0, set swz = swa, but update expect=1 if swa happens to equal 2^pos. */
+            if (pos >= 64) {
                 if ((v[1] == (((uint64_t)1) << (pos & 63))) && v[0] == 0) expect = 1;
             } else {
                 if ((v[0] == (((uint64_t)1) << (pos & 63))) && v[1] == 0) expect = 1;
             }
             swz = swa;
         }
-        CHECK(secp256k1_i128_check_pow2(&swz, pos) == expect);
+        CHECK(secp256k1_i128_check_pow2(&swz, pos, 1) == expect);
+    }
+    /* test secp256k1_i128_check_pow2 (sign == -1) */
+    {
+        int expect = (uc & 1);
+        int pos = ub % 127;
+        if (expect) {
+            /* If expect==1, set swz to exactly -2^pos. */
+            uint64_t hi = ~(uint64_t)0;
+            uint64_t lo = ~(uint64_t)0;
+            if (pos >= 64) {
+                hi <<= (pos & 63);
+                lo = 0;
+            } else {
+                lo <<= (pos & 63);
+            }
+            secp256k1_i128_load(&swz, hi, lo);
+        } else {
+            /* If expect==0, set swz = swa, but update expect=1 if swa happens to equal -2^pos. */
+            if (pos >= 64) {
+                if ((v[1] == ((~(uint64_t)0) << (pos & 63))) && v[0] == 0) expect = 1;
+            } else {
+                if ((v[0] == ((~(uint64_t)0) << (pos & 63))) && v[1] == ~(uint64_t)0) expect = 1;
+            }
+            swz = swa;
+        }
+        CHECK(secp256k1_i128_check_pow2(&swz, pos, -1) == expect);
     }
 }
 
@@ -3233,8 +3299,10 @@ static void run_sqrt(void) {
         for (j = 0; j < COUNT; j++) {
             random_fe(&x);
             secp256k1_fe_sqr(&s, &x);
+            CHECK(secp256k1_fe_is_square_var(&s));
             test_sqrt(&s, &x);
             secp256k1_fe_negate(&t, &s, 1);
+            CHECK(!secp256k1_fe_is_square_var(&t));
             test_sqrt(&t, NULL);
             secp256k1_fe_mul(&t, &s, &ns);
             test_sqrt(&t, NULL);