-
-
Notifications
You must be signed in to change notification settings - Fork 62
Closed
Labels
Description
Functions like the multiplication by non-residue that return a result out-of-place
constantine/constantine/tower_field_extensions/tower_instantiation.nim
Lines 150 to 172 in d12d5fa
| func `*`*(_: typedesc[ξ], a: Fp2): Fp2 {.inline, noInit.} = | |
| ## Multiply an element of 𝔽p2 by the quadratic and cubic non-residue | |
| ## chosen to construct 𝔽p4/𝔽p6 | |
| # Yet another const tuple unpacking bug | |
| const u = Fp2.C.get_CNR_Fp2()[0] # Quadratic & Cubic non-residue to construct 𝔽p4/𝔽p6 | |
| const v = Fp2.C.get_CNR_Fp2()[1] | |
| const Beta {.used.} = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2 | |
| # ξ = u + v x | |
| # and x² = β | |
| # | |
| # (c0 + c1 x) (u + v x) => u c0 + (v c0 + u c1)x + v c1 x² | |
| # => u c0 + β v c1 + (v c0 + u c1) x | |
| # TODO: check code generated when ξ = 1 + 𝑖 | |
| # The mul by constant are inline but | |
| # since we don't have __restrict tag | |
| # and we use arrays (which decay into pointer) | |
| # the compiler might not elide the temporary | |
| when a.fromComplexExtension(): | |
| result.c0.diff(u * a.c0, v * a.c1) | |
| else: | |
| result.c0.sum(u * a.c0, (Beta * v) * a.c1) | |
| result.c1.sum(v * a.c0, u * a.c1 ) |
lead to strange code generation, with lots of vector moves for seemingly no reason.
For example, in the cyclotomic square:
/* Generated C for an out-of-place operation on an Fp2: reads *a and returns
 * a fresh Fp2 by value.
 * NOTE(review): presumably the Nim `*`(ξ, Fp2) in its complex-extension
 * branch (diff for c0, sum for c1) -- confirm against the Nim source.
 * Every intermediate Fp is itself returned by value into a local (T1_..T4_)
 * before being combined, so the function holds four Fp temporaries plus the
 * Fp2 `result`. */
static N_INLINE(tyObject_Fp2__MFvpv7bDjIeajDmaQmXLqw, star___o52jw9bogfvItM7F9a68XWlgtower_instantiation)(tyObject_Fp2__MFvpv7bDjIeajDmaQmXLqw* a) {
tyObject_Fp2__MFvpv7bDjIeajDmaQmXLqw result;
tyObject_Fp__6V3eX5xO0FJcHzFOqwm7OQ T1_;
tyObject_Fp__6V3eX5xO0FJcHzFOqwm7OQ T2_;
tyObject_Fp__6V3eX5xO0FJcHzFOqwm7OQ T3_;
tyObject_Fp__6V3eX5xO0FJcHzFOqwm7OQ T4_;
/* result.c0 = diff(f(a->c0), f(a->c1)), where f is the by-value Fp helper. */
T1_ = star___yJ9aCkgoEpeRaoS807VHwUAfinite_fields((&(*a).c0));
T2_ = star___yJ9aCkgoEpeRaoS807VHwUAfinite_fields((&(*a).c1));
diff__Z7a6tu9cQAhfKCAfcix2P3wfinite_fields((&result.c0), (&T1_), (&T2_));
/* result.c1 = sum(f(a->c0), f(a->c1)). These are the same two helper calls
 * on the same arguments as T1_/T2_ above, recomputed into new temporaries. */
T3_ = star___yJ9aCkgoEpeRaoS807VHwUAfinite_fields((&(*a).c0));
T4_ = star___yJ9aCkgoEpeRaoS807VHwUAfinite_fields((&(*a).c1));
sum__Z7a6tu9cQAhfKCAfcix2P3w_2finite_fields((&result.c1), (&T3_), (&T4_));
/* Whole-struct return by value; the caller copies it to its destination. */
return result;
}
/* Generated C for an out-of-place operation on an Fp4: builds a new Fp4 from
 * *a and returns it by value.
 *   result.c0 <- Fp2 out-of-place helper applied to a->c1
 *   result.c1 <- copy of a->c0
 * Both assignments are whole-struct copies (one from a by-value return, one
 * from *a), followed by a third copy on return. */
static N_INLINE(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw, star___OcuTitixyJms2mXbB9aCuPAtower_instantiation)(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw* a) {
tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw result;
result.c0 = star___o52jw9bogfvItM7F9a68XWlgtower_instantiation((&(*a).c1));
result.c1 = (*a).c0;
return result;
}
/* In-place wrapper over the out-of-place Fp4 helper: calls it on *a and then
 * assigns the returned struct back over *a (a full Fp4 copy). */
static N_INLINE(void, stareq___6OjlbVANr8u6scRjSEiGYgtower_instantiation)(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw* a) {
(*a) = star___OcuTitixyJms2mXbB9aCuPAtower_instantiation((&(*a)));
}
/* Writes into *r: c0 is copied from a.c0 unchanged, c1 is produced by the
 * `neg` helper from a.c1.
 * Note that `a` is received by value (a whole Fp4 struct), not by pointer. */
static N_INLINE(void, conj__EqP0FRP809b11RLAQ9aKjS3g_2quadratic_extensions)(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw* r, tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw a) {
(*r).c0 = a.c0;
neg__x7zdGkRtGC9aRJX1AWI4vLQ_2((&(*r).c1), (&a.c1));
}
/* Mirror image of the `conj` helper: c0 of *r is produced by the `neg`
 * helper from a.c0, while c1 is copied from a.c1 unchanged.
 * `a` is again received by value (a whole Fp4 struct). */
static N_INLINE(void, conjneg__EqP0FRP809b11RLAQ9aKjS3g_3quadratic_extensions)(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw* r, tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw a) {
neg__x7zdGkRtGC9aRJX1AWI4vLQ_2((&(*r).c0), (&a.c0));
(*r).c1 = a.c1;
}
/* Generated C for cyclotomic_square: fills the three Fp4 coordinates of *r
 * (c0, c1, c2) from those of *a, using four Fp4 stack temporaries A..D.
 * The square/conj/conjneg helpers receive their Fp4 input by value, so each
 * such call passes a whole struct rather than a pointer. */
N_LIB_PRIVATE N_NIMCALL(void, cyclotomic_square__by9bcdzqxL9cY5C41np9cjq4A)(tyObject_Fp12__QVujeBSVHotGpxbWmrGv8g* r, tyObject_Fp12__QVujeBSVHotGpxbWmrGv8g* a) {
tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw A;
tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw B;
tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw C;
tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw D;
/* Keep a private copy of a->c0; it is modified in place below. */
A = (*a).c0;
/* r->c0: square of a->c0, then += its double (D is scratch)... */
square__EqP0FRP809b11RLAQ9aKjS3gquadratic_extensions((&(*r).c0), (*a).c0);
double__s51k9b9cjHcx6E0aaLWZMeMw((&D), (&(*r).c0));
pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&(*r).c0), (&D));
/* ...then += the in-place conjneg'd and doubled copy A.
 * The one-argument conjneg/double variants are not shown in this excerpt;
 * presumably they operate in place -- confirm. */
conjneg__I9boL26O9beHJCpZO8IIfU2A_2quadratic_extensions((&A));
double__JTO6FynOLekzb0lIHQZ4cA((&A));
pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&(*r).c0), (&A));
/* B: square of a->c2, passed through the in-place Fp4 `stareq` helper,
 * then += its double. */
square__EqP0FRP809b11RLAQ9aKjS3gquadratic_extensions((&B), (*a).c2);
stareq___6OjlbVANr8u6scRjSEiGYgtower_instantiation((&B));
double__s51k9b9cjHcx6E0aaLWZMeMw((&D), (&B));
pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&B), (&D));
/* r->c1: conj of a->c1, doubled in place, += B. */
conj__EqP0FRP809b11RLAQ9aKjS3g_2quadratic_extensions((&(*r).c1), (*a).c1);
double__JTO6FynOLekzb0lIHQZ4cA((&(*r).c1));
pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&(*r).c1), (&B));
/* C: square of a->c1, then += its double. */
square__EqP0FRP809b11RLAQ9aKjS3gquadratic_extensions((&C), (*a).c1);
double__s51k9b9cjHcx6E0aaLWZMeMw((&D), (&C));
pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&C), (&D));
/* r->c2: conjneg of a->c2, doubled in place, += C. */
conjneg__EqP0FRP809b11RLAQ9aKjS3g_3quadratic_extensions((&(*r).c2), (*a).c2);
double__JTO6FynOLekzb0lIHQZ4cA((&(*r).c2));
pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&(*r).c2), (&C));
}Reactions are currently unavailable
