Skip to content

Out-of-place functions lead to bad codegen #145

@mratsim

Description

@mratsim

Functions like the multiplication by non-residue that return a result out-of-place

func `*`*(_: typedesc[ξ], a: Fp2): Fp2 {.inline, noInit.} =
## Multiply an element of 𝔽p2 by the quadratic and cubic non-residue
## chosen to construct 𝔽p4/𝔽p6
# Yet another const tuple unpacking bug
const u = Fp2.C.get_CNR_Fp2()[0] # Quadratic & Cubic non-residue to construct 𝔽p4/𝔽p6
const v = Fp2.C.get_CNR_Fp2()[1]
const Beta {.used.} = Fp2.C.get_QNR_Fp() # Quadratic non-residue to construct 𝔽p2
# ξ = u + v x
# and x² = β
#
# (c0 + c1 x) (u + v x) => u c0 + (u c0 + u c1)x + v c1 x²
# => u c0 + β v c1 + (v c0 + u c1) x
# TODO: check code generated when ξ = 1 + 𝑖
# The mul by constant are inline but
# since we don't have __restrict tag
# and we use arrays (which decay into pointer)
# the compiler might not elide the temporary
when a.fromComplexExtension():
result.c0.diff(u * a.c0, v * a.c1)
else:
result.c0.sum(u * a.c0, (Beta * v) * a.c1)
result.c1.sum(v * a.c0, u * a.c1 )

Leads to a strange codegen with lots of vector moves for seemingly no reason:

For example cyclotomic square

image

static N_INLINE(tyObject_Fp2__MFvpv7bDjIeajDmaQmXLqw, star___o52jw9bogfvItM7F9a68XWlgtower_instantiation)(tyObject_Fp2__MFvpv7bDjIeajDmaQmXLqw* a) {
	tyObject_Fp2__MFvpv7bDjIeajDmaQmXLqw result;
	tyObject_Fp__6V3eX5xO0FJcHzFOqwm7OQ T1_;
	tyObject_Fp__6V3eX5xO0FJcHzFOqwm7OQ T2_;
	tyObject_Fp__6V3eX5xO0FJcHzFOqwm7OQ T3_;
	tyObject_Fp__6V3eX5xO0FJcHzFOqwm7OQ T4_;
	T1_ = star___yJ9aCkgoEpeRaoS807VHwUAfinite_fields((&(*a).c0));
	T2_ = star___yJ9aCkgoEpeRaoS807VHwUAfinite_fields((&(*a).c1));
	diff__Z7a6tu9cQAhfKCAfcix2P3wfinite_fields((&result.c0), (&T1_), (&T2_));
	T3_ = star___yJ9aCkgoEpeRaoS807VHwUAfinite_fields((&(*a).c0));
	T4_ = star___yJ9aCkgoEpeRaoS807VHwUAfinite_fields((&(*a).c1));
	sum__Z7a6tu9cQAhfKCAfcix2P3w_2finite_fields((&result.c1), (&T3_), (&T4_));
	return result;
}
static N_INLINE(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw, star___OcuTitixyJms2mXbB9aCuPAtower_instantiation)(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw* a) {
	tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw result;
	result.c0 = star___o52jw9bogfvItM7F9a68XWlgtower_instantiation((&(*a).c1));
	result.c1 = (*a).c0;
	return result;
}
static N_INLINE(void, stareq___6OjlbVANr8u6scRjSEiGYgtower_instantiation)(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw* a) {
	(*a) = star___OcuTitixyJms2mXbB9aCuPAtower_instantiation((&(*a)));
}
static N_INLINE(void, conj__EqP0FRP809b11RLAQ9aKjS3g_2quadratic_extensions)(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw* r, tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw a) {
	(*r).c0 = a.c0;
	neg__x7zdGkRtGC9aRJX1AWI4vLQ_2((&(*r).c1), (&a.c1));
}
static N_INLINE(void, conjneg__EqP0FRP809b11RLAQ9aKjS3g_3quadratic_extensions)(tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw* r, tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw a) {
	neg__x7zdGkRtGC9aRJX1AWI4vLQ_2((&(*r).c0), (&a.c0));
	(*r).c1 = a.c1;
}
N_LIB_PRIVATE N_NIMCALL(void, cyclotomic_square__by9bcdzqxL9cY5C41np9cjq4A)(tyObject_Fp12__QVujeBSVHotGpxbWmrGv8g* r, tyObject_Fp12__QVujeBSVHotGpxbWmrGv8g* a) {
	tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw A;
	tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw B;
	tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw C;
	tyObject_Fp4__y9ageSyvuUFHaz5K14C7aAw D;
	A = (*a).c0;
	square__EqP0FRP809b11RLAQ9aKjS3gquadratic_extensions((&(*r).c0), (*a).c0);
	double__s51k9b9cjHcx6E0aaLWZMeMw((&D), (&(*r).c0));
	pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&(*r).c0), (&D));
	conjneg__I9boL26O9beHJCpZO8IIfU2A_2quadratic_extensions((&A));
	double__JTO6FynOLekzb0lIHQZ4cA((&A));
	pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&(*r).c0), (&A));
	square__EqP0FRP809b11RLAQ9aKjS3gquadratic_extensions((&B), (*a).c2);
	stareq___6OjlbVANr8u6scRjSEiGYgtower_instantiation((&B));
	double__s51k9b9cjHcx6E0aaLWZMeMw((&D), (&B));
	pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&B), (&D));
	conj__EqP0FRP809b11RLAQ9aKjS3g_2quadratic_extensions((&(*r).c1), (*a).c1);
	double__JTO6FynOLekzb0lIHQZ4cA((&(*r).c1));
	pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&(*r).c1), (&B));
	square__EqP0FRP809b11RLAQ9aKjS3gquadratic_extensions((&C), (*a).c1);
	double__s51k9b9cjHcx6E0aaLWZMeMw((&D), (&C));
	pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&C), (&D));
	conjneg__EqP0FRP809b11RLAQ9aKjS3g_3quadratic_extensions((&(*r).c2), (*a).c2);
	double__JTO6FynOLekzb0lIHQZ4cA((&(*r).c2));
	pluseq___eP9cp8bg6sFMQLe5QrnauTQ((&(*r).c2), (&C));
}

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions