   PaddingLike,
   LaxPadding,
   PromoteDtypeFn,
+  EinsumT,
 )

 Array = jax.Array
@@ -426,6 +427,8 @@ class Einsum(Module):
       dtype. The function should accept a tuple of ``(inputs, kernel, bias)``
       and a ``dtype`` keyword argument, and return a tuple of arrays with the
       promoted dtype.
+    einsum_op: An injectable alternative to ``jnp.einsum`` used to perform the
+      computation. Should support the same signature as ``jnp.einsum``.
     rngs: rng key.
   """

@@ -441,6 +444,7 @@ def __init__(
     kernel_init: Initializer = default_kernel_init,
     bias_init: Initializer = default_bias_init,
     promote_dtype: PromoteDtypeFn = dtypes.promote_dtype,
+    einsum_op: EinsumT = jnp.einsum,
     rngs: rnglib.Rngs,
   ):
     einsum_str = einsum_str.replace(' ', '')
@@ -465,6 +469,7 @@ def __init__(
     self.kernel_init = kernel_init
     self.bias_init = bias_init
     self.promote_dtype = promote_dtype
+    self.einsum_op = einsum_op

   def __call__(
     self, inputs: Array, einsum_str: tp.Optional[str] = None
@@ -500,7 +505,7 @@ def __call__(
       dtype=self.dtype,
     )

-    y = jnp.einsum(einsum_str, inputs, kernel, precision=self.precision)
+    y = self.einsum_op(einsum_str, inputs, kernel, precision=self.precision)

     if bias is not None:
       broadcasted_bias_shape = self._infer_broadcasted_bias_shape(
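For reference, a minimal usage sketch of the injected ``einsum_op``: any callable matching ``jnp.einsum``'s signature can be passed, for example to log, shard, or quantize the contraction. The ``logging_einsum`` wrapper, the einsum string, and the shapes below are illustrative assumptions, not part of this change.

import jax.numpy as jnp
from flax import nnx

# Hypothetical drop-in for jnp.einsum: same signature, logs each
# contraction before delegating to jnp.einsum.
def logging_einsum(subscripts, *operands, **kwargs):
  print(f'einsum: {subscripts}')
  return jnp.einsum(subscripts, *operands, **kwargs)

layer = nnx.Einsum(
  'bte,ehq->bthq',           # contract inputs (b, t, e) with kernel (e, h, q)
  (8, 4, 16),                # kernel shape (e, h, q)
  einsum_op=logging_einsum,  # injected in place of the default jnp.einsum
  rngs=nnx.Rngs(0),
)

x = jnp.ones((2, 3, 8))      # (b, t, e)
y = layer(x)                 # prints: einsum: bte,ehq->bthq
assert y.shape == (2, 3, 4, 16)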