Merge pull request #1450 from qiboteam/fix_autodiff

Fix `pytorch` gradients
qiboteam · Oct 10, 2024 · d290601 · d290601
2 parents 36ed3fe + 3f22d8d
commit d290601
Show file tree

Hide file tree

Showing 26 changed files with 411 additions and 263 deletions.
diff --git a/doc/source/code-examples/advancedexamples.rst b/doc/source/code-examples/advancedexamples.rst
@@ -289,12 +289,13 @@ The following gates support parameter setting:
 * :class:`qibo.gates.fSim`: Accepts a tuple of two parameters ``(theta, phi)``.
 * :class:`qibo.gates.GeneralizedfSim`: Accepts a tuple of two parameters
   ``(unitary, phi)``. Here ``unitary`` should be a unitary matrix given as an
-  array or ``tf.Tensor`` of shape ``(2, 2)``.
+  array or ``tf.Tensor`` of shape ``(2, 2)``. A ``torch.Tensor`` is required when using the pytorch backend.
 * :class:`qibo.gates.Unitary`: Accepts a single ``unitary`` parameter. This
-  should be an array or ``tf.Tensor`` of shape ``(2, 2)``.
+  should be an array or ``tf.Tensor`` of shape ``(2, 2)``. A ``torch.Tensor`` is required when using the pytorch backend.
 
 Note that a ``np.ndarray`` or a ``tf.Tensor`` may also be used in the place of
-a flat list. Using :meth:`qibo.models.circuit.Circuit.set_parameters` is more
+a flat list (``torch.Tensor`` is required when using the pytorch backend).
+Using :meth:`qibo.models.circuit.Circuit.set_parameters` is more
 efficient than recreating a new circuit with new parameter values. The inverse
 method :meth:`qibo.models.circuit.Circuit.get_parameters` is also available
 and returns a list, dictionary or flat list with the current parameter values
@@ -551,9 +552,9 @@ Here is a simple example using the Heisenberg XXZ model Hamiltonian:
 For more information on the available options of the ``vqe.minimize`` call we
 refer to the :ref:`Optimizers <Optimizers>` section of the documentation.
 Note that if the Stochastic Gradient Descent optimizer is used then the user
-has to use a backend based on tensorflow primitives and not the default custom
+has to use a backend based on tensorflow or pytorch primitives and not the default custom
 backend, as custom operators currently do not support automatic differentiation.
-To switch the backend one can do ``qibo.set_backend("tensorflow")``.
+To switch the backend one can do ``qibo.set_backend("tensorflow")`` or ``qibo.set_backend("pytorch")``.
 Check the :ref:`How to use automatic differentiation? <autodiff-example>`
 section for more details.
 
@@ -695,12 +696,13 @@ the model. For example the previous example would have to be modified as:
 How to use automatic differentiation?
 -------------------------------------
 
+The parameters of variational circuits can be optimized using the frameworks of
+Tensorflow or Pytorch.
+
 As a deep learning framework, Tensorflow supports
 `automatic differentiation <https://www.tensorflow.org/tutorials/customization/autodiff>`_.
-This can be used to optimize the parameters of variational circuits. For example
-the following script optimizes the parameters of two rotations so that the circuit
-output matches a target state using the fidelity as the corresponding loss
-function.
+The following script optimizes the parameters of two rotations so that the
+circuit output matches a target state using the fidelity as the corresponding loss function.
 
 Note that, as in the following example, the rotation angles have to assume real values
 to ensure the rotational gates are representing unitary operators.
@@ -777,6 +779,40 @@ that is supported by Tensorflow, such as defining
 and using the `Sequential model API <https://www.tensorflow.org/api_docs/python/tf/keras/Sequential>`_
 to train them.
 
+Similarly, Pytorch supports `automatic differentiation <https://pytorch.org/tutorials/beginner/basics/autogradqs_tutorial.html>`_.
+The following script optimizes the parameters of the variational circuit of the first example using the Pytorch framework.
+
+.. code-block:: python
+
+    import qibo
+    qibo.set_backend("pytorch")
+    import torch
+    from qibo import gates, models
+
+    # Optimization parameters
+    nepochs = 1000
+    optimizer = torch.optim.Adam
+    target_state = torch.ones(4, dtype=torch.complex128) / 2.0
+
+    # Define circuit ansatz
+    params = torch.tensor(
+        torch.rand(2, dtype=torch.float64), requires_grad=True
+    )
+    c = models.Circuit(2)
+    c.add(gates.RX(0, params[0]))
+    c.add(gates.RY(1, params[1]))
+
+    optimizer = optimizer([params])
+
+    for _ in range(nepochs):
+        optimizer.zero_grad()
+        c.set_parameters(params)
+        final_state = c().state()
+        fidelity = torch.abs(torch.sum(torch.conj(target_state) * final_state))
+        loss = 1 - fidelity
+        loss.backward()
+        optimizer.step()
+
 
 .. _noisy-example:
 

diff --git a/src/qibo/backends/npmatrices.py b/src/qibo/backends/npmatrices.py
@@ -17,10 +17,6 @@ def __init__(self, dtype):
     def _cast(self, x, dtype):
         return self.np.array(x, dtype=dtype)
 
-    # This method is used to cast the parameters of the gates to the right type for other backends
-    def _cast_parameter(self, x):
-        return x
-
     @cached_property
     def H(self):
         return self._cast([[1, 1], [1, -1]], dtype=self.dtype) / math.sqrt(2)
@@ -66,34 +62,29 @@ def TDG(self):
         )
 
     def I(self, n=2):
-        return self._cast(self.np.eye(n), dtype=self.dtype)
+        return self.np.eye(n, dtype=self.dtype)
 
     def Align(self, delay, n=2):
-        return self._cast(self.I(n), dtype=self.dtype)
+        return self.I(n)
 
     def M(self):  # pragma: no cover
         raise_error(NotImplementedError)
 
     def RX(self, theta):
-        theta = self._cast_parameter(theta)
         cos = self.np.cos(theta / 2.0) + 0j
         isin = -1j * self.np.sin(theta / 2.0)
         return self._cast([[cos, isin], [isin, cos]], dtype=self.dtype)
 
     def RY(self, theta):
-        theta = self._cast_parameter(theta)
         cos = self.np.cos(theta / 2.0) + 0j
         sin = self.np.sin(theta / 2.0) + 0j
         return self._cast([[cos, -sin], [sin, cos]], dtype=self.dtype)
 
     def RZ(self, theta):
-        theta = self._cast_parameter(theta)
         phase = self.np.exp(0.5j * theta)
         return self._cast([[self.np.conj(phase), 0], [0, phase]], dtype=self.dtype)
 
     def PRX(self, theta, phi):
-        theta = self._cast_parameter(theta)
-        phi = self._cast_parameter(phi)
         cos = self.np.cos(theta / 2)
         sin = self.np.sin(theta / 2)
         exponent1 = -1.0j * self.np.exp(-1.0j * phi)
@@ -104,25 +95,20 @@ def PRX(self, theta, phi):
         )
 
     def GPI(self, phi):
-        phi = self._cast_parameter(phi)
         phase = self.np.exp(1.0j * phi)
         return self._cast([[0, self.np.conj(phase)], [phase, 0]], dtype=self.dtype)
 
     def GPI2(self, phi):
-        phi = self._cast_parameter(phi)
         phase = self.np.exp(1.0j * phi)
         return self._cast(
             [[1, -1.0j * self.np.conj(phase)], [-1.0j * phase, 1]], dtype=self.dtype
         ) / math.sqrt(2)
 
     def U1(self, theta):
-        theta = self._cast_parameter(theta)
         phase = self.np.exp(1j * theta)
         return self._cast([[1, 0], [0, phase]], dtype=self.dtype)
 
     def U2(self, phi, lam):
-        phi = self._cast_parameter(phi)
-        lam = self._cast_parameter(lam)
         eplus = self.np.exp(1j * (phi + lam) / 2.0)
         eminus = self.np.exp(1j * (phi - lam) / 2.0)
         return self._cast(
@@ -131,9 +117,6 @@ def U2(self, phi, lam):
         ) / math.sqrt(2)
 
     def U3(self, theta, phi, lam):
-        theta = self._cast_parameter(theta)
-        phi = self._cast_parameter(phi)
-        lam = self._cast_parameter(lam)
         cost = self.np.cos(theta / 2)
         sint = self.np.sin(theta / 2)
         eplus = self.np.exp(1j * (phi + lam) / 2.0)
@@ -147,8 +130,6 @@ def U3(self, theta, phi, lam):
         )
 
     def U1q(self, theta, phi):
-        theta = self._cast_parameter(theta)
-        phi = self._cast_parameter(phi)
         return self._cast(
             self.U3(theta, phi - math.pi / 2, math.pi / 2 - phi), dtype=self.dtype
         )
@@ -179,12 +160,12 @@ def CZ(self):
 
     @cached_property
     def CSX(self):
-        a = self._cast_parameter((1 + 1j) / 2)
-        b = self.np.conj(a)
+        a = (1 + 1j) / 2
+        b = (1 - 1j) / 2
         return self._cast(
             [
-                [1, 0, 0, 0],
-                [0, 1, 0, 0],
+                [1 + 0j, 0, 0, 0],
+                [0, 1 + 0j, 0, 0],
                 [0, 0, a, b],
                 [0, 0, b, a],
             ],
@@ -193,20 +174,19 @@ def CSX(self):
 
     @cached_property
     def CSXDG(self):
-        a = self._cast_parameter((1 - 1j) / 2)
-        b = self.np.conj(a)
+        a = (1 + 1j) / 2
+        b = (1 - 1j) / 2
         return self._cast(
             [
-                [1, 0, 0, 0],
-                [0, 1, 0, 0],
-                [0, 0, a, b],
+                [1 + 0j, 0, 0, 0],
+                [0, 1 + 0j, 0, 0],
                 [0, 0, b, a],
+                [0, 0, a, b],
             ],
             dtype=self.dtype,
         )
 
     def CRX(self, theta):
-        theta = self._cast_parameter(theta)
         cos = self.np.cos(theta / 2.0) + 0j
         isin = -1j * self.np.sin(theta / 2.0)
         matrix = [
@@ -218,14 +198,12 @@ def CRX(self, theta):
         return self._cast(matrix, dtype=self.dtype)
 
     def CRY(self, theta):
-        theta = self._cast_parameter(theta)
         cos = self.np.cos(theta / 2.0) + 0j
         sin = self.np.sin(theta / 2.0) + 0j
         matrix = [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, cos, -sin], [0, 0, sin, cos]]
         return self._cast(matrix, dtype=self.dtype)
 
     def CRZ(self, theta):
-        theta = self._cast_parameter(theta)
         phase = self.np.exp(0.5j * theta)
         matrix = [
             [1, 0, 0, 0],
@@ -236,7 +214,6 @@ def CRZ(self, theta):
         return self._cast(matrix, dtype=self.dtype)
 
     def CU1(self, theta):
-        theta = self._cast_parameter(theta)
         phase = self.np.exp(1j * theta)
         matrix = [
             [1, 0, 0, 0],
@@ -247,8 +224,6 @@ def CU1(self, theta):
         return self._cast(matrix, dtype=self.dtype)
 
     def CU2(self, phi, lam):
-        phi = self._cast_parameter(phi)
-        lam = self._cast_parameter(lam)
         eplus = self.np.exp(1j * (phi + lam) / 2.0) / math.sqrt(2)
         eminus = self.np.exp(1j * (phi - lam) / 2.0) / math.sqrt(2)
         matrix = [
@@ -260,9 +235,6 @@ def CU2(self, phi, lam):
         return self._cast(matrix, dtype=self.dtype)
 
     def CU3(self, theta, phi, lam):
-        theta = self._cast_parameter(theta)
-        phi = self._cast_parameter(phi)
-        lam = self._cast_parameter(lam)
         cost = self.np.cos(theta / 2)
         sint = self.np.sin(theta / 2)
         eplus = self.np.exp(1j * (phi + lam) / 2.0)
@@ -324,8 +296,6 @@ def FSWAP(self):
         )
 
     def fSim(self, theta, phi):
-        theta = self._cast_parameter(theta)
-        phi = self._cast_parameter(phi)
         cost = self.np.cos(theta) + 0j
         isint = -1j * self.np.sin(theta)
         phase = self.np.exp(-1j * phi)
@@ -355,7 +325,6 @@ def SYC(self):
         )
 
     def GeneralizedfSim(self, u, phi):
-        phi = self._cast_parameter(phi)
         phase = self.np.exp(-1j * phi)
         return self._cast(
             [
@@ -368,7 +337,6 @@ def GeneralizedfSim(self, u, phi):
         )
 
     def RXX(self, theta):
-        theta = self._cast_parameter(theta)
         cos = self.np.cos(theta / 2.0) + 0j
         isin = -1j * self.np.sin(theta / 2.0)
         return self._cast(
@@ -382,7 +350,6 @@ def RXX(self, theta):
         )
 
     def RYY(self, theta):
-        theta = self._cast_parameter(theta)
         cos = self.np.cos(theta / 2.0) + 0j
         isin = -1j * self.np.sin(theta / 2.0)
         return self._cast(
@@ -396,7 +363,6 @@ def RYY(self, theta):
         )
 
     def RZZ(self, theta):
-        theta = self._cast_parameter(theta)
         phase = self.np.exp(0.5j * theta)
         return self._cast(
             [
@@ -409,7 +375,6 @@ def RZZ(self, theta):
         )
 
     def RZX(self, theta):
-        theta = self._cast_parameter(theta)
         cos, sin = self.np.cos(theta / 2) + 0j, self.np.sin(theta / 2) + 0j
         return self._cast(
             [
@@ -422,7 +387,6 @@ def RZX(self, theta):
         )
 
     def RXXYY(self, theta):
-        theta = self._cast_parameter(theta)
         cos, sin = self.np.cos(theta / 2) + 0j, self.np.sin(theta / 2) + 0j
         return self._cast(
             [
@@ -435,11 +399,6 @@ def RXXYY(self, theta):
         )
 
     def MS(self, phi0, phi1, theta):
-        phi0, phi1, theta = (
-            self._cast_parameter(phi0),
-            self._cast_parameter(phi1),
-            self._cast_parameter(theta),
-        )
         plus = self.np.exp(1.0j * (phi0 + phi1))
         minus = self.np.exp(1.0j * (phi0 - phi1))
         cos = self.np.cos(theta / 2) + 0j
@@ -455,7 +414,6 @@ def MS(self, phi0, phi1, theta):
         )
 
     def GIVENS(self, theta):
-        theta = self._cast_parameter(theta)
         return self._cast(
             [
                 [1, 0, 0, 0],
@@ -514,7 +472,6 @@ def CCZ(self):
         )
 
     def DEUTSCH(self, theta):
-        theta = self._cast_parameter(theta)
         sin = self.np.sin(theta) + 0j  # 0j necessary for right tensorflow dtype
         cos = self.np.cos(theta) + 0j
         return self._cast(
@@ -532,8 +489,6 @@ def DEUTSCH(self, theta):
         )
 
     def GeneralizedRBS(self, qubits_in, qubits_out, theta, phi):
-        theta = self._cast_parameter(theta)
-        phi = self._cast_parameter(phi)
         bitstring_length = len(qubits_in) + len(qubits_out)
         integer_in = "".join(
             ["1" if k in qubits_in else "0" for k in range(bitstring_length)]

diff --git a/src/qibo/backends/numpy.py b/src/qibo/backends/numpy.py
@@ -433,7 +433,6 @@ def execute_circuit(self, circuit, initial_state=None, nshots=1000):
                 if initial_state is None:
                     state = self.zero_density_matrix(nqubits)
                 else:
-                    # cast to proper complex type
                     state = self.cast(initial_state)
 
                 for gate in circuit.queue:
@@ -443,7 +442,6 @@ def execute_circuit(self, circuit, initial_state=None, nshots=1000):
                 if initial_state is None:
                     state = self.zero_state(nqubits)
                 else:
-                    # cast to proper complex type
                     state = self.cast(initial_state)
 
                 for gate in circuit.queue: