PennyLaneAI · josh146 · Nov 25, 2020 · Nov 18, 2020 · Nov 18, 2020 · Nov 18, 2020
diff --git a/doc/development/plugins.rst b/doc/development/plugins.rst
@@ -466,11 +466,13 @@ where
 * :attr:`~.Operation.grad_method`: the gradient computation method; ``'A'`` for the analytic
   method, ``'F'`` for finite differences, and ``None`` if the operation may not be differentiated
 
-* :attr:`~.Operation.grad_recipe`: The gradient recipe for the analytic ``'A'`` method.
-  This is a list with one tuple per operation parameter. For parameter :math:`k`, the tuple is of
-  the form :math:`(c_k, s_k)`, resulting in a gradient recipe of
+* :attr:`~.Operation.grad_recipe`: The gradient recipe for the analytic ``'A'``
+  method. This is a tuple with one nested list per operation parameter. For
+  parameter :math:`k`, the nested list contains elements of the form
+  :math:`[c_i, a_i, s_i]` where :math:`i \in I_{k}` is the index of the term,
+  resulting in a gradient recipe of
 
-  .. math:: \frac{d}{d\phi_k}f(O(\phi_k)) = c_k\left[f(O(\phi_k+s_k))-f(O(\phi_k-s_k))\right].
+  .. math:: \frac{\partial}{\partial\phi_k}f(O(\phi_k)) = \sum_{i \in I_{k}} c_i f(O(a_i \phi_k+s_i)).
 
   where :math:`f` is an expectation value that depends on :math:`O(\phi_k)`, an example being
 
@@ -479,8 +481,9 @@ where
   which is the simple expectation value of the operator :math:`\hat{B}` evolved via the gate
   :math:`O(\phi_k)`.
 
-  Note that if ``grad_recipe = None``, the default gradient recipe is
-  :math:`(c_k, s_k)=(1/2, \pi/2)` for every parameter.
+  Note that if ``grad_recipe = None``, the default gradient recipe containing
+  the two terms :math:`[c_0, a_0, s_0]=[1/2, 1, \pi/2]` and :math:`[c_1, a_1,
+  s_1]=[-1/2, 1, -\pi/2]` is assumed for every parameter.
 
 The user can then import this operation directly from your plugin, and use it when defining a QNode:
 

diff --git a/pennylane/operation.py b/pennylane/operation.py
@@ -65,8 +65,15 @@
     transformation on the quadrature operators.
 
 For gates that *are* supported via the analytic method, the gradient recipe
-(with multiplier :math:`c_k`, parameter shift :math:`s_k` for parameter :math:`\phi_k`)
-works as follows:
+(with multipliers :math:`c_i`, scaling factors :math:`a_i` and parameter shifts
+:math:`s_i` for parameter :math:`\phi_k` where :math:`i \in I_{k}`) works as
+follows:
+
+.. math:: \frac{\partial}{\partial\phi_k}O = \sum_{i \in I_{k}} c_i O(a_i \phi_k+s_i).
+
+For example, for qubit operations that are generated by one of the Pauli
+matrices, this reduces to the following special case consisting of one
+positive and one negative shift:
 
 .. math:: \frac{\partial}{\partial\phi_k}O = c_k\left[O(\phi_k+s_k)-O(\phi_k-s_k)\right].
 
@@ -613,16 +620,19 @@ def grad_method(self):
         return None if self.num_params == 0 else "F"
 
     grad_recipe = None
-    r"""list[tuple[float]] or None: Gradient recipe for the parameter-shift method.
+    r"""tuple[Union[list[list[float]], None]] or None: Gradient recipe for the
+        parameter-shift method.
 
-        This is a list with one tuple per operation parameter. For parameter
-        :math:`k`, the tuple is of the form :math:`(c_k, s_k)`, resulting in
-        a gradient recipe of
+        This is a tuple with one nested list per operation parameter. For
+        parameter :math:`k`, the nested list contains elements of the form
+        :math:`[c_i, a_i, s_i]` where :math:`i \in I_{k}` is the index of the
+        term, resulting in a gradient recipe of
 
-        .. math:: \frac{\partial}{\partial\phi_k}O = c_k\left[O(\phi_k+s_k)-O(\phi_k-s_k)\right].
+        .. math:: \frac{\partial}{\partial\phi_k}O = \sum_{i \in I_{k}} c_i O(a_i \phi_k+s_i).
 
-        If ``None``, the default gradient recipe
-        :math:`(c_k, s_k)=(1/2, \pi/2)` is assumed for every parameter.
+        If ``None``, the default gradient recipe containing the two terms
+        :math:`[c_0, a_0, s_0]=[1/2, 1, \pi/2]` and :math:`[c_1, a_1,
+        s_1]=[-1/2, 1, -\pi/2]` is assumed for every parameter.
     """
 
     def get_parameter_shift(self, idx):
@@ -636,16 +646,30 @@ def get_parameter_shift(self, idx):
         """
         # get the gradient recipe for this parameter
         recipe = self.grad_recipe[idx]
-        multiplier, shift = (0.5, np.pi / 2) if recipe is None else recipe
+
+        # Default values
+        multiplier = 0.5
+        a = 1
+        shift = np.pi / 2
+
+        # We set the default recipe following:
+        # ∂f(x) = c1*f(a1*x+s1) + c2*f(a2*x+s2)
+        # where we express a positive and a negative shift by default
+        default_param_shift = [[multiplier, a, shift], [-multiplier, a, -shift]]
+        param_shift = default_param_shift if recipe is None else recipe
 
         # internal multiplier in the Variable
         var_mult = self.data[idx].mult
 
-        multiplier *= var_mult
-        if var_mult != 0:
-            # zero multiplier means the shift is unimportant
-            shift /= var_mult
-        return multiplier, shift
+        for elem in param_shift:
+
+            # Update the multiplier
+            elem[0] *= var_mult
+            if var_mult != 0:
+                # Update the shift
+                # zero multiplier means the shift is unimportant
+                elem[2] /= var_mult
+        return param_shift
 
     @property
     def generator(self):
@@ -1588,16 +1612,33 @@ def heisenberg_pd(self, idx):
         """
         # get the gradient recipe for this parameter
         recipe = self.grad_recipe[idx]
-        multiplier = 0.5 if recipe is None else recipe[0]
-        shift = np.pi / 2 if recipe is None else recipe[1]
+
+        # Default values
+        multiplier = 0.5
+        a = 1
+        shift = np.pi / 2
+
+        # We set the default recipe as follows:
+        # ∂f(x) = c1*f(a1*x+s1) + c2*f(a2*x+s2)
+        default_param_shift = [[multiplier, a, shift], [-multiplier, a, -shift]]
+        param_shift = default_param_shift if recipe is None else recipe
+
+        pd = None  # partial derivative of the transformation
 
         p = self.parameters
-        # evaluate the transform at the shifted parameter values
-        p[idx] += shift
-        U2 = self._heisenberg_rep(p)  # pylint: disable=assignment-from-none
-        p[idx] -= 2 * shift
-        U1 = self._heisenberg_rep(p)  # pylint: disable=assignment-from-none
-        return (U2 - U1) * multiplier  # partial derivative of the transformation
+
+        original_p_idx = p[idx]
+        for c, _a, s in param_shift:
+            # evaluate the transform at the shifted parameter values
+            p[idx] = _a * original_p_idx + s
+            U = self._heisenberg_rep(p)  # pylint: disable=assignment-from-none
+
+            if pd is None:
+                pd = c * U
+            else:
+                pd += c * U
+
+        return pd
 
     def heisenberg_tr(self, wires, inverse=False):
         r"""Heisenberg picture representation of the linear transformation carried

diff --git a/pennylane/ops/cv.py b/pennylane/ops/cv.py
@@ -138,7 +138,9 @@ class Squeezing(CVOperation):
     grad_method = "A"
 
     shift = 0.1
-    grad_recipe = [(0.5 / math.sinh(shift), shift), None]
+    multiplier = 0.5 / math.sinh(shift)
+    a = 1
+    grad_recipe = ([[multiplier, a, shift], [-multiplier, a, -shift]], None)
 
     @staticmethod
     def _heisenberg_rep(p):
@@ -180,7 +182,9 @@ class Displacement(CVOperation):
     grad_method = "A"
 
     shift = 0.1
-    grad_recipe = [(0.5 / shift, shift), None]
+    multiplier = 0.5 / shift
+    a = 1
+    grad_recipe = ([[multiplier, a, shift], [-multiplier, a, -shift]], None)
 
     @staticmethod
     def _heisenberg_rep(p):
@@ -278,8 +282,11 @@ class TwoModeSqueezing(CVOperation):
     par_domain = "R"
 
     grad_method = "A"
+
     shift = 0.1
-    grad_recipe = [(0.5 / math.sinh(shift), shift), None]
+    multiplier = 0.5 / math.sinh(shift)
+    a = 1
+    grad_recipe = ([[multiplier, a, shift], [-multiplier, a, -shift]], None)
 
     @staticmethod
     def _heisenberg_rep(p):
@@ -326,8 +333,11 @@ class QuadraticPhase(CVOperation):
     par_domain = "R"
 
     grad_method = "A"
+
     shift = 0.1
-    grad_recipe = [(0.5 / shift, shift)]
+    multiplier = 0.5 / shift
+    a = 1
+    grad_recipe = ([[multiplier, a, shift], [-multiplier, a, -shift]],)
 
     @staticmethod
     def _heisenberg_rep(p):
@@ -371,8 +381,11 @@ class ControlledAddition(CVOperation):
     par_domain = "R"
 
     grad_method = "A"
+
     shift = 0.1
-    grad_recipe = [(0.5 / shift, shift)]
+    multiplier = 0.5 / shift
+    a = 1
+    grad_recipe = ([[multiplier, a, shift], [-multiplier, a, -shift]],)
 
     @staticmethod
     def _heisenberg_rep(p):
@@ -417,8 +430,11 @@ class ControlledPhase(CVOperation):
     par_domain = "R"
 
     grad_method = "A"
+
     shift = 0.1
-    grad_recipe = [(0.5 / shift, shift)]
+    multiplier = 0.5 / shift
+    a = 1
+    grad_recipe = ([[multiplier, a, shift], [-multiplier, a, -shift]],)
 
     @staticmethod
     def _heisenberg_rep(p):

diff --git a/pennylane/qnodes/cv.py b/pennylane/qnodes/cv.py
@@ -181,20 +181,37 @@ def _pd_analytic(self, idx, args, kwargs, **options):
             temp_var.idx = n
             op.data[p_idx] = temp_var
 
-            multiplier, shift = op.get_parameter_shift(p_idx)
-
-            # shifted parameter values
-            shift_p1 = np.r_[args, args[idx] + shift]
-            shift_p2 = np.r_[args, args[idx] - shift]
+            param_shift = op.get_parameter_shift(p_idx)
 
             if not force_order2 and op.use_method != "B":
                 # basic parameter-shift method, for Gaussian CV gates
                 # succeeded by order-1 observables
-                # evaluate the circuit at two points with shifted parameter values
-                y2 = np.asarray(self.evaluate(shift_p1, kwargs))
-                y1 = np.asarray(self.evaluate(shift_p2, kwargs))
-                pd += (y2 - y1) * multiplier
+                # evaluate the circuit at multiple points with the linear
+                # combination of parameter values (in most cases at two points)
+                for multiplier, a, shift in param_shift:
+
+                    # shifted parameter values
+                    shift_p = np.r_[args, a * args[idx] + shift]
+
+                    term = multiplier * np.asarray(self.evaluate(shift_p, kwargs))
+                    pd += term
             else:
+                if len(param_shift) != 2:
+                    # TODO: check if more than two terms is supported
+                    raise NotImplementedError(
+                        "Taking the analytic gradient for order-2 operators is "
+                        "unsupported for {op} which contains a parameter with a "
+                        "gradient recipe of more than two terms."
+                    )
+
+                # Get the shifts and the multipliers
+                pos_multiplier, a1, pos_shift = param_shift[0]
+                neg_multiplier, a2, neg_shift = param_shift[1]
+
+                # shifted parameter values
+                shift_p1 = np.r_[args, a1 * args[idx] + pos_shift]
+                shift_p2 = np.r_[args, a2 * args[idx] + neg_shift]
+
                 # order-2 parameter-shift method, for gaussian CV gates
                 # succeeded by order-2 observables
                 # evaluate transformed observables at the original parameter point
@@ -203,7 +220,7 @@ def _pd_analytic(self, idx, args, kwargs, **options):
                 Z2 = op.heisenberg_tr(self.device.wires)
                 self._set_variables(shift_p2, kwargs)
                 Z1 = op.heisenberg_tr(self.device.wires)
-                Z = (Z2 - Z1) * multiplier  # derivative of the operation
+                Z = pos_multiplier * Z2 + neg_multiplier * Z1  # derivative of the operation
 
                 unshifted_args = np.r_[args, args[idx]]
                 self._set_variables(unshifted_args, kwargs)

diff --git a/pennylane/qnodes/qubit.py b/pennylane/qnodes/qubit.py
@@ -128,16 +128,18 @@ def _pd_analytic(self, idx, args, kwargs, **options):
             temp_var.idx = n
             op.data[p_idx] = temp_var
 
-            multiplier, shift = op.get_parameter_shift(p_idx)
+            param_shift = op.get_parameter_shift(p_idx)
 
-            # shifted parameter values
-            shift_p1 = np.r_[args, args[idx] + shift]
-            shift_p2 = np.r_[args, args[idx] - shift]
+            for multiplier, a, shift in param_shift:
 
-            # evaluate the circuit at two points with shifted parameter values
-            y2 = np.asarray(self.evaluate(shift_p1, kwargs))
-            y1 = np.asarray(self.evaluate(shift_p2, kwargs))
-            pd += (y2 - y1) * multiplier
+                # shifted parameter values
+                shift_p = np.r_[args, a * args[idx] + shift]
+
+                # evaluate the circuit at the point with shifted parameter values
+                y = np.asarray(self.evaluate(shift_p, kwargs))
+
+                # add the contribution to the partial derivative
+                pd += multiplier * y
 
             # restore the original parameter
             op.data[p_idx] = orig