fix egnn + equivariance/invariance remaining tests

dario-coscia · dario-coscia · commit a3e7f9f45a1d · 2025-06-01T13:42:58.000+02:00
diff --git a/pina/model/block/message_passing/en_equivariant_network_block.py b/pina/model/block/message_passing/en_equivariant_network_block.py
@@ -8,34 +8,6 @@
 
 
 class EnEquivariantNetworkBlock(MessagePassing):
-    """
-    Implementation of the E(n) Equivariant Graph Neural Network block.
-
-    This block is used to perform message-passing between nodes and edges in a
-    graph neural network, following the scheme proposed by Satorras et al. in
-    2021. It serves as an inner block in a larger graph neural network
-    architecture.
-
-    The message between two nodes connected by an edge is computed by applying a
-    linear transformation to the sender node features and the edge features,
-    together with the squared euclidean distance between the sender and
-    recipient node positions, followed by a non-linear activation function.
-    Messages are then aggregated using an aggregation scheme (e.g., sum, mean,
-    min, max, or product).
-
-    The update step is performed by applying another MLP to the concatenation of
-    the incoming messages and the node features. Here, also the node
-    positions are updated by adding the incoming messages divided by the
-    degree of the recipient node.
-
-    .. seealso::
-
-        **Original reference** Satorras, V. G., Hoogeboom, E., Welling, M.
-        (2021). *E(n) Equivariant Graph Neural Networks.*
-        In International Conference on Machine Learning.
-        DOI: `<https://doi.org/10.48550/arXiv.2102.09844>`_.
-    """
-
     def __init__(
         self,
         node_feature_dim,
@@ -49,50 +21,15 @@ def __init__(
         node_dim=-2,
         flow="source_to_target",
     ):
-        """
-        Initialization of the :class:`EnEquivariantNetworkBlock` class.
-
-        :param int node_feature_dim: The dimension of the node features.
-        :param int edge_feature_dim: The dimension of the edge features.
-        :param int pos_dim: The dimension of the position features.
-        :param int hidden_dim: The dimension of the hidden features.
-            Default is 64.
-        :param int n_message_layers: The number of layers in the message
-            network. Default is 2.
-        :param int n_update_layers: The number of layers in the update network.
-            Default is 2.
-        :param torch.nn.Module activation: The activation function.
-            Default is :class:`torch.nn.SiLU`.
-        :param str aggr: The aggregation scheme to use for message passing.
-            Available options are "add", "mean", "min", "max", "mul".
-            See :class:`torch_geometric.nn.MessagePassing` for more details.
-            Default is "add".
-        :param int node_dim: The axis along which to propagate. Default is -2.
-        :param str flow: The direction of message passing. Available options
-            are "source_to_target" and "target_to_source".
-            The "source_to_target" flow means that messages are sent from
-            the source node to the target node, while the "target_to_source"
-            flow means that messages are sent from the target node to the
-            source node. See :class:`torch_geometric.nn.MessagePassing` for more
-            details. Default is "source_to_target".
-        :raises AssertionError: If `node_feature_dim` is not a positive integer.
-        :raises AssertionError: If `edge_feature_dim` is a negative integer.
-        :raises AssertionError: If `pos_dim` is not a positive integer.
-        :raises AssertionError: If `hidden_dim` is not a positive integer.
-        :raises AssertionError: If `n_message_layers` is not a positive integer.
-        :raises AssertionError: If `n_update_layers` is not a positive integer.
-        """
         super().__init__(aggr=aggr, node_dim=node_dim, flow=flow)
 
-        # Check values
         check_positive_integer(node_feature_dim, strict=True)
         check_positive_integer(edge_feature_dim, strict=False)
         check_positive_integer(pos_dim, strict=True)
         check_positive_integer(hidden_dim, strict=True)
         check_positive_integer(n_message_layers, strict=True)
         check_positive_integer(n_update_layers, strict=True)
 
-        # Layer for computing the message
         self.message_net = FeedForward(
             input_dimensions=2 * node_feature_dim + edge_feature_dim + 1,
             output_dimensions=pos_dim,
@@ -101,7 +38,6 @@ def __init__(
             func=activation,
         )
 
-        # Layer for updating the node features
         self.update_feat_net = FeedForward(
             input_dimensions=node_feature_dim + pos_dim,
             output_dimensions=node_feature_dim,
@@ -110,8 +46,6 @@ def __init__(
             func=activation,
         )
 
-        # Layer for updating the node positions
-        # The output dimension is set to 1 for equivariant updates
         self.update_pos_net = FeedForward(
             input_dimensions=pos_dim,
             output_dimensions=1,
@@ -120,9 +54,6 @@ def __init__(
             func=activation,
         )
 
-        # Placeholder for the messages
-        self._m_ij = None
-
     def forward(self, x, pos, edge_index, edge_attr=None):
         """
         Forward pass of the block, triggering the message-passing routine.
@@ -158,28 +89,57 @@ def message(self, x_i, x_j, pos_i, pos_j, edge_attr):
         :return: The message to be passed.
         :rtype: torch.Tensor
         """
-        # Compute the euclidean distance between the sender and recipient nodes
         diff = pos_i - pos_j
         dist = torch.norm(diff, dim=-1, keepdim=True) ** 2
 
-        # Compute the message input
         if edge_attr is None:
             input_ = torch.cat((x_i, x_j, dist), dim=-1)
         else:
             input_ = torch.cat((x_i, x_j, dist, edge_attr), dim=-1)
 
-        # Compute the messages and save them for feature update
-        self._m_ij = self.message_net(input_)
+        m_ij = self.message_net(input_)  # message features
+        message = diff * self.update_pos_net(m_ij)  # equivariant message
 
-        # Rescale the message by the euclidean distance
-        return diff * self.update_pos_net(self._m_ij)
+        return message, m_ij
 
-    def update(self, message, x, pos, edge_index):
+    def aggregate(self, inputs, index, ptr=None, dim_size=None):
+        """
+        Aggregate the messages at the nodes during message passing.
+
+        This method receives a tuple of tensors corresponding to the messages 
+        to be aggregated. Both messages are aggregated separately according to 
+        the specified aggregation scheme.
+
+        :param tuple(torch.Tensor) inputs: Tuple containing two messages to 
+            aggregate.
+        :param torch.Tensor | LabelTensor index: The indices of target nodes 
+            for each message. This tensor specifies which node each message 
+            is aggregated into.
+        :param torch.Tensor | LabelTensor ptr: Optional tensor to specify 
+            the slices of messages for each node (used in some aggregation 
+            strategies).
+        :param int dim_size: Optional size of the output dimension, i.e., 
+            number of nodes.
+        :return: Tuple of aggregated tensors corresponding to
+            (aggregated messages for position updates, aggregated messages for
+            feature updates).
+        :rtype: tuple(torch.Tensor, torch.Tensor)
+        """
+        # inputs is tuple (message, m_ij), we want to aggregate separately
+        message, m_ij = inputs
+
+        # Aggregate messages as usual using self.aggr method
+        agg_message = super().aggregate(message, index, ptr, dim_size)
+        agg_m_ij = super().aggregate(m_ij, index, ptr, dim_size)
+
+        return agg_message, agg_m_ij
+
+    def update(self, aggregated_inputs, x, pos, edge_index):
         """
         Update the node features and the node coordinates with the received
         messages.
 
-        :param torch.Tensor message: The message to be passed.
+        :param tuple(torch.Tensor) aggregated_inputs: The messages to be passed.
         :param x: The node features.
         :type x: torch.Tensor | LabelTensor
         :param pos: The euclidean coordinates of the nodes.
@@ -188,14 +148,14 @@ def update(self, message, x, pos, edge_index):
         :return: The updated node features and node positions.
         :rtype: tuple(torch.Tensor, torch.Tensor)
         """
-        # Sum the incoming messages for each node (m_i = sum_j m_ij)
-        m_sum = torch.zeros(x.size(0), self._m_ij.shape[-1], device=x.device)
-        m_sum.index_add_(0, edge_index[1], self._m_ij)
+        # aggregated_inputs is tuple (agg_message, agg_m_ij)
+        agg_message, agg_m_ij = aggregated_inputs
+
+        # Update node features with aggregated m_ij
+        x = self.update_feat_net(torch.cat((x, agg_m_ij), dim=-1))
 
-        # Update the node features
-        x = self.update_feat_net(torch.cat((x, m_sum), dim=-1))
+        # Degree for normalization of position updates
+        c = degree(edge_index[1], pos.shape[0]).unsqueeze(-1).clamp(min=1)
+        pos = pos + agg_message / c
 
-        # Update the node positions
-        c = degree(edge_index[1], pos.shape[0]).unsqueeze(-1)
-        pos = pos + message / c
         return x, pos
diff --git a/tests/test_messagepassing/test_equivariant_network_block.py b/tests/test_messagepassing/test_equivariant_network_block.py
@@ -153,12 +153,13 @@ def test_equivariance():
         n_update_layers=2,
     ).eval()
 
-    _, pos1 = model(edge_index=edge_index, x=x, pos=pos)
-    _, pos2 = model(
+    h1, pos1 = model(edge_index=edge_index, x=x, pos=pos)
+    h2, pos2 = model(
         edge_index=edge_index, x=x, pos=pos @ rotation.T + translation
     )
 
     # Transform model output
     pos1_transformed = (pos1 @ rotation.T) + translation
 
     assert torch.allclose(pos2, pos1_transformed, atol=1e-5)
+    assert torch.allclose(h1, h2, atol=1e-5)
diff --git a/tests/test_messagepassing/test_radial_field_network_block.py b/tests/test_messagepassing/test_radial_field_network_block.py
@@ -65,3 +65,28 @@ def test_backward():
     loss = torch.mean(output_)
     loss.backward()
     assert x.grad.shape == x.shape
+
+
+def test_equivariance():
+
+    # Graph to be fully connected and undirected
+    edge_index = torch.combinations(torch.arange(x.shape[0]), r=2).T
+    edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
+
+    # Random rotation (det(rotation) should be 1)
+    rotation = torch.linalg.qr(torch.rand(x.shape[-1], x.shape[-1])).Q
+    if torch.det(rotation) < 0:
+        rotation[:, 0] *= -1
+
+    # Random translation
+    translation = torch.rand(1, x.shape[-1])
+
+    model = RadialFieldNetworkBlock(node_feature_dim=x.shape[1]).eval()
+
+    pos1 = model(edge_index=edge_index, x=x)
+    pos2 = model(edge_index=edge_index, x=x @ rotation.T + translation)
+
+    # Transform model output
+    pos1_transformed = (pos1 @ rotation.T) + translation
+
+    assert torch.allclose(pos2, pos1_transformed, atol=1e-5)
diff --git a/tests/test_messagepassing/test_schnet_block.py b/tests/test_messagepassing/test_schnet_block.py
@@ -71,3 +71,25 @@ def test_backward():
     loss = torch.mean(output_)
     loss.backward()
     assert x.grad.shape == x.shape
+
+
+def test_invariance():
+
+    # Graph to be fully connected and undirected
+    edge_index = torch.combinations(torch.arange(x.shape[0]), r=2).T
+    edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
+
+    # Random rotation (det(rotation) should be 1)
+    rotation = torch.linalg.qr(torch.rand(pos.shape[-1], pos.shape[-1])).Q
+    if torch.det(rotation) < 0:
+        rotation[:, 0] *= -1
+
+    # Random translation
+    translation = torch.rand(1, pos.shape[-1])
+
+    model = SchnetBlock(node_feature_dim=x.shape[1]).eval()
+
+    out1 = model(edge_index=edge_index, x=x, pos=pos)
+    out2 = model(edge_index=edge_index, x=x, pos=pos @ rotation.T + translation)
+
+    assert torch.allclose(out1, out2, atol=1e-5)