Create carnival experiments notebook, improve plotting function

saezlab · Aug 26, 2024 · 4cc35f9 · 4cc35f9
1 parent 48d24b2
commit 4cc35f9
Show file tree

Hide file tree

Showing 8 changed files with 8,275 additions and 2,925 deletions.
diff --git a/corneto/_plotting.py b/corneto/_plotting.py
@@ -16,15 +16,31 @@ def clip_quantiles(arr, q):
 
 def vertex_style(
     P,
-    G,  # TODO: G should not be required
+    G: Optional[BaseGraph] = None,  # TODO: G should not be required
     vertex_var: str = "vertex_values",
     negative_color: str = "dodgerblue4",
     positive_color: str = "firebrick4",
-    condition: Optional[int] = 0,
+    sample: Optional[int] = None,
 ):
     v_values = np.array(P.expr[vertex_var].value)
+    if v_values is None:
+        raise ValueError(
+            f"""Variable {vertex_var} in the problem, but values are None.
+            Has the problem been solved?"""
+        )
+    if sample is not None:
+        if len(v_values.shape) < 2:
+            raise ValueError(
+                f"""Variable {vertex_var} in the problem is not a matrix.
+                Cannot select sample {sample}"""
+            )
+        v_values = v_values[:, sample]
+    if G is None:
+        vertices = list(range(len(v_values)))
+    else:
+        vertices = G.V
     return create_graphviz_vertex_attributes(
-        G.V, v_values, negative_color=negative_color, positive_color=positive_color
+        vertices, v_values, negative_color=negative_color, positive_color=positive_color
     )
 
 
@@ -35,14 +51,21 @@ def edge_style(
     edge_var: str = "edge_values",
     negative_color: str = "dodgerblue4",
     positive_color: str = "firebrick4",
-    condition: Optional[int] = 0,
+    sample: Optional[int] = None,
 ):
     e_values = P.expr[edge_var].value
     if e_values is None:
         raise ValueError(
             f"""Variable {edge_var} in the problem, but values are None.
             Has the problem been solved?"""
         )
+    if sample is not None:
+        if len(e_values.shape) < 2:
+            raise ValueError(
+                f"""Variable {edge_var} in the problem is not a matrix.
+                Cannot select sample {sample}"""
+            )
+        e_values = e_values[:, sample]
     return create_graphviz_edge_attributes(
         e_values,
         max_edge_width=max_edge_width,
@@ -187,7 +210,7 @@ def to_graphviz(
     node_attr: Optional[Dict[str, str]] = None,
     edge_attr: Optional[Dict[str, str]] = None,
     custom_edge_attr: Optional[Dict[int, Dict[str, str]]] = None,
-    custom_vertex_attr: Optional[Dict[str, Dict[str, str]]] = None,
+    custom_vertex_attr: Optional[Dict[Union[int, str], Dict[str, str]]] = None,
     layout: str = "dot",
     orphan_edges: bool = True,
 ) -> Any:
@@ -198,6 +221,14 @@ def to_graphviz(
         custom_edge_attr = {}
     if custom_vertex_attr is None:
         custom_vertex_attr = {}
+    if len(custom_vertex_attr) > 0:
+        keys = list(custom_vertex_attr.keys())
+        # Check if all the keys are integers
+        if all(isinstance(k, int) for k in keys):
+            vertices = graph.V
+            custom_vertex_attr = {
+                str(v): custom_vertex_attr[i] for i, v in enumerate(vertices)
+            }
     if node_attr is None:
         node_attr = dict(fixedsize="true")
     g = graphviz.Digraph(

diff --git a/corneto/methods/carnival.py b/corneto/methods/carnival.py
@@ -4,7 +4,7 @@
 import numpy as np
 
 import corneto as cn
-from corneto._graph import Graph
+from corneto._graph import BaseGraph, Graph
 from corneto._settings import LOGGER
 from corneto.methods.signal._util import (
     get_incidence_matrices_of_edges,
@@ -73,6 +73,21 @@ def read_dataset(zip_path):
     return G, data_dict
 
 
+def preprocess_graph(
+    priorKnowledgeNetwork: BaseGraph, perturbations: Dict, measurements: Dict
+):
+    V = set(priorKnowledgeNetwork.vertices)
+    inputs = set(perturbations.keys())
+    outputs = set(measurements.keys())
+    c_inputs = V.intersection(inputs)
+    c_outputs = V.intersection(outputs)
+    Gp = priorKnowledgeNetwork.prune(list(c_inputs), list(c_outputs))
+    V = set(Gp.vertices)
+    cp_inputs = {input: v for input, v in perturbations.items() if input in V}
+    cp_outputs = {output: v for output, v in measurements.items() if output in V}
+    return Gp, cp_inputs, cp_outputs
+
+
 # TODO: Return a problem, which is associated to the carnival graph
 # think about connecting edge/nodes with variables!
 def runVanillaCarnival(
@@ -735,3 +750,192 @@ def runCARNIVAL_Flow_Acyclic_Signal(
 
     P.solve(solver=solver, verbosity=verbosity)
     return P
+
+
+def milp_carnival(
+    G,
+    perturbations,
+    measurements,
+    beta_weight: float = 0.2,
+    max_dist=None,
+    penalize="edges",  # nodes, edges, both
+    use_perturbation_weights=False,
+    interaction_graph_attribute="interaction",
+    disable_acyclicity=False,
+    backend=cn.DEFAULT_BACKEND,
+):
+    """Improved port of the original Carnival R method.
+
+    This implementation uses the CORNETO backend capabilities to create a MILP problem.
+    However, it does not use the flow formulation and multi-sample capabilities of the
+    novel method implemented in CORNETO. This method is kept for compatibility with the
+    original Carnival R method and for comparison purposes.
+
+    NOTE: Since the method is decoupled from specific solvers, the default pool of
+    solutions generated using CPLEX is not available.
+
+    Args:
+        G: The graph object representing the network.
+        perturbations: A dictionary of perturbations applied to specific vertices in the graph.
+        measurements: A dictionary of measured values for specific vertices in the graph.
+        beta_weight: The weight for the regularization term in the objective function.
+        max_dist: The maximum distance allowed for vertex positions in the graph.
+        penalize: The type of regularization to apply ('nodes', 'edges', or 'both').
+        use_perturbation_weights: Whether to use perturbation weights in the objective function.
+        interaction_graph_attribute: The attribute name for the interaction type in the graph.
+        disable_acyclicity: Whether to disable the acyclicity constraint in the optimization.
+        backend: The backend engine to use for the optimization.
+
+    Returns:
+        The optimization problem object.
+    """
+    max_dist = G.num_vertices if max_dist is None else max_dist
+
+    # The problem uses 2*|V| + 2*|E| binary variables + |V| continuous variables
+    V_act = backend.Variable(
+        "vertex_activated", shape=(len(G.V),), vartype=cn.VarType.BINARY
+    )
+    V_inh = backend.Variable(
+        "vertex_inhibited", shape=(len(G.V),), vartype=cn.VarType.BINARY
+    )
+    E_act = backend.Variable(
+        "edge_activating", shape=(len(G.E),), vartype=cn.VarType.BINARY
+    )
+    E_inh = backend.Variable(
+        "edge_inhibiting", shape=(len(G.E),), vartype=cn.VarType.BINARY
+    )
+    V_pos = backend.Variable(
+        "vertex_position",
+        shape=(len(G.V),),
+        lb=0,
+        ub=max_dist,
+        vartype=cn.VarType.CONTINUOUS,
+    )
+
+    V_index = {v: i for i, v in enumerate(G.V)}
+
+    P = backend.Problem()
+
+    # A vertex can be activated or inhibited, but not both
+    P += V_act + V_inh <= 1
+    # An edge can activate or inhibit, but not both
+    P += E_act + E_inh <= 1
+
+    for i, (s, t) in enumerate(G.E):
+        s = list(s)
+        t = list(t)
+        if len(s) == 0:
+            continue
+        if len(s) > 1:
+            raise ValueError("Only one source vertex allowed")
+        if len(t) > 1:
+            raise ValueError("Only one target vertex allowed")
+        s = s[0]
+        t = t[0]
+        # An edge can activate its downstream (target vertex) (E_act=1, E_inh=0),
+        # inhibit it (E_act=0, E_inh=1), or do nothing (E_act=0, E_inh=0)
+        si = V_index[s]
+        ti = V_index[t]
+        interaction = int(G.get_attr_edge(i).get(interaction_graph_attribute))
+        # If edge interaction type is activatory, it can only activate the downstream
+        # vertex if the source vertex is activated
+        # NOTE: The 4 constraints can be merged by 2, but kept like this for clarity
+        # This implements the basics of the sign consistency rules
+        if interaction == 1:
+            # Edge is activatory: E_act can only be 1 if V_act[source] is 1
+            # edge (s->t) can activate t only if s is activated
+            P += E_act[i] <= V_act[si]
+            # edge (s->t) can inhibit t only if s is inhibited
+            P += E_inh[i] <= V_inh[si]
+        elif interaction == -1:
+            # edge (s-|t) can activate t only if s is inhibited
+            P += E_act[i] <= V_inh[si]
+            # edge (s-|t) can inhibit t only if s is activated
+            P += E_inh[i] <= V_act[si]
+        else:
+            raise ValueError(f"Invalid interaction value for edge {i}: {interaction}")
+
+        # If the edge is selected, then we must respect the order of the vertices:
+        # V_pos[target] - V_pos[source] >= 1
+        # E.g., if a partial solution is A -> B -> C, and the ordering assigned is
+        # A(0) -> B(1) -> C(2), we cannot select an edge C -> A since 2 > 0
+        # The maximum numbering possible, starting with 0, cannot exceed the
+        # number of vertices of the graph.
+        # Note that there are two possible orderings: or target vertex is greater
+        # than source (then edge can be selected), or less or equal to 0
+        # (in which case the edge cannot be selected).
+        # The acyclicity constraint is reduced to this simple constraint:
+        # - if edge selected, then target vertex must be greater than source (diff >= 1)
+        # - if edge not selected, then the diff. does not matter (we can assign any value)
+        # IMPORTANT: acyclicity can be disabled, but then self activatory loops that are
+        # not downstream the perturbations can appear in the solution.
+        if not disable_acyclicity:
+            edge_selected = E_act[i] + E_inh[i]
+            P += V_pos[ti] - V_pos[si] >= 1 - max_dist * (1 - edge_selected)
+
+    # Now compute the value of each vertex, based on the incoming selected edges
+    # NOTE: Here we force that a vertex can have at most 1 incoming edge but this
+    # could be relaxed (e.g. allow many inputs and integrate signal).
+    for v in G.V:
+        in_edges_idx = [i for i, _ in G.in_edges(v)]
+        i = V_index[v]
+        perturbed_value = 0
+        perturbed = v in perturbations
+        if perturbed:
+            perturbed_value = np.sign(perturbations[v])
+        in_edges_selected = [E_act[i] + E_inh[i] for i in in_edges_idx]
+        if len(in_edges_idx) > 0:
+            P += sum(in_edges_selected) <= 1
+        # And the value of the target vertex equals the value of the selected edge
+        # If no edge is selected, then the value is 0]
+        incoming_activating = (
+            sum(E_act[j] for j in in_edges_idx) if len(in_edges_idx) > 0 else 0
+        )
+        incoming_inhibiting = (
+            sum(E_inh[j] for j in in_edges_idx) if len(in_edges_idx) > 0 else 0
+        )
+        P += V_act[i] <= int(perturbed) + incoming_activating
+        P += V_inh[i] <= int(perturbed) + incoming_inhibiting
+        # If perturbed but value is 0, then perturbation can take any value,
+        # otherwise it must be the same as the perturbation
+        if perturbed_value > 0:
+            P += V_act[i] == 1
+            P += V_inh[i] == 0
+        elif perturbed_value < 0:
+            P += V_act[i] == 0
+            P += V_inh[i] == 1
+
+    # Finally, the objective function is to minimise the difference between
+    # the predicted values and the measurements
+
+    data = measurements.copy()
+    if use_perturbation_weights:
+        data.update(perturbations)
+
+    error_terms = []
+    for k, v in data.items():
+        i = V_index[k]
+        prediction = V_act[i] - V_inh[i]  # -1, 0, 1
+        sign = np.sign(v)
+        if sign > 0:
+            error_terms.append(np.abs(v) * (sign - prediction))
+        elif sign < 0:
+            error_terms.append(np.abs(v) * (prediction - sign))
+    obj = sum(error_terms)
+    reg = 0
+    P.add_objectives(obj)
+    if beta_weight > 0:
+        if penalize == "nodes":
+            reg = sum(V_act) + sum(V_inh)
+        elif penalize == "edges":
+            reg = sum(E_act) + sum(E_inh)
+        elif penalize == "both":
+            # This is the default implemented in CarnivalR,
+            # although regularization by edges should be preferred
+            reg = sum(V_act) + sum(V_inh) + sum(E_act) + sum(E_inh)
+        P.add_objectives(reg, weights=beta_weight)
+
+    # Finally, register some aliases for convenience
+    P.register("vertex_values", V_act - V_inh)
+    P.register("edge_values", E_act - E_inh)
+    return P
diff --git a/corneto/methods/method.py b/corneto/methods/method.py
@@ -0,0 +1,23 @@
+from abc import ABC, abstractmethod
+
+from corneto import DEFAULT_BACKEND
+from corneto._graph import BaseGraph
+
+
+class CornetoMethod(ABC):
+    def __init__(self, backend=DEFAULT_BACKEND):
+        self._backend = backend
+        self._annotated_flow_graph = None
+        self.problem = None
+
+    @abstractmethod
+    def create_problem(self):
+        raise NotImplementedError
+
+    @abstractmethod
+    def map_data(self, graph: BaseGraph, data) -> BaseGraph:
+        raise NotImplementedError
+
+    @abstractmethod
+    def transform_graph(self, graph: BaseGraph) -> BaseGraph:
+        raise NotImplementedError
diff --git a/corneto/methods/shortest_path.py b/corneto/methods/shortest_path.py
@@ -7,6 +7,36 @@
 from corneto.backend import DEFAULT_BACKEND, Backend
 from corneto.backend._base import DEFAULT_UB, Indicator
 
+"""
+class ShortestPath(CornetoMethod):
+    def __init__(self, backend=DEFAULT_BACKEND):
+        super().__init__(backend=backend)
+
+    def create_problem(self, source_target_pairs: list, lambd: float = 0.0):
+        P = self._backend.Flow(Gc, lb=0, ub=DEFAULT_UB, n_flows=len(source_target_nodes))
+        # Now we add the objective and constraints for each sample
+        for i, (s, t) in enumerate(source_target_pairs):
+            weights = edge_weights[i, :]
+            P.add_objectives(P.expr.flow[:, i] @ weights)
+            # Now we inject/extract 1 unit flow from s to t
+            P += P.expr.flow[inflow_edges[s]] == 1
+            P += P.expr.flow[outflow_edges[t]] == 1
+            # For the rest of inflow/outflow edges, we set the flow to 0
+            for node in inflow_edges:
+                if node != s:
+                    P += P.expr.flow[inflow_edges[node]] == 0
+            for node in outflow_edges:
+                if node != t:
+                    P += P.expr.flow[outflow_edges[node]] == 0
+        # Add reg
+        if lambd > 0:
+            P += self._backend.linear_or(
+                P.expr.flow, axis=1, ignore_type=True, varname="active_edge"
+            )
+            P.add_objectives(sum(P.expr.active_edge), weights=lambd)
+
+"""
+
 
 def create_multisample_shortest_path(
     G: BaseGraph,