Enable flagser_count capabilities from C++ flagser and have two modul…

…es to handle coeff = or != 2 (#37) * Propagate flagser count_cells binding fix * Add flagser_count bindings * Update CMakeLists for compilation of new flagser count target * Add flagser_count functions * Add tests for flagser_count functions * Remove unsupported arguments * Remove OpenMP and add Threads and add flagser_coeff * Document and add flagser coeff * Update for new pybind11 modules * Improve and document flagser-count bindings * Propagate flagser bug fix * Remove Threads as a C++ dependency * Reshape dgms output of flagser_weight * Update directed docstring Signed-off-by: Guillaume Tauzin <[email protected]> Co-authored-by: Umberto Lupo <[email protected]>
giotto-ai · Jun 11, 2020 · a531569 · a531569
1 parent f6e59c5
commit a531569
Show file tree

Hide file tree

Showing 10 changed files with 385 additions and 97 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -7,14 +7,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/pybind11)
 
 set(BINDINGS_DIR "src")
 
-find_package(OpenMP)
-
+# flagser
 pybind11_add_module(flagser_pybind "${BINDINGS_DIR}/flagser_bindings.cpp")
 
-if(OpenMP_FOUND)
-  target_link_libraries(flagser_pybind PRIVATE OpenMP::OpenMP_CXX)
-endif()
-
 target_compile_definitions(flagser_pybind PRIVATE RETRIEVE_PERSISTENCE=1)
 target_include_directories(flagser_pybind PRIVATE .)
 
@@ -25,3 +20,31 @@ else()
     target_compile_options(flagser_pybind PUBLIC $<$<CONFIG:RELEASE>: -Ofast>)
     target_compile_options(flagser_pybind PUBLIC $<$<CONFIG:DEBUG>: -O2 -ggdb -D_GLIBCXX_DEBUG>)
 endif()
+
+# flagser with USE_COEFFICIENTS
+pybind11_add_module(flagser_coeff_pybind "${BINDINGS_DIR}/flagser_bindings.cpp")
+
+target_compile_definitions(flagser_coeff_pybind PRIVATE RETRIEVE_PERSISTENCE=1 USE_COEFFICIENTS=1)
+target_include_directories(flagser_coeff_pybind PRIVATE .)
+
+if(MSVC)
+    target_compile_options(flagser_coeff_pybind PUBLIC $<$<CONFIG:RELEASE>: /Wall /O2>)
+    target_compile_options(flagser_coeff_pybind PUBLIC $<$<CONFIG:DEBUG>:/O1 /DEBUG:FULL /Zi /Zo>)
+else()
+    target_compile_options(flagser_coeff_pybind PUBLIC $<$<CONFIG:RELEASE>: -Ofast>)
+    target_compile_options(flagser_coeff_pybind PUBLIC $<$<CONFIG:DEBUG>: -O2 -ggdb -D_GLIBCXX_DEBUG>)
+endif()
+
+
+# flagser-count
+pybind11_add_module(flagser_count_pybind "${BINDINGS_DIR}/flagser_count_bindings.cpp")
+
+target_include_directories(flagser_count_pybind PRIVATE .)
+
+if(MSVC)
+    target_compile_options(flagser_count_pybind PUBLIC $<$<CONFIG:RELEASE>: /Wall /O2>)
+    target_compile_options(flagser_count_pybind PUBLIC $<$<CONFIG:DEBUG>:/O1 /DEBUG:FULL /Zi /Zo>)
+else()
+    target_compile_options(flagser_count_pybind PUBLIC $<$<CONFIG:RELEASE>: -Ofast>)
+    target_compile_options(flagser_count_pybind PUBLIC $<$<CONFIG:DEBUG>: -O2 -ggdb -D_GLIBCXX_DEBUG>)
+endif()
diff --git a/flagser b/flagser
diff --git a/pyflagser/__init__.py b/pyflagser/__init__.py
@@ -3,8 +3,15 @@
 from .flagio import load_unweighted_flag, load_weighted_flag, \
     save_unweighted_flag, save_weighted_flag
 from .flagser import flagser_unweighted, flagser_weighted
+from .flagser_count import flagser_count_unweighted, \
+    flagser_count_weighted
 
-__all__ = ['load_unweighted_flag', 'load_weighted_flag',
-           'save_unweighted_flag', 'save_weighted_flag',
-           'flagser_unweighted', 'flagser_weighted',
+__all__ = ['load_unweighted_flag',
+           'load_weighted_flag',
+           'save_unweighted_flag',
+           'save_weighted_flag',
+           'flagser_unweighted',
+           'flagser_weighted',
+           'flagser_count_unweighted',
+           'flagser_count_weighted',
            '__version__']
diff --git a/pyflagser/flagser.py b/pyflagser/flagser.py
@@ -3,7 +3,9 @@
 import numpy as np
 
 from ._utils import _extract_unweighted_graph, _extract_weighted_graph
-from flagser_pybind import compute_homology, implemented_filtrations
+from .modules.flagser_pybind import compute_homology, AVAILABLE_FILTRATIONS
+from .modules.flagser_coeff_pybind import compute_homology as \
+    compute_homology_coeff
 
 
 def flagser_unweighted(adjacency_matrix, min_dimension=0, max_dimension=np.inf,
@@ -53,11 +55,11 @@ def flagser_unweighted(adjacency_matrix, min_dimension=0, max_dimension=np.inf,
         A dictionary with the following key-value pairs:
 
         - ``'betti'``: list of int
-          Betti number per dimension greater than or equal than
+          Betti numbers, per dimension greater than or equal than
           `min_dimension` and less than `max_dimension`.
         - ``'cell_count'``: list of int
-          Cell count (number of simplices) per dimension greater than or equal
-          `min_dimension` and less than `max_dimension`.
+          Cell counts (number of simplices), per dimension greater than or
+          equal to `min_dimension` and less than `max_dimension`.
         - ``'euler'``: int
           Euler characteristic.
 
@@ -91,10 +93,16 @@ def flagser_unweighted(adjacency_matrix, min_dimension=0, max_dimension=np.inf,
     # Extract vertices and edges
     vertices, edges = _extract_unweighted_graph(adjacency_matrix)
 
+    # Select the homology computer based on coeff
+    if coeff == 2:
+        _compute_homology = compute_homology
+    else:
+        _compute_homology = compute_homology_coeff
+
     # Call flagser binding
-    homology = compute_homology(vertices, edges, min_dimension, _max_dimension,
-                                directed, coeff, _approximation,
-                                _filtration)[0]
+    homology = _compute_homology(vertices, edges, min_dimension,
+                                 _max_dimension, directed, coeff,
+                                 _approximation, _filtration)[0]
 
     # Creating dictionary of return values
     out = {
@@ -143,13 +151,18 @@ def flagser_weighted(adjacency_matrix, max_edge_weight=None, min_dimension=0,
 
     directed : bool, optional, default: ``True``
         If ``True``, computes persistent homology for the directed filtered
-        flag complex determined by `adjacency_matrix`. If False, computes
+        flag complex determined by `adjacency_matrix`. If ``False``, computes
         persistent homology for the undirected filtered flag complex obtained
-        by considering all weighted edges as undirected, and it is therefore
-        sufficient (but not necessary) to pass an upper-triangular matrix. When
-        ``False``, if two directed edges corresponding to the same undirected
-        edge are assigned different weights, only the one on the upper
-        triangular part of the adjacency matrix is considered.
+        by considering all weighted edges as undirected, and if two directed
+        edges corresponding to the same undirected edge are explicitly assigned
+        different weights and neither exceeds `max_edge_weight`, only the one
+        in the upper triangular part of the adjacency matrix is considered.
+        Therefore:
+
+        - if `max_edge_weight` is ``numpy.inf``, it is sufficient to pass a
+          (dense or sparse) upper-triangular matrix;
+        - if `max_edge_weight` is finite, it is recommended to pass either a
+          symmetric dense matrix, or a sparse upper-triangular matrix.
 
     filtration : string, optional, default: ``'max'``
         Algorithm determining the filtration. Warning: if an edge filtration is
@@ -185,12 +198,12 @@ def flagser_weighted(adjacency_matrix, max_edge_weight=None, min_dimension=0,
           column representing the birth time and the second column
           representing the death time of each pair.
         - ``'betti'``: list of int
-          Betti number at filtration value `max_edge_weight` per dimension
-          greater than or equal than `min_dimension` and less than
+          Betti numbers at filtration value `max_edge_weight`, per dimension
+          greater than or equal to `min_dimension` and less than
           `max_dimension`.
         - ``'cell_count'``: list of int
-          Cell count (number of simplices) at filtration value
-          `max_edge_weight` per dimension greater than or equal than
+          Cell counts (number of simplices) at filtration value
+          `max_edge_weight`, per dimension greater than or equal to
           `min_dimension` and less than `max_dimension`.
         - ``'euler'``: int
           Euler characteristic at filtration value `max_edge_weight`.
@@ -219,21 +232,29 @@ def flagser_weighted(adjacency_matrix, max_edge_weight=None, min_dimension=0,
     else:
         _approximation = approximation
 
-    if filtration not in implemented_filtrations:
+    if filtration not in AVAILABLE_FILTRATIONS:
         raise ValueError("Filtration not recognized. Available filtrations "
-                         "are ", implemented_filtrations)
+                         "are ", AVAILABLE_FILTRATIONS)
 
     # Extract vertices and edges weights
     vertices, edges = _extract_weighted_graph(adjacency_matrix,
                                               max_edge_weight)
 
+    # Select the homology computer based on coeff
+    if coeff == 2:
+        _compute_homology = compute_homology
+    else:
+        _compute_homology = compute_homology_coeff
+
     # Call flagser binding
-    homology = compute_homology(vertices, edges, min_dimension, _max_dimension,
-                                directed, coeff, _approximation, filtration)[0]
+    homology = _compute_homology(vertices, edges, min_dimension,
+                                 _max_dimension, directed, coeff,
+                                 _approximation, filtration)[0]
 
     # Create dictionary of return values
     out = {
-        'dgms': [np.asarray(d) for d in homology.get_persistence_diagram()],
+        'dgms': [np.asarray(d).reshape((-1, 2))
+                 for d in homology.get_persistence_diagram()],
         'betti': homology.get_betti_numbers(),
         'cell_count': homology.get_cell_count(),
         'euler': homology.get_euler_characteristic()

diff --git a/pyflagser/flagser_count.py b/pyflagser/flagser_count.py
@@ -0,0 +1,132 @@
+"""Implementation of the python API for the cell count of the flagser C++
+library."""
+
+import numpy as np
+
+from ._utils import _extract_unweighted_graph, _extract_weighted_graph
+from .modules.flagser_count_pybind import compute_cell_count
+
+
+def flagser_count_unweighted(adjacency_matrix, min_dimension=0,
+                             max_dimension=np.inf, directed=True):
+    """Compute the cell count per dimension of a directed/undirected unweighted
+    flag complex.
+
+    From an adjacency matrix construct all cells forming its associated flag
+    complex and compute their number per dimension.
+
+    Parameters
+    ----------
+    adjacency_matrix : 2d ndarray or scipy.sparse matrix of shape \
+        (n_vertices, n_vertices), required
+        Adjacency matrix of a directed/undirected unweighted graph. It is
+        understood as a boolean matrix. Off-diagonal, ``0`` or ``False`` values
+        denote abstent edges while non-``0`` or ``True`` values denote edges
+        which are present. Diagonal values are ignored.
+
+    directed : bool, optional, default: ``True``
+        If ``True``, computes homology for the directed flad complex determined
+        by `adjacency_matrix`. If ``False``, computes homology for the
+        undirected flag complex obtained by considering all edges as
+        undirected, and it is therefore sufficient (but not necessary)
+        to pass an upper-triangular matrix.
+
+    Returns
+    -------
+    out : list of int
+        Cell counts (number of simplices), per dimension greater than or equal
+        to `min_dimension` and less than `max_dimension`.
+
+    Notes
+    -----
+    The input graphs cannot contain self-loops, i.e. edges that start and end
+    in the same vertex, therefore diagonal elements of the input adjacency
+    matrix will be ignored.
+
+    References
+    ----------
+    .. [1] D. Luetgehetmann, "Documentation of the C++ flagser library";
+           `GitHub: luetge/flagser <https://github.com/luetge/flagser/blob/\
+           master/docs/documentation_flagser.pdf>`_.
+
+    """
+    # Extract vertices and edges
+    vertices, edges = _extract_unweighted_graph(adjacency_matrix)
+
+    # Call flagser_count binding
+    cell_count = compute_cell_count(vertices, edges, directed)
+
+    return cell_count
+
+
+def flagser_count_weighted(adjacency_matrix, max_edge_weight=None,
+                           directed=True):
+    """Compute the cell count per dimension of a directed/undirected
+    filtered flag complex.
+
+    From an adjacency matrix construct a filtered flag complex as a sequence of
+    its cells associated to their filtration values and compute the number of
+    cells per dimension at the end of the filtration.
+
+    Parameters
+    ----------
+    adjacency_matrix : 2d ndarray or scipy.sparse matrix of shape \
+        (n_vertices, n_vertices), required
+        Matrix representation of a directed/undirected weighted graph. Diagonal
+        elements are vertex weights. The way zero values are handled depends on
+        the format of the matrix. If the matrix is a dense ``numpy.ndarray``,
+        zero values denote zero-weighted edges. If the matrix is a sparse
+        ``scipy.sparse`` matrix, explicitly stored off-diagonal zeros and all
+        diagonal zeros denote zero-weighted edges. Off-diagonal values that
+        have not been explicitely stored are treated by ``scipy.sparse`` as
+        zeros but will be understood as infinitely-valued edges, i.e., edges
+        absent from the filtration.
+
+    max_edge_weight : int or float or ``None``, optional, default: ``None``
+        Maximum edge weight to be considered in the filtration. All edge
+        weights greater than that value will be considered as
+        infinitely-valued, i.e., absent from the filtration. If ``None``, all
+        finite edge weights are considered.
+
+    directed : bool, optional, default: ``True``
+        If ``True``, computes persistent homology for the directed filtered
+        flag complex determined by `adjacency_matrix`. If ``False``, computes
+        persistent homology for the undirected filtered flag complex obtained
+        by considering all weighted edges as undirected, and if two directed
+        edges corresponding to the same undirected edge are explicitly assigned
+        different weights and neither exceeds `max_edge_weight`, only the one
+        in the upper triangular part of the adjacency matrix is considered.
+        Therefore:
+
+        - if `max_edge_weight` is ``numpy.inf``, it is sufficient to pass a
+          (dense or sparse) upper-triangular matrix;
+        - if `max_edge_weight` is finite, it is recommended to pass either a
+          symmetric dense matrix, or a sparse upper-triangular matrix.
+
+    Returns
+    -------
+    out : list of int
+        Cell counts (number of simplices) at filtration value
+        `max_edge_weight`, per dimension.
+
+    Notes
+    -----
+    The input graphs cannot contain self-loops, i.e. edges that start and end
+    in the same vertex, therefore diagonal elements of the input adjacency
+    matrix store vertex weights.
+
+    References
+    ----------
+    .. [1] D. Luetgehetmann, "Documentation of the C++ flagser library";
+           `GitHub: luetge/flagser <https://github.com/luetge/flagser/blob/\
+           master/docs/documentation_flagser.pdf>`_.
+
+    """
+    # Extract vertices and edges weights
+    vertices, edges = _extract_weighted_graph(adjacency_matrix,
+                                              max_edge_weight)
+
+    # Call flagser_count binding
+    cell_count = compute_cell_count(vertices, edges, directed)
+
+    return cell_count
diff --git a/pyflagser/tests/conftest.py b/pyflagser/tests/conftest.py
@@ -2,7 +2,7 @@
 from tempfile import mkdtemp
 from urllib.request import urlopen, urlretrieve
 
-from flagser_pybind import implemented_filtrations
+from ..modules.flagser_pybind import AVAILABLE_FILTRATIONS
 
 files_with_filtration_results = ["d5.flag"]
 
@@ -54,7 +54,7 @@ def pytest_generate_tests(metafunc):
         webdl = metafunc.config.option.webdl
         FlagFiles.paths = fetch_flag_files(webdl)
     if "filtration" in metafunc.fixturenames:
-        metafunc.parametrize("filtration", implemented_filtrations)
+        metafunc.parametrize("filtration", AVAILABLE_FILTRATIONS)
         paths = [
             path for path in FlagFiles.paths
             if os.path.split(path)[1] in files_with_filtration_results

diff --git a/pyflagser/tests/test_flagser_count.py b/pyflagser/tests/test_flagser_count.py
@@ -0,0 +1,44 @@
+"""Testing for the python bindings of the C++ flagser library."""
+
+import os
+
+from numpy.testing import assert_almost_equal
+
+from pyflagser import load_unweighted_flag, load_weighted_flag, \
+    flagser_count_unweighted, flagser_count_weighted
+
+
+cell_count = {
+    'a.flag': [5, 7, 1],
+    'b.flag': [4, 6, 3],
+    'c.flag': [8, 12],
+    'd.flag': [5, 9, 6],
+    'e.flag': [5, 9, 7, 2],
+    'f.flag': [3, 5, 3],
+    'd2.flag': [2, 2],
+    'd3.flag': [3, 6, 6],
+    'd3-allzero.flag': [3, 6, 6],
+    'double-d3.flag': [4, 10, 12],
+    'double-d3-allzero.flag': [4, 10, 12],
+    'd4.flag': [4, 12, 24, 24],
+    'd4-allzero.flag': [4, 12, 24, 24],
+    'd5.flag': [5, 20, 60, 120, 120],
+    'd7.flag': [7, 42, 210, 840, 2520, 5040, 5040],
+    'medium-test-data.flag': [31346, 68652, 12694, 250],
+    'd10.flag': [10, 90, 720, 5040, 30240, 151200, 604800,
+                 1814400, 3628800, 3628800],
+}
+
+
+def test_unweighted(flag_file):
+    adjacency_matrix = load_unweighted_flag(flag_file, fmt='dense')
+    cell_count_exp = cell_count[os.path.split(flag_file)[1]]
+    cell_count_res = flagser_count_unweighted(adjacency_matrix)
+    assert_almost_equal(cell_count_res, cell_count_exp)
+
+
+def test_weighted(flag_file):
+    adjacency_matrix = load_weighted_flag(flag_file, fmt='coo')
+    cell_count_exp = cell_count[os.path.split(flag_file)[1]]
+    cell_count_res = flagser_count_weighted(adjacency_matrix)
+    assert_almost_equal(cell_count_res, cell_count_exp)
diff --git a/setup.py b/setup.py
@@ -102,8 +102,8 @@ def install_dependencies(self):
                                '--init', '--recursive'])
 
     def build_extension(self, ext):
-        extdir = os.path.abspath(os.path.dirname(
-            self.get_ext_fullpath(ext.name)))
+        extdir = os.path.abspath(os.path.join(os.path.dirname(
+            self.get_ext_fullpath(ext.name)), 'pyflagser', 'modules'))
         cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir,
                       '-DPYTHON_EXECUTABLE=' + sys.executable]
+48 −0		.github/workflows/build.yml
+0 −23		.travis.yml
+31 −8		CMakeLists.txt
+1 −1		include/complex/directed_flag_complex_computer.h
+1 −1		include/complex/directed_flag_complex_in_memory_computer.h
+1 −0		include/persistence.h
+6 −4		src/flagser-count.cpp
+7 −1		test/base.h
+0 −4		test/test_flagser.cpp