From 3fdb7ce0584156677a02e1c3ddfc688c92a04e4e Mon Sep 17 00:00:00 2001 From: julian Date: Mon, 16 Aug 2021 13:30:35 +0200 Subject: [PATCH 01/21] Add support to compute and retrieve cocycles * Enable feature in C++ backend * Cannot enable the computation in Python right now * A lot of changes happened in the bindings, but in reality is not much, I forgot to run previously clang-format Signed-off-by: julian --- gph/bindings/ripser_bindings.cpp | 29 +++++----- gph/python/ripser_interface.py | 24 ++++---- gph/src/ripser.h | 94 ++++++++++++++++++++++++++++++-- 3 files changed, 117 insertions(+), 30 deletions(-) diff --git a/gph/bindings/ripser_bindings.cpp b/gph/bindings/ripser_bindings.cpp index 1ae6a736..09463e34 100644 --- a/gph/bindings/ripser_bindings.cpp +++ b/gph/bindings/ripser_bindings.cpp @@ -39,15 +39,13 @@ to_numpy_barcodes(std::vector barcodes) } #if defined USE_COEFFICIENTS -PYBIND11_MODULE(gph_ripser_coeff, m) -{ +PYBIND11_MODULE(gph_ripser_coeff, m) { #else -PYBIND11_MODULE(gph_ripser, m) -{ +PYBIND11_MODULE(gph_ripser, m) { #endif - using namespace pybind11::literals; - m.doc() = "Ripser python interface"; + using namespace pybind11::literals; + m.doc() = "Ripser python interface"; py::class_(m, "flagPersGen", py::module_local()) .def_readwrite("finite_0", &flagPersGen::finite_0) @@ -63,6 +61,7 @@ PYBIND11_MODULE(gph_ripser, m) [&](ripserResults& res) { return to_numpy_barcodes(res.births_and_deaths_by_dim); }) + .def_readwrite("cocycles_by_dim", &ripserResults::cocycles_by_dim) .def_readwrite("flag_persistence_generators_by_dim", &ripserResults::flag_persistence_generators); @@ -70,7 +69,7 @@ PYBIND11_MODULE(gph_ripser, m) "rips_dm", [](py::array_t& D, py::array_t& diag, int modulus, int dim_max, float threshold, int num_threads, - bool return_generators) { + bool return_generators, const bool do_cocycles) { // Setup distance matrix and figure out threshold auto D_ = static_cast(D.request().ptr); std::vector distances(D_, D_ + D.size()); @@ -84,20 +83,21 @@ PYBIND11_MODULE(gph_ripser, m) ripserResults res; ripser r( - std::move(dist), dim_max, threshold, modulus, num_threads, - return_generators); + std::move(dist), dim_max, threshold, modulus, + num_threads, return_generators, do_cocycles); r.compute_barcodes(); r.copy_results(res); return res; }, - "D"_a, "diag"_a, "modulus"_a, "dim_max"_a, "threshold"_a, - "num_threads"_a, "return_generators"_a, "ripser distance matrix"); + "D"_a, "N"_a, "modulus"_a, "dim_max"_a, "threshold"_a, "num_threads"_a, + "return_generators"_a, "do_cocycles"_a, "ripser distance matrix"); m.def( "rips_dm_sparse", [](py::array_t& I, py::array_t& J, py::array_t& V, int NEdges, int N, int modulus, int dim_max, - float threshold, int num_threads, bool return_generators) { + float threshold, int num_threads, bool return_generators, + const bool do_cocycles) { auto I_ = static_cast(I.request().ptr); auto J_ = static_cast(J.request().ptr); auto V_ = static_cast(V.request().ptr); @@ -105,7 +105,8 @@ PYBIND11_MODULE(gph_ripser, m) // Setup distance matrix and figure out threshold ripser r( sparse_distance_matrix(I_, J_, V_, NEdges, N, threshold), - dim_max, threshold, modulus, num_threads, return_generators); + dim_max, threshold, modulus, num_threads, + return_generators, do_cocycles); r.compute_barcodes(); ripserResults res; @@ -113,7 +114,7 @@ PYBIND11_MODULE(gph_ripser, m) return res; }, "I"_a, "J"_a, "V"_a, "NEdges"_a, "N"_a, "modulus"_a, "dim_max"_a, - "threshold"_a, "num_threads"_a, "return_generators"_a, + "threshold"_a, "num_threads"_a, "return_generators"_a, "do_cocycles"_a, "ripser sparse distance matrix"); m.def("get_max_coefficient_field_supported", diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index 87aefeb3..017d39b1 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -34,27 +34,31 @@ MAX_COEFF_SUPPORTED = gph_ripser.get_max_coefficient_field_supported() -def _compute_ph_vr_dense(DParam, diagonal, maxHomDim, thresh=-1, coeff=2, - n_threads=1, return_generators=False): +def _compute_ph_vr_dense(DParam, maxHomDim, thresh=-1, coeff=2, n_threads=1, + return_generators=False, return_cocycles=False): if coeff == 2: - ret = gph_ripser.rips_dm(DParam, diagonal, coeff, maxHomDim, thresh, - n_threads, return_generators) + ret = gph_ripser.rips_dm(DParam, DParam.shape[0], coeff, + maxHomDim, thresh, n_threads, + return_generators, return_cocycles) else: - ret = gph_ripser_coeff.rips_dm(DParam, diagonal, coeff, maxHomDim, - thresh, n_threads, return_generators) + ret = gph_ripser_coeff.rips_dm(DParam, DParam.shape[0], coeff, + maxHomDim, thresh, n_threads, + return_generators, return_cocycles) return ret def _compute_ph_vr_sparse(I, J, V, N, maxHomDim, thresh=-1, coeff=2, - n_threads=1, return_generators=False): + n_threads=1, return_generators=False, + return_cocycles=False): if coeff == 2: ret = gph_ripser.rips_dm_sparse(I, J, V, I.size, N, coeff, maxHomDim, thresh, n_threads, - return_generators) + return_generators, return_cocycles) else: ret = gph_ripser_coeff.rips_dm_sparse(I, J, V, I.size, N, coeff, maxHomDim, thresh, n_threads, - return_generators) + return_generators, + return_cocycles) return ret @@ -498,7 +502,7 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", Python", *Journal of Open Source Software*, **3**(29), 2021; `DOI: 10.21105/joss.00925 `_. - + .. [2] U. Bauer, "Ripser: efficient computation of Vietoris–Rips persistence barcodes", *J Appl. and Comput. Topology*, **5**, pp. 391–423, 2021; `DOI: 10.1007/s41468-021-00071-5 diff --git a/gph/src/ripser.h b/gph/src/ripser.h index ee7b076c..d1ccc9c6 100644 --- a/gph/src/ripser.h +++ b/gph/src/ripser.h @@ -91,11 +91,14 @@ typedef float value_t; typedef int64_t index_t; typedef uint16_t coefficient_t; -using barcodes_t = std::vector; - const size_t num_coefficient_bits = 8; constexpr value_t inf_value = std::numeric_limits::infinity(); +using barcodes_t = std::vector; +using cocycle_t = std::vector; +using cocycles_t = std::vector; + + // 1L on windows is ALWAYS 32 bits, when on unix systems is pointer size static const index_t max_simplex_index = (uintptr_t(1) << (8 * sizeof(index_t) - 1 - num_coefficient_bits)) - 1; @@ -145,6 +148,11 @@ class binomial_coeff_table } }; +coefficient_t normalize(const coefficient_t n, const coefficient_t modulus) +{ + return n > modulus / 2 ? n - modulus : n; +} + std::vector multiplicative_inverse_vector(const coefficient_t m) { std::vector inverse(m); @@ -615,6 +623,21 @@ typedef struct { /* The second variable stores the flag persistence generators */ flagPersGen flag_persistence_generators; + /* + The third variable is a vector of representative cocycles for each + dimension. For now, only cocycles above dimension 0 are added, so + dimension 0 is an empty list For the others, cocycles_by_dim[d] holds an + array of representative cocycles for dimension d which is parallel with + the array of births/deaths for dimension d. Each element of the array is + itself an array of unrolled information about the cocycle For dimension 1, + for example, the zeroeth element of the array contains [ccl0_simplex0_idx0 + ccl0_simplex0_idx1 ccl0_simplex0_val, ccl0_simplex1_idx0 + ccl0_simplex1_idx1 ccl0_simplex1_val, ... ccl0_simplexk_idx0 + ccl0_simplexk_idx1 ccl0_simplexk_val] for a cocycle representing the first + persistence point, which has k simplices with nonzero values in the + representative cocycle + */ + std::vector cocycles_by_dim; } ripserResults; template @@ -631,6 +654,9 @@ class ripser // If this flag is off, don't extract the flag persistence // pairings and essential simplices const bool return_flag_persistence_generators; + // If this flag is off, don't extract the representative cocycles to save + // time + const bool return_cocycles; struct entry_hash { std::size_t operator()(const entry_t& e) const @@ -665,16 +691,19 @@ class ripser public: mutable std::vector births_and_deaths_by_dim; mutable flagPersGen flag_persistence_generators; + mutable std::vector cocycles_by_dim; ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, coefficient_t _modulus, int _num_threads, - bool return_flag_persistence_generators_) + bool return_flag_persistence_generators_, const bool return_cocycles_ + ) : dist(std::move(_dist)), n(dist.size()), dim_max(_dim_max), threshold(_threshold), modulus(_modulus), num_threads(_num_threads), binomial_coeff(n, dim_max + 2), multiplicative_inverse(multiplicative_inverse_vector(_modulus)), return_flag_persistence_generators( - return_flag_persistence_generators_) + return_flag_persistence_generators_), + return_cocycles(return_cocycles_) { /* Uses all concurrent threads supported */ if (num_threads == -1) @@ -696,6 +725,7 @@ class ripser { res.births_and_deaths_by_dim = births_and_deaths_by_dim; res.flag_persistence_generators = flag_persistence_generators; + res.cocycles_by_dim = cocycles_by_dim; } index_t get_max_vertex(const index_t idx, const index_t k, @@ -1257,7 +1287,7 @@ class ripser #endif void retire_column(const index_t& idx_col, - WorkingColumn& working_reduction_column, Matrix& mat, + WorkingColumn working_reduction_column, Matrix& mat, const std::vector& columns_to_reduce, const index_t dim, const index_t& pivot_index, mrzv::MemoryManager& mem_manager) @@ -1317,6 +1347,28 @@ class ripser return edge; } + inline void compute_cocycles(WorkingColumn cocycle, index_t dim, + cocycle_t& cocycles) + { + thread_local diameter_entry_t cocycle_e; + thread_local std::vector cocycle_simplex; + thread_local cocycle_t thiscocycle; + + thiscocycle.clear(); + while (get_index(cocycle_e = get_pivot(cocycle)) != -1) { + cocycle_simplex.clear(); + get_simplex_vertices(get_index(cocycle_e), dim, n, + std::back_inserter(cocycle_simplex)); + for (size_t k = 0; k < cocycle_simplex.size(); k++) { + thiscocycle.push_back((int) cocycle_simplex[k]); + } + thiscocycle.push_back( + normalize(get_coefficient(cocycle_e), modulus)); + cocycle.pop(); + } + cocycles = thiscocycle; + } + void compute_pairs(const std::vector& columns_to_reduce, entry_hash_map& pivot_column_index, const index_t dim) { @@ -1324,7 +1376,8 @@ class ripser // extra vector is a work-around inability to store floats in the // hash_map - std::atomic idx_finite_bar{0}, idx_essential{0}; + std::atomic idx_finite_bar{0}, idx_essential{0}, + idx_cocycles{0}; entry_hash_map pivot_to_death_idx(columns_to_reduce.size()); pivot_to_death_idx.reserve(columns_to_reduce.size()); @@ -1334,6 +1387,7 @@ class ripser flagPersGen::essential_higher_t essential_generator( columns_to_reduce.size()); flagPersGen::finite_higher_t finite_generator(columns_to_reduce.size()); + cocycles_t cocycles(columns_to_reduce.size()); foreach (columns_to_reduce, [&](index_t index_column_to_reduce, bool first, @@ -1502,6 +1556,14 @@ class ripser } } + if (return_cocycles) { + // Representative cocycle + size_t idx_ = idx_cocycles++; + working_reduction_column.push(column_to_reduce); + compute_cocycles(working_reduction_column, dim, + cocycles[idx_]); + } + break; } } else { @@ -1537,6 +1599,14 @@ class ripser essential_generator[idx_] = {birth_edge.first, birth_edge.second}; } + + if (return_cocycles) { + // Representative cocycle + size_t idx_ = idx_cocycles++; + working_reduction_column.push(column_to_reduce); + compute_cocycles(working_reduction_column, dim, + cocycles[idx_]); + } } } @@ -1629,6 +1699,17 @@ class ripser essential_generator[i]); } } + + if (return_cocycles) { +#if defined(SORT_BARCODES) + for (const auto ordered_idx : indexes) + cocycles_by_dim[dim].push_back( + cocycles[ordered_location[ordered_idx]]); +#else + for (const auto ordered_idx : ordered_location) + cocycles_by_dim[dim].push_back(cocycles[ordered_idx]); +#endif + } } std::vector get_edges(); @@ -1641,6 +1722,7 @@ class ripser births_and_deaths_by_dim.resize(dim_max + 1); flag_persistence_generators.finite_higher.resize(dim_max); flag_persistence_generators.essential_higher.resize(dim_max); + cocycles_by_dim.resize(dim_max + 1); compute_dim_0_pairs(simplices, columns_to_reduce); /* pre allocate Container for each dimension */ From ed4f3d6538c2436fd55a3eda2097ac38e60d9855 Mon Sep 17 00:00:00 2001 From: julian Date: Mon, 16 Aug 2021 13:33:49 +0200 Subject: [PATCH 02/21] Update Python interface to support cocycles Signed-off-by: julian --- gph/python/ripser_interface.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index 017d39b1..d09d0cd5 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -314,7 +314,8 @@ def _is_prime_and_larger_than_2(x, N): def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", metric_params={}, nearest_neighbors_params={}, weights=None, weight_params=None, collapse_edges=False, - n_threads=1, return_generators=False): + n_threads=1, return_generators=False, + return_cocycles=False): """Compute persistence diagrams from an input dense array or sparse matrix. If `X` represents a point cloud, a distance matrix will be internally @@ -445,6 +446,10 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", the return dictionary under the key `gens`. Cannot be ``True`` if `collapse_edges` is also ``True``. + do_cocycles: bool, optional, default False + Computed cocycles will be available in the `cocycles` value + of the return dictionary. + Returns ------- A dictionary holding the results of the computation. Keys and values are as @@ -613,14 +618,16 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", thresh, coeff, n_threads, - return_generators + return_generators, + return_cocycles ) else: # Only consider upper diagonal diagonal = np.diagonal(dm).astype(np.float32) DParam = squareform(dm, checks=False).astype(np.float32) - res = _compute_ph_vr_dense(DParam, diagonal, maxdim, thresh, - coeff, n_threads, return_generators) + res = _compute_ph_vr_dense(DParam, diagonal, maxdim, thresh, coeff, + n_threads, return_generators, + return_cocycles) # Unwrap persistence diagrams # Barcodes must match the inner type of C++ core filtration value. @@ -649,4 +656,18 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", ] ret['gens'] = (finite_0, finite_higher, essential_0, essential_higher) + # Unwrap cocycles + if return_cocycles: + cocycles = [] + for dim in range(len(res.cocycles_by_dim)): + cocycles.append([]) + for j in range(len(res.cocycles_by_dim[dim])): + ccl = res.cocycles_by_dim[dim][j] + n = int(len(ccl) / (dim + 2)) + ccl = np.reshape(np.array(ccl, dtype=np.int64), [n, dim + 2]) + ccl[:, -1] = np.mod(ccl[:, -1], coeff) + cocycles[dim].append(ccl) + + ret["cocycles"] = cocycles + return ret From 457ae1fae2693464b691d6d3ab97ff43fbf03b4f Mon Sep 17 00:00:00 2001 From: MonkeyBreaker Date: Mon, 16 Aug 2021 14:10:59 +0200 Subject: [PATCH 03/21] Update gph/python/ripser_interface.py Co-authored-by: Umberto Lupo <46537483+ulupo@users.noreply.github.com> --- gph/python/ripser_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index d09d0cd5..c9c0e78c 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -446,7 +446,7 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", the return dictionary under the key `gens`. Cannot be ``True`` if `collapse_edges` is also ``True``. - do_cocycles: bool, optional, default False + do_cocycles: bool, optional, default: ``False`` Computed cocycles will be available in the `cocycles` value of the return dictionary. From a69e53210ce9436f0d4c303242ac04c8de29692b Mon Sep 17 00:00:00 2001 From: julian Date: Mon, 16 Aug 2021 15:31:02 +0200 Subject: [PATCH 04/21] [PY] Rename do_cocycles into return_cocycles Signed-off-by: julian --- gph/python/ripser_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index c9c0e78c..bcdf31d6 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -446,7 +446,7 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", the return dictionary under the key `gens`. Cannot be ``True`` if `collapse_edges` is also ``True``. - do_cocycles: bool, optional, default: ``False`` + return_cocycles: bool, optional, default: ``False`` Computed cocycles will be available in the `cocycles` value of the return dictionary. From c62a1f44b7a3ce216fad222a81d6bab42bfd81f5 Mon Sep 17 00:00:00 2001 From: julian Date: Mon, 16 Aug 2021 15:31:26 +0200 Subject: [PATCH 05/21] [PY] Add documentation to the cocycles output Signed-off-by: julian --- gph/python/ripser_interface.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index bcdf31d6..2300efdf 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -481,6 +481,15 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", Essential simplices corresponding to infinite bars in dimensions 1 to maxdim, with 2 vertices (edge) for each birth. + 'cocycles': list (size maxdim) of list of ndarray, optional + For each dimension less than 'maxdim' a list of representative + cocycles. Each representative cocycle in dimension 'd' is + represented as a ndarray of '(k,d+1)' elements. Each non zero value + of the cocycle is laid out in a row, first the 'd' indices of the + simplex and then the value of the cocycle on the simplex. The + indices of the simplex reference the original point cloud. + 'cocycles' are only available if 'return_cocycles' is True. + Notes ----- The C++ backend and Python API for the computation of Vietoris–Rips From e98c0dbe62ff75b8a79ad1bd2f4fd00e264f9154 Mon Sep 17 00:00:00 2001 From: julian Date: Mon, 16 Aug 2021 15:59:18 +0200 Subject: [PATCH 06/21] [PY] Add test for cocycles when persistance pair Signed-off-by: julian --- gph/python/test/test_ripser.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/gph/python/test/test_ripser.py b/gph/python/test/test_ripser.py index 03397a07..db62ded3 100644 --- a/gph/python/test/test_ripser.py +++ b/gph/python/test/test_ripser.py @@ -476,3 +476,35 @@ def test_infinite_deaths_always_essential(): # With this example no finite generators in dimension 1 shall be found assert len(gens_fin_dim1) == 0 + + +def test_cocycles_in_persistance_pair(): + """Test for a simple cocycle in a persistence pair + The data and the results are taken from cocycles notebook of + ripser.py package""" + X = np.array([[0.05546534, -1.03551662], + [-0.97905569, 0.02129887], + [-0.66797382, -0.99523678], + [-0.95970867, -0.0622854], + [-0.61541581, -1.02874641], + [0.32049913, 0.96618272], + [0.49975854, 1.02644011], + [1.10357502, 0.36892592], + [0.79793975, -0.50982899], + [-0.15508265, 1.0381044], + [0.96130068, -0.04503272], + [1.11553304, 0.36782575]]) + expected = np.array([[], [np.array([[9, 1, 1], + [9, 3, 1], + [5, 1, 1], + [5, 3, 1], + [6, 1, 1], + [8, 3, 1]]) + ]], dtype=object + ) + cocycles = ripser(X, coeff=17, return_cocycles=True)['cocycles'] + + assert np.array_equal(expected.shape, cocycles.shape) + + for dim in range(2): + assert np.array_equal(expected[dim], cocycles[dim]) From 60626d7273cdb9615bef5f2d3fa9825d030c18bf Mon Sep 17 00:00:00 2001 From: julian Date: Mon, 16 Aug 2021 15:59:43 +0200 Subject: [PATCH 07/21] [PY] Update cocycle to return a numpy array Signed-off-by: julian --- gph/python/ripser_interface.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index 2300efdf..ee4afe64 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -667,8 +667,9 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", # Unwrap cocycles if return_cocycles: + nb_dim = len(res.cocycles_by_dim) cocycles = [] - for dim in range(len(res.cocycles_by_dim)): + for dim in range(nb_dim): cocycles.append([]) for j in range(len(res.cocycles_by_dim[dim])): ccl = res.cocycles_by_dim[dim][j] @@ -676,6 +677,7 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", ccl = np.reshape(np.array(ccl, dtype=np.int64), [n, dim + 2]) ccl[:, -1] = np.mod(ccl[:, -1], coeff) cocycles[dim].append(ccl) + cocycles = np.asarray(cocycles) ret["cocycles"] = cocycles From e969b564aed55c8097593bf9e4069b1932af2c4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juli=C3=A1n?= Date: Tue, 26 Oct 2021 16:31:10 +0200 Subject: [PATCH 08/21] Update on @ulupo feedback Co-authored-by: Umberto Lupo <46537483+ulupo@users.noreply.github.com> --- gph/python/test/test_ripser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gph/python/test/test_ripser.py b/gph/python/test/test_ripser.py index db62ded3..d7b095de 100644 --- a/gph/python/test/test_ripser.py +++ b/gph/python/test/test_ripser.py @@ -478,7 +478,7 @@ def test_infinite_deaths_always_essential(): assert len(gens_fin_dim1) == 0 -def test_cocycles_in_persistance_pair(): +def test_cocycles_in_persistence_pair(): """Test for a simple cocycle in a persistence pair The data and the results are taken from cocycles notebook of ripser.py package""" From b260833410e7218a8e961e8f167648e19337676a Mon Sep 17 00:00:00 2001 From: julian Date: Wed, 5 Jan 2022 17:49:22 +0100 Subject: [PATCH 09/21] [TEST] Add explicit xfail for cocycle test Signed-off-by: julian --- gph/python/test/test_ripser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gph/python/test/test_ripser.py b/gph/python/test/test_ripser.py index d7b095de..ab797290 100644 --- a/gph/python/test/test_ripser.py +++ b/gph/python/test/test_ripser.py @@ -436,7 +436,6 @@ def test_unsupported_coefficient(): coeff=gph_ripser.get_max_coefficient_field_supported()+1) -@settings(deadline=500) @given(dm_dense=get_dense_distance_matrices()) def test_non_0_diagonal_internal_representation(dm_dense): """Checks that, when passing a full distance matrix with non-zero values in @@ -478,6 +477,7 @@ def test_infinite_deaths_always_essential(): assert len(gens_fin_dim1) == 0 +@pytest.mark.xfail def test_cocycles_in_persistence_pair(): """Test for a simple cocycle in a persistence pair The data and the results are taken from cocycles notebook of From c2ad0d578b1eea6e6a8cd4ec1ab33a8ce08534f6 Mon Sep 17 00:00:00 2001 From: julian Date: Fri, 7 Jan 2022 12:02:02 +0100 Subject: [PATCH 10/21] [CPP] Fix no cocycles output Signed-off-by: julian --- gph/src/ripser.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gph/src/ripser.h b/gph/src/ripser.h index d1ccc9c6..d04b4bad 100644 --- a/gph/src/ripser.h +++ b/gph/src/ripser.h @@ -1650,7 +1650,8 @@ class ripser * sorting barcodes. See https://stackoverflow.com/a/17554343 */ if (persistence_pair.size()) { - if (return_flag_persistence_generators) { + if (return_flag_persistence_generators || + return_cocycles) { indexes.resize(ordered_location.size()); std::iota(indexes.begin(), indexes.end(), 0); std::sort(indexes.begin(), indexes.end(), From a72e2a788a4b2b875abb95a57c26a7773581ab15 Mon Sep 17 00:00:00 2001 From: julian Date: Fri, 7 Jan 2022 12:18:04 +0100 Subject: [PATCH 11/21] [CPP] Split cocycles for finite and essential This allows that finite cocycles are sorted with the corresponding finite barcode Signed-off-by: julian --- gph/src/ripser.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/gph/src/ripser.h b/gph/src/ripser.h index d04b4bad..9c65bea7 100644 --- a/gph/src/ripser.h +++ b/gph/src/ripser.h @@ -1387,7 +1387,8 @@ class ripser flagPersGen::essential_higher_t essential_generator( columns_to_reduce.size()); flagPersGen::finite_higher_t finite_generator(columns_to_reduce.size()); - cocycles_t cocycles(columns_to_reduce.size()); + cocycles_t cocycles_finite(columns_to_reduce.size()); + cocycles_t cocycles_essential(columns_to_reduce.size()); foreach (columns_to_reduce, [&](index_t index_column_to_reduce, bool first, @@ -1558,10 +1559,9 @@ class ripser if (return_cocycles) { // Representative cocycle - size_t idx_ = idx_cocycles++; working_reduction_column.push(column_to_reduce); compute_cocycles(working_reduction_column, dim, - cocycles[idx_]); + cocycles_finite[new_idx_finite_bar]); } break; @@ -1602,10 +1602,9 @@ class ripser if (return_cocycles) { // Representative cocycle - size_t idx_ = idx_cocycles++; working_reduction_column.push(column_to_reduce); compute_cocycles(working_reduction_column, dim, - cocycles[idx_]); + cocycles_essential[idx_]); } } } @@ -1704,12 +1703,17 @@ class ripser if (return_cocycles) { #if defined(SORT_BARCODES) for (const auto ordered_idx : indexes) - cocycles_by_dim[dim].push_back( - cocycles[ordered_location[ordered_idx]]); + cocycles_by_dim[dim].push_back(std::move( + cocycles_finite[ordered_location[ordered_idx]])); #else for (const auto ordered_idx : ordered_location) - cocycles_by_dim[dim].push_back(cocycles[ordered_idx]); + cocycles_by_dim[dim].push_back(std::move( + cocycles_finite[ordered_idx])); #endif + for (size_t i = 0; i < idx_essential; ++i) { + cocycles_by_dim[dim].push_back( + std::move(cocycles_essential[i])); + } } } From dcd8f94f184baeea942539ed4ec4ad0017459786 Mon Sep 17 00:00:00 2001 From: julian Date: Fri, 7 Jan 2022 14:46:43 +0100 Subject: [PATCH 12/21] [CPP] Refactor comput_cocycles to return the computed value Signed-off-by: julian --- gph/src/ripser.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/gph/src/ripser.h b/gph/src/ripser.h index 9c65bea7..4dc63771 100644 --- a/gph/src/ripser.h +++ b/gph/src/ripser.h @@ -1347,8 +1347,7 @@ class ripser return edge; } - inline void compute_cocycles(WorkingColumn cocycle, index_t dim, - cocycle_t& cocycles) + inline cocycle_t compute_cocycles(WorkingColumn cocycle, index_t dim) { thread_local diameter_entry_t cocycle_e; thread_local std::vector cocycle_simplex; @@ -1366,7 +1365,7 @@ class ripser normalize(get_coefficient(cocycle_e), modulus)); cocycle.pop(); } - cocycles = thiscocycle; + return thiscocycle; } void compute_pairs(const std::vector& columns_to_reduce, @@ -1560,8 +1559,8 @@ class ripser if (return_cocycles) { // Representative cocycle working_reduction_column.push(column_to_reduce); - compute_cocycles(working_reduction_column, dim, - cocycles_finite[new_idx_finite_bar]); + cocycles_finite[new_idx_finite_bar] = + compute_cocycles(working_reduction_column, dim); } break; @@ -1603,8 +1602,8 @@ class ripser if (return_cocycles) { // Representative cocycle working_reduction_column.push(column_to_reduce); - compute_cocycles(working_reduction_column, dim, - cocycles_essential[idx_]); + cocycles_essential[idx_] = + compute_cocycles(working_reduction_column, dim); } } } From 3132077e6fef922ffbc77e34325468dbeee8704b Mon Sep 17 00:00:00 2001 From: julian Date: Fri, 7 Jan 2022 15:44:22 +0100 Subject: [PATCH 13/21] [CPP] Change cocycle type from int to index_t Signed-off-by: julian --- gph/src/ripser.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gph/src/ripser.h b/gph/src/ripser.h index 4dc63771..802c2736 100644 --- a/gph/src/ripser.h +++ b/gph/src/ripser.h @@ -95,7 +95,7 @@ const size_t num_coefficient_bits = 8; constexpr value_t inf_value = std::numeric_limits::infinity(); using barcodes_t = std::vector; -using cocycle_t = std::vector; +using cocycle_t = std::vector; using cocycles_t = std::vector; @@ -1358,8 +1358,8 @@ class ripser cocycle_simplex.clear(); get_simplex_vertices(get_index(cocycle_e), dim, n, std::back_inserter(cocycle_simplex)); - for (size_t k = 0; k < cocycle_simplex.size(); k++) { - thiscocycle.push_back((int) cocycle_simplex[k]); + for (auto& cocycle : cocycle_simplex) { + thiscocycle.push_back(cocycle); } thiscocycle.push_back( normalize(get_coefficient(cocycle_e), modulus)); From 24be18d8ced2978b6ef3c9a6b1e7fadaa1f4b6db Mon Sep 17 00:00:00 2001 From: julian Date: Fri, 7 Jan 2022 15:48:00 +0100 Subject: [PATCH 14/21] [CPP] use variable when getting pivot index Remove check for first insertion for pivot_to_death_idx hash table Signed-off-by: julian --- gph/src/ripser.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gph/src/ripser.h b/gph/src/ripser.h index 802c2736..6ef932b3 100644 --- a/gph/src/ripser.h +++ b/gph/src/ripser.h @@ -1419,10 +1419,11 @@ class ripser diameter_entry_t e; bool is_essential = false; + index_t pivot_idx; while (true) { - if (get_index(pivot) != -1) { - auto pair = - pivot_column_index.find(get_index(get_entry(pivot))); + pivot_idx = get_index(pivot); + if (pivot_idx != -1) { + auto pair = pivot_column_index.find(pivot_idx); if (pair != pivot_column_index.end()) { entry_t old_entry_column_to_add; index_t index_column_to_add; @@ -1472,7 +1473,7 @@ class ripser retire_column(index_column_to_reduce, working_reduction_column, reduction_matrix, columns_to_reduce, - dim, get_index(pivot), + dim, pivot_idx, memory_manager); if (pivot_column_index.update( @@ -1496,12 +1497,11 @@ class ripser retire_column(index_column_to_reduce, working_reduction_column, reduction_matrix, columns_to_reduce, dim, - get_index(pivot), memory_manager); + pivot_idx, memory_manager); // equivalent to CAS in the original algorithm auto insertion_result = pivot_column_index.insert( - {get_index(get_entry(pivot)), - make_entry(index_column_to_reduce, + {pivot_idx, make_entry(index_column_to_reduce, get_coefficient(pivot))}); if (!insertion_result .second) // failed to insert, somebody @@ -1632,7 +1632,7 @@ class ripser /* We push the order of the generator simplices by when * they are inserted as bars. This can be done because * get_index(it->second) is equivalent to - * `new_idx_finite_bar` in the core algorithm + * `new_idx_fin_bar` in the core algorithm */ ordered_location.push_back(get_index(it->second)); #if defined(SORT_BARCODES) From 38e9246dae0a12fa7a1a11b628523dae4bf634f4 Mon Sep 17 00:00:00 2001 From: julian Date: Wed, 17 Aug 2022 21:57:21 +0200 Subject: [PATCH 15/21] [CPP] Fix bad rebase Signed-off-by: julian --- gph/src/ripser.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/gph/src/ripser.h b/gph/src/ripser.h index 6ef932b3..435092c4 100644 --- a/gph/src/ripser.h +++ b/gph/src/ripser.h @@ -1554,15 +1554,17 @@ class ripser birth_edge.first, birth_edge.second, death_edge.first, death_edge.second}; } - } - if (return_cocycles) { - // Representative cocycle - working_reduction_column.push(column_to_reduce); - cocycles_finite[new_idx_finite_bar] = - compute_cocycles(working_reduction_column, dim); + if (return_cocycles) { + // Representative cocycle + working_reduction_column.push(column_to_reduce); + cocycles_finite[new_idx_finite_bar] = + compute_cocycles(working_reduction_column, + dim); + } } + break; } } else { From 31015b0eb3f717a3c41eacd03626c123776212ef Mon Sep 17 00:00:00 2001 From: julian Date: Wed, 17 Aug 2022 21:57:43 +0200 Subject: [PATCH 16/21] [PY] Fix bad rebase Signed-off-by: julian --- gph/python/ripser_interface.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index ee4afe64..5f726bf6 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -34,14 +34,15 @@ MAX_COEFF_SUPPORTED = gph_ripser.get_max_coefficient_field_supported() -def _compute_ph_vr_dense(DParam, maxHomDim, thresh=-1, coeff=2, n_threads=1, - return_generators=False, return_cocycles=False): +def _compute_ph_vr_dense(DParam, diagonal, maxHomDim, thresh=-1, coeff=2, + n_threads=1, return_generators=False, + return_cocycles=False): if coeff == 2: - ret = gph_ripser.rips_dm(DParam, DParam.shape[0], coeff, + ret = gph_ripser.rips_dm(DParam, diagonal, coeff, maxHomDim, thresh, n_threads, return_generators, return_cocycles) else: - ret = gph_ripser_coeff.rips_dm(DParam, DParam.shape[0], coeff, + ret = gph_ripser_coeff.rips_dm(DParam, diagonal, coeff, maxHomDim, thresh, n_threads, return_generators, return_cocycles) return ret From 2ab0b4bbac095507861f88329875bc6a05e09fae Mon Sep 17 00:00:00 2001 From: julian Date: Wed, 17 Aug 2022 21:58:10 +0200 Subject: [PATCH 17/21] [TEST] Remove warnings Signed-off-by: julian --- gph/python/ripser_interface.py | 2 +- gph/python/test/test_ripser.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index 5f726bf6..3a495696 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -678,7 +678,7 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", ccl = np.reshape(np.array(ccl, dtype=np.int64), [n, dim + 2]) ccl[:, -1] = np.mod(ccl[:, -1], coeff) cocycles[dim].append(ccl) - cocycles = np.asarray(cocycles) + cocycles = np.asarray(cocycles, dtype=object) ret["cocycles"] = cocycles diff --git a/gph/python/test/test_ripser.py b/gph/python/test/test_ripser.py index ab797290..b899bf6d 100644 --- a/gph/python/test/test_ripser.py +++ b/gph/python/test/test_ripser.py @@ -477,7 +477,6 @@ def test_infinite_deaths_always_essential(): assert len(gens_fin_dim1) == 0 -@pytest.mark.xfail def test_cocycles_in_persistence_pair(): """Test for a simple cocycle in a persistence pair The data and the results are taken from cocycles notebook of From 80a4244bd470a17ca0a13170a32f5af5c6e706ac Mon Sep 17 00:00:00 2001 From: Umberto Lupo <46537483+ulupo@users.noreply.github.com> Date: Wed, 17 Aug 2022 22:47:07 +0200 Subject: [PATCH 18/21] Update gph/python/ripser_interface.py --- gph/python/ripser_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index 3a495696..76564585 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -483,7 +483,7 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", dimensions 1 to maxdim, with 2 vertices (edge) for each birth. 'cocycles': list (size maxdim) of list of ndarray, optional - For each dimension less than 'maxdim' a list of representative + For each dimension less than `maxdim`, a list of representative cocycles. Each representative cocycle in dimension 'd' is represented as a ndarray of '(k,d+1)' elements. Each non zero value of the cocycle is laid out in a row, first the 'd' indices of the From 73e57ba18034b229aa98d05fe77af9dabf66c240 Mon Sep 17 00:00:00 2001 From: Umberto Lupo <46537483+ulupo@users.noreply.github.com> Date: Wed, 17 Aug 2022 22:47:18 +0200 Subject: [PATCH 19/21] Update gph/python/ripser_interface.py --- gph/python/ripser_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index 76564585..eb9fff9f 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -484,7 +484,7 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", 'cocycles': list (size maxdim) of list of ndarray, optional For each dimension less than `maxdim`, a list of representative - cocycles. Each representative cocycle in dimension 'd' is + cocycles. Each representative cocycle in dimension ``d`` is represented as a ndarray of '(k,d+1)' elements. Each non zero value of the cocycle is laid out in a row, first the 'd' indices of the simplex and then the value of the cocycle on the simplex. The From 58a25910647c04b68eda9a8386fa09618d7cd018 Mon Sep 17 00:00:00 2001 From: Umberto Lupo <46537483+ulupo@users.noreply.github.com> Date: Wed, 17 Aug 2022 22:47:27 +0200 Subject: [PATCH 20/21] Update gph/python/ripser_interface.py --- gph/python/ripser_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index eb9fff9f..f1bc00e9 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -485,7 +485,7 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", 'cocycles': list (size maxdim) of list of ndarray, optional For each dimension less than `maxdim`, a list of representative cocycles. Each representative cocycle in dimension ``d`` is - represented as a ndarray of '(k,d+1)' elements. Each non zero value + represented as a ndarray of ``(k, d + 1)`` elements. Each non zero value of the cocycle is laid out in a row, first the 'd' indices of the simplex and then the value of the cocycle on the simplex. The indices of the simplex reference the original point cloud. From 9094b07656b80b13144b2dfb2a67686aa507eb57 Mon Sep 17 00:00:00 2001 From: julian Date: Tue, 30 Aug 2022 21:49:53 +0200 Subject: [PATCH 21/21] [CPP] Fix invalid cocycle type should have been signed instead of unsigned Signed-off-by: julian --- gph/src/ripser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gph/src/ripser.h b/gph/src/ripser.h index 435092c4..356bbe53 100644 --- a/gph/src/ripser.h +++ b/gph/src/ripser.h @@ -89,7 +89,7 @@ class hash_map : public concurrent_hash_map::junction_leapfrog_hm typedef float value_t; typedef int64_t index_t; -typedef uint16_t coefficient_t; +typedef int16_t coefficient_t; const size_t num_coefficient_bits = 8; constexpr value_t inf_value = std::numeric_limits::infinity();