diff --git a/gph/bindings/ripser_bindings.cpp b/gph/bindings/ripser_bindings.cpp index 1ae6a736..09463e34 100644 --- a/gph/bindings/ripser_bindings.cpp +++ b/gph/bindings/ripser_bindings.cpp @@ -39,15 +39,13 @@ to_numpy_barcodes(std::vector barcodes) } #if defined USE_COEFFICIENTS -PYBIND11_MODULE(gph_ripser_coeff, m) -{ +PYBIND11_MODULE(gph_ripser_coeff, m) { #else -PYBIND11_MODULE(gph_ripser, m) -{ +PYBIND11_MODULE(gph_ripser, m) { #endif - using namespace pybind11::literals; - m.doc() = "Ripser python interface"; + using namespace pybind11::literals; + m.doc() = "Ripser python interface"; py::class_(m, "flagPersGen", py::module_local()) .def_readwrite("finite_0", &flagPersGen::finite_0) @@ -63,6 +61,7 @@ PYBIND11_MODULE(gph_ripser, m) [&](ripserResults& res) { return to_numpy_barcodes(res.births_and_deaths_by_dim); }) + .def_readwrite("cocycles_by_dim", &ripserResults::cocycles_by_dim) .def_readwrite("flag_persistence_generators_by_dim", &ripserResults::flag_persistence_generators); @@ -70,7 +69,7 @@ PYBIND11_MODULE(gph_ripser, m) "rips_dm", [](py::array_t& D, py::array_t& diag, int modulus, int dim_max, float threshold, int num_threads, - bool return_generators) { + bool return_generators, const bool do_cocycles) { // Setup distance matrix and figure out threshold auto D_ = static_cast(D.request().ptr); std::vector distances(D_, D_ + D.size()); @@ -84,20 +83,21 @@ PYBIND11_MODULE(gph_ripser, m) ripserResults res; ripser r( - std::move(dist), dim_max, threshold, modulus, num_threads, - return_generators); + std::move(dist), dim_max, threshold, modulus, + num_threads, return_generators, do_cocycles); r.compute_barcodes(); r.copy_results(res); return res; }, - "D"_a, "diag"_a, "modulus"_a, "dim_max"_a, "threshold"_a, - "num_threads"_a, "return_generators"_a, "ripser distance matrix"); + "D"_a, "N"_a, "modulus"_a, "dim_max"_a, "threshold"_a, "num_threads"_a, + "return_generators"_a, "do_cocycles"_a, "ripser distance matrix"); m.def( "rips_dm_sparse", [](py::array_t& I, py::array_t& J, py::array_t& V, int NEdges, int N, int modulus, int dim_max, - float threshold, int num_threads, bool return_generators) { + float threshold, int num_threads, bool return_generators, + const bool do_cocycles) { auto I_ = static_cast(I.request().ptr); auto J_ = static_cast(J.request().ptr); auto V_ = static_cast(V.request().ptr); @@ -105,7 +105,8 @@ PYBIND11_MODULE(gph_ripser, m) // Setup distance matrix and figure out threshold ripser r( sparse_distance_matrix(I_, J_, V_, NEdges, N, threshold), - dim_max, threshold, modulus, num_threads, return_generators); + dim_max, threshold, modulus, num_threads, + return_generators, do_cocycles); r.compute_barcodes(); ripserResults res; @@ -113,7 +114,7 @@ PYBIND11_MODULE(gph_ripser, m) return res; }, "I"_a, "J"_a, "V"_a, "NEdges"_a, "N"_a, "modulus"_a, "dim_max"_a, - "threshold"_a, "num_threads"_a, "return_generators"_a, + "threshold"_a, "num_threads"_a, "return_generators"_a, "do_cocycles"_a, "ripser sparse distance matrix"); m.def("get_max_coefficient_field_supported", diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py index 87aefeb3..f1bc00e9 100644 --- a/gph/python/ripser_interface.py +++ b/gph/python/ripser_interface.py @@ -35,26 +35,31 @@ def _compute_ph_vr_dense(DParam, diagonal, maxHomDim, thresh=-1, coeff=2, - n_threads=1, return_generators=False): + n_threads=1, return_generators=False, + return_cocycles=False): if coeff == 2: - ret = gph_ripser.rips_dm(DParam, diagonal, coeff, maxHomDim, thresh, - n_threads, return_generators) + ret = gph_ripser.rips_dm(DParam, diagonal, coeff, + maxHomDim, thresh, n_threads, + return_generators, return_cocycles) else: - ret = gph_ripser_coeff.rips_dm(DParam, diagonal, coeff, maxHomDim, - thresh, n_threads, return_generators) + ret = gph_ripser_coeff.rips_dm(DParam, diagonal, coeff, + maxHomDim, thresh, n_threads, + return_generators, return_cocycles) return ret def _compute_ph_vr_sparse(I, J, V, N, maxHomDim, thresh=-1, coeff=2, - n_threads=1, return_generators=False): + n_threads=1, return_generators=False, + return_cocycles=False): if coeff == 2: ret = gph_ripser.rips_dm_sparse(I, J, V, I.size, N, coeff, maxHomDim, thresh, n_threads, - return_generators) + return_generators, return_cocycles) else: ret = gph_ripser_coeff.rips_dm_sparse(I, J, V, I.size, N, coeff, maxHomDim, thresh, n_threads, - return_generators) + return_generators, + return_cocycles) return ret @@ -310,7 +315,8 @@ def _is_prime_and_larger_than_2(x, N): def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", metric_params={}, nearest_neighbors_params={}, weights=None, weight_params=None, collapse_edges=False, - n_threads=1, return_generators=False): + n_threads=1, return_generators=False, + return_cocycles=False): """Compute persistence diagrams from an input dense array or sparse matrix. If `X` represents a point cloud, a distance matrix will be internally @@ -441,6 +447,10 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", the return dictionary under the key `gens`. Cannot be ``True`` if `collapse_edges` is also ``True``. + return_cocycles: bool, optional, default: ``False`` + Computed cocycles will be available in the `cocycles` value + of the return dictionary. + Returns ------- A dictionary holding the results of the computation. Keys and values are as @@ -472,6 +482,15 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", Essential simplices corresponding to infinite bars in dimensions 1 to maxdim, with 2 vertices (edge) for each birth. + 'cocycles': list (size maxdim) of list of ndarray, optional + For each dimension less than `maxdim`, a list of representative + cocycles. Each representative cocycle in dimension ``d`` is + represented as a ndarray of ``(k, d + 1)`` elements. Each non zero value + of the cocycle is laid out in a row, first the 'd' indices of the + simplex and then the value of the cocycle on the simplex. The + indices of the simplex reference the original point cloud. + 'cocycles' are only available if 'return_cocycles' is True. + Notes ----- The C++ backend and Python API for the computation of Vietoris–Rips @@ -498,7 +517,7 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", Python", *Journal of Open Source Software*, **3**(29), 2021; `DOI: 10.21105/joss.00925 `_. - + .. [2] U. Bauer, "Ripser: efficient computation of Vietoris–Rips persistence barcodes", *J Appl. and Comput. Topology*, **5**, pp. 391–423, 2021; `DOI: 10.1007/s41468-021-00071-5 @@ -609,14 +628,16 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", thresh, coeff, n_threads, - return_generators + return_generators, + return_cocycles ) else: # Only consider upper diagonal diagonal = np.diagonal(dm).astype(np.float32) DParam = squareform(dm, checks=False).astype(np.float32) - res = _compute_ph_vr_dense(DParam, diagonal, maxdim, thresh, - coeff, n_threads, return_generators) + res = _compute_ph_vr_dense(DParam, diagonal, maxdim, thresh, coeff, + n_threads, return_generators, + return_cocycles) # Unwrap persistence diagrams # Barcodes must match the inner type of C++ core filtration value. @@ -645,4 +666,20 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", ] ret['gens'] = (finite_0, finite_higher, essential_0, essential_higher) + # Unwrap cocycles + if return_cocycles: + nb_dim = len(res.cocycles_by_dim) + cocycles = [] + for dim in range(nb_dim): + cocycles.append([]) + for j in range(len(res.cocycles_by_dim[dim])): + ccl = res.cocycles_by_dim[dim][j] + n = int(len(ccl) / (dim + 2)) + ccl = np.reshape(np.array(ccl, dtype=np.int64), [n, dim + 2]) + ccl[:, -1] = np.mod(ccl[:, -1], coeff) + cocycles[dim].append(ccl) + cocycles = np.asarray(cocycles, dtype=object) + + ret["cocycles"] = cocycles + return ret diff --git a/gph/python/test/test_ripser.py b/gph/python/test/test_ripser.py index 03397a07..b899bf6d 100644 --- a/gph/python/test/test_ripser.py +++ b/gph/python/test/test_ripser.py @@ -436,7 +436,6 @@ def test_unsupported_coefficient(): coeff=gph_ripser.get_max_coefficient_field_supported()+1) -@settings(deadline=500) @given(dm_dense=get_dense_distance_matrices()) def test_non_0_diagonal_internal_representation(dm_dense): """Checks that, when passing a full distance matrix with non-zero values in @@ -476,3 +475,35 @@ def test_infinite_deaths_always_essential(): # With this example no finite generators in dimension 1 shall be found assert len(gens_fin_dim1) == 0 + + +def test_cocycles_in_persistence_pair(): + """Test for a simple cocycle in a persistence pair + The data and the results are taken from cocycles notebook of + ripser.py package""" + X = np.array([[0.05546534, -1.03551662], + [-0.97905569, 0.02129887], + [-0.66797382, -0.99523678], + [-0.95970867, -0.0622854], + [-0.61541581, -1.02874641], + [0.32049913, 0.96618272], + [0.49975854, 1.02644011], + [1.10357502, 0.36892592], + [0.79793975, -0.50982899], + [-0.15508265, 1.0381044], + [0.96130068, -0.04503272], + [1.11553304, 0.36782575]]) + expected = np.array([[], [np.array([[9, 1, 1], + [9, 3, 1], + [5, 1, 1], + [5, 3, 1], + [6, 1, 1], + [8, 3, 1]]) + ]], dtype=object + ) + cocycles = ripser(X, coeff=17, return_cocycles=True)['cocycles'] + + assert np.array_equal(expected.shape, cocycles.shape) + + for dim in range(2): + assert np.array_equal(expected[dim], cocycles[dim]) diff --git a/gph/src/ripser.h b/gph/src/ripser.h index ee7b076c..356bbe53 100644 --- a/gph/src/ripser.h +++ b/gph/src/ripser.h @@ -89,13 +89,16 @@ class hash_map : public concurrent_hash_map::junction_leapfrog_hm typedef float value_t; typedef int64_t index_t; -typedef uint16_t coefficient_t; - -using barcodes_t = std::vector; +typedef int16_t coefficient_t; const size_t num_coefficient_bits = 8; constexpr value_t inf_value = std::numeric_limits::infinity(); +using barcodes_t = std::vector; +using cocycle_t = std::vector; +using cocycles_t = std::vector; + + // 1L on windows is ALWAYS 32 bits, when on unix systems is pointer size static const index_t max_simplex_index = (uintptr_t(1) << (8 * sizeof(index_t) - 1 - num_coefficient_bits)) - 1; @@ -145,6 +148,11 @@ class binomial_coeff_table } }; +coefficient_t normalize(const coefficient_t n, const coefficient_t modulus) +{ + return n > modulus / 2 ? n - modulus : n; +} + std::vector multiplicative_inverse_vector(const coefficient_t m) { std::vector inverse(m); @@ -615,6 +623,21 @@ typedef struct { /* The second variable stores the flag persistence generators */ flagPersGen flag_persistence_generators; + /* + The third variable is a vector of representative cocycles for each + dimension. For now, only cocycles above dimension 0 are added, so + dimension 0 is an empty list For the others, cocycles_by_dim[d] holds an + array of representative cocycles for dimension d which is parallel with + the array of births/deaths for dimension d. Each element of the array is + itself an array of unrolled information about the cocycle For dimension 1, + for example, the zeroeth element of the array contains [ccl0_simplex0_idx0 + ccl0_simplex0_idx1 ccl0_simplex0_val, ccl0_simplex1_idx0 + ccl0_simplex1_idx1 ccl0_simplex1_val, ... ccl0_simplexk_idx0 + ccl0_simplexk_idx1 ccl0_simplexk_val] for a cocycle representing the first + persistence point, which has k simplices with nonzero values in the + representative cocycle + */ + std::vector cocycles_by_dim; } ripserResults; template @@ -631,6 +654,9 @@ class ripser // If this flag is off, don't extract the flag persistence // pairings and essential simplices const bool return_flag_persistence_generators; + // If this flag is off, don't extract the representative cocycles to save + // time + const bool return_cocycles; struct entry_hash { std::size_t operator()(const entry_t& e) const @@ -665,16 +691,19 @@ class ripser public: mutable std::vector births_and_deaths_by_dim; mutable flagPersGen flag_persistence_generators; + mutable std::vector cocycles_by_dim; ripser(DistanceMatrix&& _dist, index_t _dim_max, value_t _threshold, coefficient_t _modulus, int _num_threads, - bool return_flag_persistence_generators_) + bool return_flag_persistence_generators_, const bool return_cocycles_ + ) : dist(std::move(_dist)), n(dist.size()), dim_max(_dim_max), threshold(_threshold), modulus(_modulus), num_threads(_num_threads), binomial_coeff(n, dim_max + 2), multiplicative_inverse(multiplicative_inverse_vector(_modulus)), return_flag_persistence_generators( - return_flag_persistence_generators_) + return_flag_persistence_generators_), + return_cocycles(return_cocycles_) { /* Uses all concurrent threads supported */ if (num_threads == -1) @@ -696,6 +725,7 @@ class ripser { res.births_and_deaths_by_dim = births_and_deaths_by_dim; res.flag_persistence_generators = flag_persistence_generators; + res.cocycles_by_dim = cocycles_by_dim; } index_t get_max_vertex(const index_t idx, const index_t k, @@ -1257,7 +1287,7 @@ class ripser #endif void retire_column(const index_t& idx_col, - WorkingColumn& working_reduction_column, Matrix& mat, + WorkingColumn working_reduction_column, Matrix& mat, const std::vector& columns_to_reduce, const index_t dim, const index_t& pivot_index, mrzv::MemoryManager& mem_manager) @@ -1317,6 +1347,27 @@ class ripser return edge; } + inline cocycle_t compute_cocycles(WorkingColumn cocycle, index_t dim) + { + thread_local diameter_entry_t cocycle_e; + thread_local std::vector cocycle_simplex; + thread_local cocycle_t thiscocycle; + + thiscocycle.clear(); + while (get_index(cocycle_e = get_pivot(cocycle)) != -1) { + cocycle_simplex.clear(); + get_simplex_vertices(get_index(cocycle_e), dim, n, + std::back_inserter(cocycle_simplex)); + for (auto& cocycle : cocycle_simplex) { + thiscocycle.push_back(cocycle); + } + thiscocycle.push_back( + normalize(get_coefficient(cocycle_e), modulus)); + cocycle.pop(); + } + return thiscocycle; + } + void compute_pairs(const std::vector& columns_to_reduce, entry_hash_map& pivot_column_index, const index_t dim) { @@ -1324,7 +1375,8 @@ class ripser // extra vector is a work-around inability to store floats in the // hash_map - std::atomic idx_finite_bar{0}, idx_essential{0}; + std::atomic idx_finite_bar{0}, idx_essential{0}, + idx_cocycles{0}; entry_hash_map pivot_to_death_idx(columns_to_reduce.size()); pivot_to_death_idx.reserve(columns_to_reduce.size()); @@ -1334,6 +1386,8 @@ class ripser flagPersGen::essential_higher_t essential_generator( columns_to_reduce.size()); flagPersGen::finite_higher_t finite_generator(columns_to_reduce.size()); + cocycles_t cocycles_finite(columns_to_reduce.size()); + cocycles_t cocycles_essential(columns_to_reduce.size()); foreach (columns_to_reduce, [&](index_t index_column_to_reduce, bool first, @@ -1365,10 +1419,11 @@ class ripser diameter_entry_t e; bool is_essential = false; + index_t pivot_idx; while (true) { - if (get_index(pivot) != -1) { - auto pair = - pivot_column_index.find(get_index(get_entry(pivot))); + pivot_idx = get_index(pivot); + if (pivot_idx != -1) { + auto pair = pivot_column_index.find(pivot_idx); if (pair != pivot_column_index.end()) { entry_t old_entry_column_to_add; index_t index_column_to_add; @@ -1418,7 +1473,7 @@ class ripser retire_column(index_column_to_reduce, working_reduction_column, reduction_matrix, columns_to_reduce, - dim, get_index(pivot), + dim, pivot_idx, memory_manager); if (pivot_column_index.update( @@ -1442,12 +1497,11 @@ class ripser retire_column(index_column_to_reduce, working_reduction_column, reduction_matrix, columns_to_reduce, dim, - get_index(pivot), memory_manager); + pivot_idx, memory_manager); // equivalent to CAS in the original algorithm auto insertion_result = pivot_column_index.insert( - {get_index(get_entry(pivot)), - make_entry(index_column_to_reduce, + {pivot_idx, make_entry(index_column_to_reduce, get_coefficient(pivot))}); if (!insertion_result .second) // failed to insert, somebody @@ -1500,8 +1554,17 @@ class ripser birth_edge.first, birth_edge.second, death_edge.first, death_edge.second}; } + + if (return_cocycles) { + // Representative cocycle + working_reduction_column.push(column_to_reduce); + cocycles_finite[new_idx_finite_bar] = + compute_cocycles(working_reduction_column, + dim); + } } + break; } } else { @@ -1537,6 +1600,13 @@ class ripser essential_generator[idx_] = {birth_edge.first, birth_edge.second}; } + + if (return_cocycles) { + // Representative cocycle + working_reduction_column.push(column_to_reduce); + cocycles_essential[idx_] = + compute_cocycles(working_reduction_column, dim); + } } } @@ -1564,7 +1634,7 @@ class ripser /* We push the order of the generator simplices by when * they are inserted as bars. This can be done because * get_index(it->second) is equivalent to - * `new_idx_finite_bar` in the core algorithm + * `new_idx_fin_bar` in the core algorithm */ ordered_location.push_back(get_index(it->second)); #if defined(SORT_BARCODES) @@ -1580,7 +1650,8 @@ class ripser * sorting barcodes. See https://stackoverflow.com/a/17554343 */ if (persistence_pair.size()) { - if (return_flag_persistence_generators) { + if (return_flag_persistence_generators || + return_cocycles) { indexes.resize(ordered_location.size()); std::iota(indexes.begin(), indexes.end(), 0); std::sort(indexes.begin(), indexes.end(), @@ -1629,6 +1700,22 @@ class ripser essential_generator[i]); } } + + if (return_cocycles) { +#if defined(SORT_BARCODES) + for (const auto ordered_idx : indexes) + cocycles_by_dim[dim].push_back(std::move( + cocycles_finite[ordered_location[ordered_idx]])); +#else + for (const auto ordered_idx : ordered_location) + cocycles_by_dim[dim].push_back(std::move( + cocycles_finite[ordered_idx])); +#endif + for (size_t i = 0; i < idx_essential; ++i) { + cocycles_by_dim[dim].push_back( + std::move(cocycles_essential[i])); + } + } } std::vector get_edges(); @@ -1641,6 +1728,7 @@ class ripser births_and_deaths_by_dim.resize(dim_max + 1); flag_persistence_generators.finite_higher.resize(dim_max); flag_persistence_generators.essential_higher.resize(dim_max); + cocycles_by_dim.resize(dim_max + 1); compute_dim_0_pairs(simplices, columns_to_reduce); /* pre allocate Container for each dimension */