Getting Error while using HDBSCAN #609

shyamdthakkar · 2023-08-07T15:46:26Z

import hdbscan
import numpy as np
import pandas as pd
from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt

data = pd.read_csv('DBSCAN Data.csv')
X = data[['Feature 1', 'Feature 2']].values
true_labels = data['Cluster'].values

clusterer = hdbscan.HDBSCAN(min_cluster_size=5, gen_min_span_tree=True)
hdbscan_labels = clusterer.fit_predict(X)

Error message:

TypeError Traceback (most recent call last)
Cell In[10], line 2
1 clusterer = hdbscan.HDBSCAN(min_cluster_size=5, gen_min_span_tree=True)
----> 2 hdbscan_labels = clusterer.fit_predict(X)

File ~\anaconda3\Lib\site-packages\hdbscan\hdbscan_.py:1243, in HDBSCAN.fit_predict(self, X, y)
1228 def fit_predict(self, X, y=None):
1229 """Performs clustering on X and returns cluster labels.
1230
1231 Parameters
(...)
1241 cluster labels
1242 """
-> 1243 self.fit(X)
1244 return self.labels_

File ~\anaconda3\Lib\site-packages\hdbscan\hdbscan_.py:1205, in HDBSCAN.fit(self, X, y)
1195 kwargs.pop("prediction_data", None)
1196 kwargs.update(self.metric_kwargs)
1198 (
1199 self.labels,
1200 self.probabilities_,
1201 self.cluster_persistence_,
1202 self._condensed_tree,
1203 self._single_linkage_tree,
1204 self._min_spanning_tree,
-> 1205 ) = hdbscan(clean_data, **kwargs)
1207 if self.metric != "precomputed" and not self._all_finite:
1208 # remap indices to align with original data in the case of non-finite entries.
1209 self._condensed_tree = remap_condensed_tree(
1210 self._condensed_tree, internal_to_raw, outliers
1211 )

File ~\anaconda3\Lib\site-packages\hdbscan\hdbscan_.py:884, in hdbscan(X, min_cluster_size, min_samples, alpha, cluster_selection_epsilon, max_cluster_size, metric, p, leaf_size, algorithm, memory, approx_min_span_tree, gen_min_span_tree, core_dist_n_jobs, cluster_selection_method, allow_single_cluster, match_reference_implementation, **kwargs)
867 else:
868 (single_linkage_tree, result_min_span_tree) = memory.cache(
869 _hdbscan_boruvka_balltree
870 )(
(...)
880 **kwargs
881 )
883 return (
--> 884 _tree_to_labels(
885 X,
886 single_linkage_tree,
887 min_cluster_size,
888 cluster_selection_method,
889 allow_single_cluster,
890 match_reference_implementation,
891 cluster_selection_epsilon,
892 max_cluster_size,
893 )
894 + (result_min_span_tree,)
895 )

File ~\anaconda3\Lib\site-packages\hdbscan\hdbscan_.py:80, in _tree_to_labels(X, single_linkage_tree, min_cluster_size, cluster_selection_method, allow_single_cluster, match_reference_implementation, cluster_selection_epsilon, max_cluster_size)
78 condensed_tree = condense_tree(single_linkage_tree, min_cluster_size)
79 stability_dict = compute_stability(condensed_tree)
---> 80 labels, probabilities, stabilities = get_clusters(
81 condensed_tree,
82 stability_dict,
83 cluster_selection_method,
84 allow_single_cluster,
85 match_reference_implementation,
86 cluster_selection_epsilon,
87 max_cluster_size,
88 )
90 return (labels, probabilities, stabilities, condensed_tree, single_linkage_tree)

File hdbscan\_hdbscan_tree.pyx:659, in hdbscan._hdbscan_tree.get_clusters()

File hdbscan\_hdbscan_tree.pyx:733, in hdbscan._hdbscan_tree.get_clusters()

TypeError: 'numpy.float64' object cannot be interpreted as an integer
DBSCAN Data.csv

FinnHuelsbusch · 2023-08-08T09:10:03Z

This seems to be a duplicate of #607. Maybe the fix suggested there will help you.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Getting Error while using HDBSCAN #609

Getting Error while using HDBSCAN #609

shyamdthakkar commented Aug 7, 2023

FinnHuelsbusch commented Aug 8, 2023

Getting Error while using HDBSCAN #609

Getting Error while using HDBSCAN #609

Comments

shyamdthakkar commented Aug 7, 2023

Error Msg -

FinnHuelsbusch commented Aug 8, 2023