Skip to content

Commit

Permalink
SUBCLU fix with automatic indexing reusing the wrong index.
Browse files Browse the repository at this point in the history
  • Loading branch information
kno10 committed Jul 4, 2024
1 parent e5c96e5 commit 82fbab3
Showing 1 changed file with 16 additions and 4 deletions.
20 changes: 16 additions & 4 deletions elki-clustering/src/main/java/elki/clustering/subspace/SUBCLU.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,21 @@
import elki.database.ids.DBIDUtil;
import elki.database.ids.DBIDs;
import elki.database.ids.ModifiableDBIDs;
import elki.database.query.QueryBuilder;
import elki.database.query.distance.DistanceQuery;
import elki.database.relation.ProxyView;
import elki.database.relation.Relation;
import elki.database.relation.RelationUtil;
import elki.distance.subspace.DimensionSelectingSubspaceDistance;
import elki.distance.subspace.SubspaceEuclideanDistance;
import elki.index.RangeIndex;
import elki.logging.Logging;
import elki.logging.progress.FiniteProgress;
import elki.logging.progress.StepProgress;
import elki.math.linearalgebra.Centroid;
import elki.result.Metadata;
import elki.utilities.datastructures.BitsUtil;
import elki.utilities.datastructures.iterator.It;
import elki.utilities.documentation.Description;
import elki.utilities.documentation.Reference;
import elki.utilities.documentation.Title;
Expand Down Expand Up @@ -72,6 +76,9 @@
* only include points in clusters that are not already part of sub-clusters
* (note that this does not remove overlap of independent subspaces).
* <p>
* TODO: modifying the subspace of the distance function in place can cause
* subtle errors (e.g., an automatically created index being reused for the
* wrong subspace). Replace this with a subspace distance factory instead.
* <p>
* Reference:
* <p>
* Karin Kailing, Hans-Peter Kriegel, Peer Kröger<br>
Expand Down Expand Up @@ -316,17 +323,22 @@ private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace
}
// subset filter:
relation = ids == null ? relation : new ProxyView<>(ids, relation);

DBSCAN<V> dbscan = new DBSCAN<>(distance, epsilon, minpts);
Clustering<Model> dbsres = dbscan.run(relation);

Clustering<Model> dbsres = new DBSCAN<>(distance, epsilon, minpts).run(relation);
// separate cluster and noise
List<Cluster<Model>> clusters = new ArrayList<>();
for(Cluster<Model> c : dbsres.getAllClusters()) {
if(!c.isNoise()) {
clusters.add(c);
}
}
// Remove any automatically generated index associated with this distance
// because we modify the distance subspace again!
DistanceQuery<V> dq = new QueryBuilder<>(relation, distance).distanceQuery();
for(It<RangeIndex<V>> it = Metadata.hierarchyOf(relation).iterChildrenReverse().filter(RangeIndex.class); it.valid(); it.advance()) {
if(it.get().rangeByDBID(dq, epsilon, 0) != null) {
Metadata.hierarchyOf(relation).removeChild(it.get());
}
}
return clusters;
}

Expand Down

0 comments on commit 82fbab3

Please sign in to comment.