Skip to content

Commit 82fbab3

Browse files
committed
SUBCLU: fix automatic indexing reusing the wrong index.
1 parent e5c96e5 commit 82fbab3

File tree

1 file changed

+16
-4
lines changed
  • elki-clustering/src/main/java/elki/clustering/subspace

1 file changed

+16
-4
lines changed

elki-clustering/src/main/java/elki/clustering/subspace/SUBCLU.java

+16-4
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,21 @@
3434
import elki.database.ids.DBIDUtil;
3535
import elki.database.ids.DBIDs;
3636
import elki.database.ids.ModifiableDBIDs;
37+
import elki.database.query.QueryBuilder;
38+
import elki.database.query.distance.DistanceQuery;
3739
import elki.database.relation.ProxyView;
3840
import elki.database.relation.Relation;
3941
import elki.database.relation.RelationUtil;
4042
import elki.distance.subspace.DimensionSelectingSubspaceDistance;
4143
import elki.distance.subspace.SubspaceEuclideanDistance;
44+
import elki.index.RangeIndex;
4245
import elki.logging.Logging;
4346
import elki.logging.progress.FiniteProgress;
4447
import elki.logging.progress.StepProgress;
4548
import elki.math.linearalgebra.Centroid;
4649
import elki.result.Metadata;
4750
import elki.utilities.datastructures.BitsUtil;
51+
import elki.utilities.datastructures.iterator.It;
4852
import elki.utilities.documentation.Description;
4953
import elki.utilities.documentation.Reference;
5054
import elki.utilities.documentation.Title;
@@ -72,6 +76,9 @@
7276
* only include points in clusters that are not already part of sub-clusters
7377
* (note that this does not remove overlap of independent subspaces).
7478
* <p>
79+
* TODO: modifying the subspace of the distance function in place can cause
80+
* subtle errors (e.g., an automatically created index being reused for the wrong subspace). Rather replace this with a subspace distance factory.
81+
* <p>
7582
* Reference:
7683
* <p>
7784
* Karin Kailing, Hans-Peter Kriegel, Peer Kröger<br>
@@ -316,17 +323,22 @@ private List<Cluster<Model>> runDBSCAN(Relation<V> relation, DBIDs ids, Subspace
316323
}
317324
// subset filter:
318325
relation = ids == null ? relation : new ProxyView<>(ids, relation);
319-
320-
DBSCAN<V> dbscan = new DBSCAN<>(distance, epsilon, minpts);
321-
Clustering<Model> dbsres = dbscan.run(relation);
322-
326+
Clustering<Model> dbsres = new DBSCAN<>(distance, epsilon, minpts).run(relation);
323327
// separate cluster and noise
324328
List<Cluster<Model>> clusters = new ArrayList<>();
325329
for(Cluster<Model> c : dbsres.getAllClusters()) {
326330
if(!c.isNoise()) {
327331
clusters.add(c);
328332
}
329333
}
334+
// Remove any automatically generated index associated with this distance
335+
// because we modify the distance subspace again!
336+
DistanceQuery<V> dq = new QueryBuilder<>(relation, distance).distanceQuery();
337+
for(It<RangeIndex<V>> it = Metadata.hierarchyOf(relation).iterChildrenReverse().filter(RangeIndex.class); it.valid(); it.advance()) {
338+
if(it.get().rangeByDBID(dq, epsilon, 0) != null) {
339+
Metadata.hierarchyOf(relation).removeChild(it.get());
340+
}
341+
}
330342
return clusters;
331343
}
332344

0 commit comments

Comments
 (0)