1
+ import numpy as np
2
+ import matplotlib .pyplot as plt
3
+ from sklearn .datasets import make_moons
4
+ from sklearn .preprocessing import StandardScaler
5
+
6
# Draw the two-moons sample set, then rescale it with StandardScaler.
moons_kwargs = {"n_samples": 1000, "noise": 0.1, "random_state": 42}
raw_points, _ = make_moons(**moons_kwargs)

scaler = StandardScaler()
X = scaler.fit_transform(raw_points)
10
+
11
class DBSCAN:
    """Density-based clustering of points (DBSCAN).

    A point is a *core* point when at least ``min_points`` points (itself
    included) lie strictly closer than ``eps`` to it. Clusters are grown
    outward from core points; points never reached by any cluster keep the
    label ``-1`` (noise). Cluster ids start at 1.

    Attributes:
        eps: Neighborhood radius (strict ``<`` comparison).
        min_points: Minimum neighborhood size for a core point.
        labels: Integer label array from the most recent ``fit`` call, or
            ``None`` before the first call.
    """

    def __init__(self, eps, min_points):
        self.eps = eps
        self.min_points = min_points
        self.labels = None

    def fit(self, X):
        """Cluster the rows of ``X`` and return one integer label per row.

        Args:
            X: Array-like of points, one point per row.

        Returns:
            The label array (also stored on ``self.labels``): ``-1`` for
            noise, ``1..k`` for the clusters in order of discovery.
        """
        X = np.asarray(X)
        # -1 doubles as "not yet assigned" and "noise": a point that never
        # gets claimed by a cluster simply keeps this value.
        self.labels = np.full(len(X), -1)
        cluster_id = 0

        for point_idx in range(len(X)):
            if self.labels[point_idx] != -1:  # already claimed by a cluster
                continue

            neighbors = self.region_query(X, point_idx)
            if len(neighbors) < self.min_points:
                # Not a core point; stays -1 unless a later cluster
                # absorbs it as a border point.
                continue

            cluster_id += 1
            self.labels[point_idx] = cluster_id
            self.expand_cluster(X, neighbors, cluster_id)

        return self.labels

    def euclidean_distance(self, point1, point2):
        """Return the Euclidean distance between two points."""
        delta = np.asarray(point1) - np.asarray(point2)
        return np.sqrt(np.sum(delta ** 2))

    def region_query(self, X, point_idx):
        """Return the indices of all rows of ``X`` closer than ``eps``.

        The query point itself is always part of the result (distance 0),
        provided ``eps`` is positive.

        Args:
            X: Array-like of points, one point per row.
            point_idx: Row index of the query point.

        Returns:
            A list of row indices in ascending order.
        """
        X = np.asarray(X)
        # Flatten each row so 1-D inputs (scalar points) are handled too.
        deltas = (X - X[point_idx]).reshape(len(X), -1)
        distances = np.sqrt(np.sum(deltas ** 2, axis=1))
        return np.flatnonzero(distances < self.eps).tolist()

    def expand_cluster(self, X, neighbors, cluster_id):
        """Grow cluster ``cluster_id`` outward from ``neighbors``.

        Every still-unassigned point reachable through a chain of core
        points is given ``cluster_id``. Points that already belong to a
        cluster are left untouched.

        Uses an explicit worklist instead of recursion so that large
        clusters cannot exhaust the interpreter's recursion limit.

        Args:
            X: Array-like of points, one point per row.
            neighbors: Indices in the neighborhood of the seed core point.
            cluster_id: Label to assign to the points that are reached.
        """
        pending = list(neighbors)
        while pending:
            idx = pending.pop()
            if self.labels[idx] != -1:  # already assigned to a cluster
                continue

            self.labels[idx] = cluster_id

            # Only core points propagate the cluster further; border
            # points are labelled but not expanded.
            reachable = self.region_query(X, idx)
            if len(reachable) >= self.min_points:
                pending.extend(
                    n for n in reachable if self.labels[n] == -1
                )
70
+
71
+
72
# Cluster the prepared points; fit() hands back the per-point label array.
dbscan = DBSCAN(min_points=5, eps=0.2)
labels = dbscan.fit(X)
74
+
75
+
76
def plot_clusters(X, labels):
    """Scatter-plot the first two columns of ``X``, colored by label.

    Points labelled ``-1`` are drawn in black under the legend entry
    "Noise"; every other label gets a color from the rainbow colormap.
    Legend entries appear in ascending label order.

    Args:
        X: Array-like of points; columns 0 and 1 are plotted.
        labels: Array-like with one integer label per row of ``X``.
    """
    # Coerce up front so boolean masking also works for plain lists.
    X = np.asarray(X)
    labels = np.asarray(labels)

    # Sorted unique labels give a deterministic legend order; the count is
    # loop-invariant, so compute it once.
    unique_labels = np.unique(labels)
    n_unique = len(unique_labels)

    plt.figure(figsize=(10, 6))

    for label in unique_labels:
        mask = labels == label
        if label == -1:
            color = "black"
            legend_name = "Noise"
        else:
            color = plt.cm.rainbow(label / n_unique)
            legend_name = f"Cluster {label} "
        plt.scatter(X[mask, 0], X[mask, 1], color=color, label=legend_name)

    plt.title("DBSCAN Clustering")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.legend()
    plt.show()
89
+
90
# Render the clustering result for the prepared points.
plot_clusters(X=X, labels=labels)
0 commit comments