Skip to content

Commit a13eab0

Browse files
authored
Add files via upload
1 parent 1a3b72b commit a13eab0

File tree

2 files changed

+120
-0
lines changed

2 files changed

+120
-0
lines changed
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
from sklearn.datasets import make_moons
4+
from sklearn.preprocessing import StandardScaler
5+
6+
# Build the two-moons toy dataset: 1000 samples with Gaussian noise 0.1 and a
# fixed seed so runs are reproducible. The generator's class labels are
# discarded because clustering is unsupervised.
X, _ = make_moons(n_samples=1000, noise=0.1, random_state=42)

# Standardize the features so a single eps radius is meaningful on both axes.
X = StandardScaler().fit_transform(X)
10+
11+
class DBSCAN:
    """Minimal density-based clustering (DBSCAN).

    A point is a *core* point when at least ``min_points`` points (itself
    included) lie strictly closer than ``eps`` to it. Clusters are grown
    outwards from core points; anything never reached stays noise.

    Label convention: ``-1`` is noise, clusters are numbered ``1, 2, ...``
    in the order their first core point is encountered.
    """

    def __init__(self, eps, min_points):
        """Store the neighborhood radius and the core-point threshold.

        Args:
            eps: Neighborhood radius; neighbors satisfy ``distance < eps``.
            min_points: Minimum neighborhood size (including the point
                itself) for a point to count as a core point.
        """
        self.eps = eps
        self.min_points = min_points
        self.labels = None  # Filled in by fit().

    def fit(self, X):
        """Cluster the rows of ``X`` and return one label per row.

        Args:
            X: Array-like of points, one point per row.

        Returns:
            The integer label array, also stored on ``self.labels``.
        """
        X = np.asarray(X)
        # -1 doubles as "not yet assigned" and "noise": a point that is still
        # -1 when the scan finishes was never reached by any cluster.
        self.labels = np.full(len(X), -1)
        cluster_id = 0

        for point_idx in range(len(X)):
            if self.labels[point_idx] != -1:  # Already claimed by a cluster
                continue

            neighbors = self.region_query(X, point_idx)
            if len(neighbors) < self.min_points:
                # Not a core point. Leave it as noise for now; a later
                # cluster may still absorb it as a border point.
                continue

            cluster_id += 1
            self.labels[point_idx] = cluster_id  # Seed of a new cluster
            self.expand_cluster(X, neighbors, cluster_id)

        return self.labels

    def euclidean_distance(self, point1, point2):
        """Return the Euclidean distance between two points."""
        return np.sqrt(np.sum((point1 - point2) ** 2))

    def region_query(self, X, point_idx):
        """Return the indices of all points closer than ``eps`` to one point.

        The query point itself is always included (its distance is 0), and
        indices are returned in ascending order.

        Args:
            X: Array-like of points, one point per row.
            point_idx: Row index of the query point.

        Returns:
            A list of integer row indices.
        """
        X = np.asarray(X)
        # One vectorized pass instead of a Python-level loop over every row.
        squared = (X - X[point_idx]) ** 2
        if squared.ndim > 1:
            squared = squared.reshape(len(X), -1).sum(axis=1)
        distances = np.sqrt(squared)
        return np.flatnonzero(distances < self.eps).tolist()

    def expand_cluster(self, X, neighbors, cluster_id):
        """Grow cluster ``cluster_id`` outwards from the given neighbors.

        Every still-unassigned (``-1``) point reachable through a chain of
        core points receives ``cluster_id``. Points already owned by a
        cluster are left untouched.

        Args:
            X: Array-like of points, one point per row.
            neighbors: Indices in the neighborhood of the cluster's seed.
            cluster_id: Label to assign to the absorbed points.
        """
        # An explicit work-list replaces recursion so that large or
        # chain-shaped clusters cannot exhaust Python's recursion limit.
        pending = list(neighbors)

        while pending:
            neighbor_idx = pending.pop()
            if self.labels[neighbor_idx] != -1:  # Already in some cluster
                continue

            self.labels[neighbor_idx] = cluster_id

            # Only core points propagate the cluster; border points stop here.
            new_neighbors = self.region_query(X, neighbor_idx)
            if len(new_neighbors) >= self.min_points:
                pending.extend(
                    idx for idx in new_neighbors if self.labels[idx] == -1
                )
70+
71+
72+
# Run the hand-written DBSCAN on the standardized moons; `labels` holds one
# cluster id per sample, with -1 marking noise.
dbscan = DBSCAN(min_points=5, eps=0.2)
labels = dbscan.fit(X)
74+
75+
76+
def plot_clusters(X, labels):
    """Scatter-plot the first two columns of ``X``, colored by cluster label.

    Points labelled ``-1`` are drawn in black and listed as "Noise"; every
    other label gets its own rainbow-colormap color and a "Cluster <label>"
    legend entry.

    Args:
        X: Array-like of points; columns 0 and 1 are plotted.
        labels: Array-like with one integer label per row of ``X``.
    """
    # Accept plain lists as well: the boolean masks below need NumPy arrays.
    X = np.asarray(X)
    labels = np.asarray(labels)

    # Computed once, in sorted order, so the legend order is deterministic
    # and the distinct-label count is not rebuilt on every iteration.
    unique_labels = np.unique(labels)
    n_labels = len(unique_labels)

    plt.figure(figsize=(10, 6))

    for label in unique_labels:
        is_noise = label == -1
        color = "black" if is_noise else plt.cm.rainbow(label / n_labels)
        members = labels == label
        plt.scatter(
            X[members, 0],
            X[members, 1],
            color=color,
            label="Noise" if is_noise else f"Cluster {label}",
        )

    plt.title("DBSCAN Clustering")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.legend()
    plt.show()
89+
90+
# Show the clustering produced by the hand-written DBSCAN above.
plot_clusters(X, labels)
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
from sklearn.datasets import make_moons
4+
from sklearn.preprocessing import StandardScaler
5+
from sklearn.cluster import DBSCAN
6+
7+
# Build the two-moons toy dataset: 1000 samples with Gaussian noise 0.1 and a
# fixed seed so runs are reproducible. The generator's class labels are
# discarded because clustering is unsupervised.
X, _ = make_moons(n_samples=1000, noise=0.1, random_state=42)

# Standardize the features so a single eps radius is meaningful on both axes.
X = StandardScaler().fit_transform(X)
11+
12+
# Cluster with scikit-learn's DBSCAN; fit_predict returns one label per
# sample, using -1 for noise.
dbscan = DBSCAN(min_samples=5, eps=0.2)
labels = dbscan.fit_predict(X)
14+
15+
16+
def plot_clusters(X, labels):
    """Scatter-plot the first two columns of ``X``, colored by cluster label.

    Points labelled ``-1`` are drawn in black and listed as "Noise"; every
    other label gets its own rainbow-colormap color and a "Cluster <label>"
    legend entry.

    Args:
        X: Array-like of points; columns 0 and 1 are plotted.
        labels: Array-like with one integer label per row of ``X``.
    """
    # Accept plain lists as well: the boolean masks below need NumPy arrays.
    X = np.asarray(X)
    labels = np.asarray(labels)

    # Computed once, in sorted order, so the legend order is deterministic
    # and the distinct-label count is not rebuilt on every iteration.
    unique_labels = np.unique(labels)
    n_labels = len(unique_labels)

    plt.figure(figsize=(10, 6))

    for label in unique_labels:
        is_noise = label == -1
        color = "black" if is_noise else plt.cm.rainbow(label / n_labels)
        members = labels == label
        plt.scatter(
            X[members, 0],
            X[members, 1],
            color=color,
            label="Noise" if is_noise else f"Cluster {label}",
        )

    plt.title("DBSCAN Clustering")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.legend()
    plt.show()
29+
30+
# Show the clustering produced by scikit-learn's DBSCAN above.
plot_clusters(X, labels)

0 commit comments

Comments
 (0)