-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclassic_demo.py
59 lines (55 loc) · 1.86 KB
/
classic_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans, DBSCAN
import time
from pathlib import Path
def cluster_and_time(data, algorithm_name, algorithm):
    """
    Cluster the data using the given algorithm and measure its execution time.

    Args:
        data: Samples to cluster; passed unchanged to ``algorithm.fit_predict``.
        algorithm_name: Display name of the algorithm. Not used inside this
            function; kept so existing callers keep working.
        algorithm: Any object exposing a ``fit_predict(data)`` method.

    Returns:
        A ``(labels, duration)`` tuple: the value returned by ``fit_predict``
        and the elapsed time of that call in seconds.
    """
    # perf_counter() is monotonic and high resolution, whereas time.time()
    # follows the system clock and can jump (or be too coarse) mid-measurement.
    start_time = time.perf_counter()
    labels = algorithm.fit_predict(data)
    duration = time.perf_counter() - start_time
    return labels, duration
def plot_clusters(data, labels, title, algorithm_name):
    """
    Plot clustered data and save the figure as a PNG file.

    Args:
        data: Array indexed as ``data[:, 0]`` for x and ``data[:, 1]`` for y.
        labels: Per-point cluster labels, used as the scatter plot hue.
        title: Title drawn at the top of the figure.
        algorithm_name: Inserted into the output file name, which is
            ``plots/circles_<algorithm_name>.png``.
    """
    output_path = Path("plots") / f"circles_{algorithm_name}.png"
    # savefig() does not create missing directories, so make sure the
    # target folder exists before any plotting work is done.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    sns.set(style="whitegrid")
    fig = plt.figure(figsize=(8, 6))
    try:
        sns.scatterplot(
            x=data[:, 0],
            y=data[:, 1],
            hue=labels,
            palette="tab10",
            s=50,
            edgecolor="w",
            legend="full",
        )
        plt.title(title, fontsize=16)
        plt.xlabel("x", fontsize=12)
        plt.ylabel("y", fontsize=12)
        plt.legend(title="Cluster", loc="best", fontsize=10)
        plt.grid(True)
        fig.savefig(output_path)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # without this, figures pile up when the function is called repeatedly.
        plt.close(fig)
def main():
    """
    Cluster every CSV file in ``./demos`` and plot the result of each algorithm.

    Each file is read with pandas, its ``x`` and ``y`` columns are clustered
    with K-Means (k=2) and DBSCAN (eps=3), the execution time of each run is
    printed, and a scatter plot is saved through ``plot_clusters``.
    """
    # Load the data
    input_folder = Path("./demos")
    # Sorted so that files are processed in a reproducible order.
    input_files = sorted(input_folder.glob("*.csv"))
    if not input_files:
        print(f"No CSV files found in {input_folder}")
        return

    for input_file in input_files:
        df = pd.read_csv(input_file)
        data = df[['x', 'y']].values

        # Algorithms to use (fresh, unfitted estimators for every file)
        algorithms = {
            "K-Means (k=2)": KMeans(n_clusters=2, random_state=42, max_iter=100000),
            "DBSCAN (eps=3)": DBSCAN(eps=3, min_samples=5),
        }

        # Perform clustering and time each algorithm
        for algorithm_name, algorithm in algorithms.items():
            print(f"Clustering with {algorithm_name}...")
            labels, duration = cluster_and_time(data, algorithm_name, algorithm)
            print(f"Execution time for {algorithm_name}: {duration:.4f} seconds")
            # The plot file name is derived from the name passed here; include
            # the input file stem so plots from different CSV files do not
            # overwrite each other.
            plot_clusters(
                data,
                labels,
                f"{algorithm_name} (Time: {duration:.4f}s)",
                f"{input_file.stem}_{algorithm_name}",
            )
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()