-
Notifications
You must be signed in to change notification settings - Fork 0
/
music-genre-classification.py
97 lines (71 loc) · 2.28 KB
/
music-genre-classification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from python_speech_features import mfcc
import scipy.io.wavfile as wav
import numpy as np
from tempfile import TemporaryFile
import os
import pickle
import random
import operator
import math
# function to get the distance between feature vectors and find neighbors
def getNeighbors(trainingSet, instance, k):
    """Return the labels of the ``k`` training samples closest to ``instance``.

    Every training sample is scored by adding ``distance`` evaluated in both
    argument orders, so the score does not depend on which of the two samples
    is passed first. The label is taken from index 2 of the training sample.

    NOTE(review): ``distance`` is not defined in the visible part of this
    file; it has to be provided elsewhere before this function is called.

    Args:
        trainingSet: Sequence of samples; element ``[2]`` of each sample is
            used as its label.
        instance: The sample whose neighbours are wanted.
        k: Number of neighbours to return; also forwarded to ``distance``.

    Returns:
        A list of ``k`` labels ordered from nearest to farthest.

    Raises:
        IndexError: If ``k`` is larger than the number of training samples.
    """
    scored = []
    for sample in trainingSet:
        forward = distance(sample, instance, k)
        backward = distance(instance, sample, k)
        scored.append((sample[2], forward + backward))
    # sorted() is stable, so equally distant samples keep training-set order.
    ranked = sorted(scored, key=lambda pair: pair[1])
    return [ranked[position][0] for position in range(k)]
# identify the class of the instance
def nearestClass(neighbors):
    """Return the label that occurs most often in ``neighbors``.

    When several labels share the highest count, the one that appears first
    in ``neighbors`` wins: the tally dict keeps insertion order and the
    descending sort is stable.

    Args:
        neighbors: Sequence of hashable class labels.

    Returns:
        The most frequent label.

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    tally = {}
    for label in neighbors:
        tally[label] = tally.get(label, 0) + 1
    ranked = list(tally.items())
    ranked.sort(key=operator.itemgetter(1), reverse=True)
    winner, _votes = ranked[0]
    return winner
# function to evaluate the model
def getAccuracy(testSet, prediction):
    """Return the fraction of test samples whose label was predicted correctly.

    The true label of a sample is its last element; it is compared with the
    entry of ``prediction`` at the same position.

    Args:
        testSet: Sequence of samples whose last element is the true label.
        prediction: Sequence of predicted labels, aligned with ``testSet``.

    Returns:
        A float in ``[0.0, 1.0]``. An empty ``testSet`` yields ``0.0`` rather
        than a division by zero.

    Raises:
        IndexError: If ``prediction`` has fewer entries than ``testSet``.
    """
    if not testSet:
        return 0.0
    # Read the ``prediction`` argument itself; the previous body referenced an
    # unrelated name (``predictions``) and never used the parameter.
    correct = sum(
        1 for index, sample in enumerate(testSet)
        if sample[-1] == prediction[index]
    )
    return (1.0 * correct) / len(testSet)
# directory that holds the dataset
directory = "Data/"
# Walk the entries of `directory`, compute MFCC features for every file inside
# each one, and pickle one (mean vector, covariance matrix, label) tuple per
# file back to back into my.dat. The label `i` is the 1-based position of the
# folder in the os.listdir() result.
i = 0
# The context manager guarantees my.dat is closed even when reading or
# feature extraction fails part-way through.
with open("my.dat", "wb") as f:
    for folder in os.listdir(directory):
        i += 1
        # Only the first ten entries are processed.
        if i == 11:
            break
        for file in os.listdir(directory + folder):
            (rate, sig) = wav.read(directory + folder + "/" + file)
            mfcc_feat = mfcc(sig, rate, winlen=0.020, appendEnergy=False)
            # Covariance is taken over the transposed feature matrix; the
            # mean is taken along axis 0 of the untransposed one.
            covariance = np.cov(np.transpose(mfcc_feat))
            mean_matrix = mfcc_feat.mean(0)
            feature = (mean_matrix, covariance, i)
            pickle.dump(feature, f)
# Module-level accumulator of every record read by loadDataset().
dataset = []


def loadDataset(filename, split, trSet, teSet):
    """Read pickled records from ``filename`` and split them at random.

    Records are unpickled one after another until end of file. Each record
    read by this call is appended to the module-level ``dataset`` list and
    then placed in ``trSet`` with probability ``split``, otherwise in
    ``teSet``. Only the records read by this call are split, so calling the
    function again does not redistribute earlier records.

    Args:
        filename: Path of the file holding back-to-back pickled records.
        split: Probability (0.0-1.0) that a record goes to ``trSet``.
        trSet: List that receives the training records (mutated in place).
        teSet: List that receives the test records (mutated in place).

    Returns:
        None. Results are delivered through ``trSet``, ``teSet`` and the
        module-level ``dataset``.

    Raises:
        OSError: If ``filename`` cannot be opened.
    """
    records = []
    # NOTE: pickle.load can execute arbitrary code; only read trusted files.
    # The mode must be the string "rb"; the file is closed by the `with`.
    with open(filename, "rb") as f:
        while True:
            try:
                records.append(pickle.load(f))
            except EOFError:
                break
    dataset.extend(records)
    for record in records:
        if random.random() < split:
            trSet.append(record)
        else:
            teSet.append(record)
# Fill the train/test lists from my.dat, passing 0.66 as the split value.
trainingSet, testSet = [], []
loadDataset("my.dat", 0.66, trainingSet, testSet)