Skip to content

Commit 4417fba

Browse files
committed
added files
1 parent e40f010 commit 4417fba

37 files changed

+2082
-0
lines changed

README.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,66 @@
11
# deep-learning-based-pitch-detection
22
Deep learning based pitch detection
3+
4+
The folder consists of 5 directories
5+
6+
dataset_creation: contains,
7+
Files used to convert MIDI data to time domain signals
8+
LSTM Pickle file creation
9+
LSTM Non-Overlapping time steps
10+
LSTM Overlapping time steps
11+
12+
evaluation_matlabFiles: contains,
13+
script used to reconstruct the audio files
14+
Visualize the files created for real audio
15+
Visualize the files created for synthetic audio
16+
17+
test_files: contains 2 directories,
18+
cnn, contains
19+
feed spectrogram as .png file
20+
feed spectrogram as .mat file
21+
lstm, contains
22+
file to create Pickle file
23+
file to create Timesteps
24+
test using trained LSTM network
25+
26+
trained_nets: contains,
27+
overlapping
28+
non-overlapping
29+
cnn network
30+
31+
training_files:
32+
cnn architecture
33+
lstm architecture
34+
35+
36+
Procedure:
37+
CNN
38+
Create datasets Inputs and Labels as .png file
39+
Provide the path to CNN architecture file
40+
Train the network
41+
Test the network by generating the spectrogram image in Matlab (applying the CQT) and passing the saved image through the CNN network.
42+
43+
44+
LSTM
45+
Create datasets Inputs and Labels using Matlab. (Matlab)
46+
Inputs are the spectrogram as .png file and labels are mat files.
47+
Create a pickle of 96xN by concatenating all spectrograms and mat files.(python)
48+
Now create the timesteps 96x216 using python.
49+
Provide the path of timesteps inputs and labels to LSTM architecture
50+
Train the network
51+
52+
Test the network by passing spectrograms
53+
Create 96xN of spectrograms using pickle
54+
Convert this to 96x216 timesteps
55+
Pass the file to test LSTM script
56+
save the network outputs
57+
Visualize the network outputs using Matlab.
58+
59+
60+
61+
62+
63+
64+
65+
66+

dataset_creation/Normalize.m

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
function [x,t]=Normalize(x,f)
% [x,t]=Normalize(x,f)
%
% Scale a signal to unit peak amplitude and build a matching time axis.
%
% Inputs:
%   x - vector of samples
%   f - sampling frequency in Hz
%
% Outputs:
%   x - input divided by its largest absolute value; returned unchanged
%       when the input is empty or all zeros (avoids a 0/0 -> NaN result)
%   t - time axis with length(x) points, linspace(0,length(x)/f,length(x))

% Use the global peak so the divisor is always a scalar.
peak = max(abs(x(:)));
if ~isempty(peak) && peak > 0
    x = x/peak; %normalize
end
O=length(x);
t=linspace(0,O/f,O);
5+

dataset_creation/ReduceSilence.m

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
function Notes = ReduceSilence(Notes)
% Notes = ReduceSilence(Notes)
%
% Shorten the silence between consecutive notes by moving note onsets
% earlier. Column 5 of Notes is read as the onset time and column 6 as the
% offset time of each note (one note per row).
%
% For every pair of consecutive rows the gap is
%   onset(next) - offset(previous)
% and the next onset is adjusted as follows:
%   0.0001 < gap < 0.5 : onset is moved to the previous offset (gap removed)
%   gap >= 0.5         : onset is moved to previous offset + 0.1
% Offsets are never changed and later notes are not shifted.
%
% Every gap is processed, including the last one, and a gap of exactly 0.5
% is treated as a long gap.

if size(Notes,1) < 2
    return; % no consecutive pair, nothing to adjust
end

onsets  = Notes(2:end,5);
offsets = Notes(1:end-1,6);
gaps    = onsets - offsets;

shortGap = gaps > 0.0001 & gaps < 0.5;
longGap  = gaps >= 0.5;

onsets(shortGap) = offsets(shortGap);
onsets(longGap)  = offsets(longGap) + 0.1;

Notes(2:end,5) = onsets;
end

dataset_creation/addNoise.m

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
function y_noise = addNoise(Signal, noiseLevel)
% y_noise = addNoise(Signal, noiseLevel)
%
% Add uniformly distributed random noise to a signal so that the resulting
% signal-to-noise ratio equals noiseLevel.
%
% Inputs:
%   Signal     - vector of samples (row or column)
%   noiseLevel - target SNR in dB
%
% Output:
%   y_noise    - Signal plus the scaled noise, same size as Signal. An
%                all-zero Signal is returned unchanged, because its SNR is
%                undefined and the scaling would otherwise produce NaNs.
%
% Side effects: plots the clean signal in figure 98 and prints the SNR
% before and after the noise is scaled.
%
% NOTE(review): rand() is uniform on [0,1], so the noise has a non-zero
% mean (DC offset). Kept as is to preserve the existing noise model.

%% Create uniform noise along the length of the signal
Npts = numel(Signal);
% Same size as Signal, so a column input is not expanded against a row.
Noise = rand(size(Signal));

figure(98)
% Time axis assumes a 44.1 kHz sampling rate.
[~,t]=Normalize(Signal,44100);
plot(t,Signal);

%% Calculate the power and add noise
Signal_Power = sum(abs(Signal(:)).^2)/Npts;
if Signal_Power == 0
    y_noise = Signal;
    return;
end
Noise_Power = sum(abs(Noise(:)).^2)/Npts;
% Bring the noise to the same power as the signal (0 dB SNR) ...
Noise = Noise.*sqrt(Signal_Power/Noise_Power);
Noise_Power = sum(abs(Noise(:)).^2)/Npts;

% ... then derive the power gain that yields the requested SNR.
K = (Signal_Power/Noise_Power)*10^((-noiseLevel)/10);

%% Calculate the OLD SNR
Old_SNR = 10*(log10(Signal_Power/Noise_Power));
disp(Old_SNR)

%% Calculate the NEW SNR (verification)
New_Noise = sqrt(K)*Noise;
New_Noise_Power = sum(abs(New_Noise(:)).^2)/Npts;
New_SNR = 10*(log10(Signal_Power/New_Noise_Power));
disp(New_SNR)

%% Return the noisy signal
y_noise = Signal + New_Noise;
end
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
'''
Split a pickled spectrogram matrix into fixed-length time-step chunks.

The input pickle holds a 2-D array whose first axis is time (N frames).
It is cut into consecutive, non-overlapping chunks of TIME_STEPS frames and
each chunk is written to its own pickle file (imageFile1.pckl,
imageFile2.pckl, ...). Frames left over after the last full chunk are dropped.
'''


import os
import pickle

import numpy as np


TIME_STEPS = 216
SAVE_DIR = r'**Enter the path**'
SOURCE_PICKLE = r'xx.pckl'


def split_into_timesteps(frames, time_steps=TIME_STEPS):
    """Yield consecutive, non-overlapping chunks of ``time_steps`` rows.

    ``frames`` is sliced along its first axis. Only complete chunks are
    produced; a trailing remainder shorter than ``time_steps`` is ignored.
    """
    frames = np.asarray(frames)
    usable = (len(frames) // time_steps) * time_steps
    for start in range(0, usable, time_steps):
        yield frames[start:start + time_steps]


def main():
    """Read SOURCE_PICKLE and write one pickle per chunk into SAVE_DIR."""
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(SOURCE_PICKLE, 'rb') as f:
        image_obj = pickle.load(f)

    for num, chunk in enumerate(split_into_timesteps(image_obj), start=1):
        # Running count of frames consumed so far (progress indicator).
        print(num * TIME_STEPS)
        image_file = "imageFile%d" % (num)
        out_path = os.path.join(SAVE_DIR, image_file + "." + 'pckl')
        with open(out_path, 'wb') as f:
            pickle.dump(chunk, f)


if __name__ == "__main__":
    main()

dataset_creation/fmsynth.m

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
function y=fmsynth(freq,dur,amp,Fs,filt)
% y=fmsynth(freq,dur,amp,Fs,filt)
%
% Synthesize a single note: a frequency-modulated sine at freq plus
% FM partials at 2,4,8,...,128 times freq, shaped by a piecewise-linear
% envelope, windowed, and finally passed through a digital filter.
%
% Inputs:
%  freq - frequency in Hz
%  dur - duration in seconds
%  amp - Amplitude in range [0,1] (not used by the current synthesis;
%        kept so existing callers keep working)
%  Fs - sampling frequency in Hz
%  filt - struct with fields b and a, the numerator and denominator
%         coefficients handed to filter(filt.b,filt.a,y)
%
% Output:
%  y - synthesized samples on the time grid 0:(1/Fs):dur, or [] when the
%      note has zero or negative duration (a warning is issued)

if nargin<5
    error('Five arguments required for fmsynth()');
end

N = floor(dur*Fs);

if N == 0
    warning('Note with zero duration.');
    y = [];
    return;
elseif N < 0
    warning('Note with negative duration. Skipping.');
    y = [];
    return;
end

% Unused random draw, kept so the random stream (and therefore the ramp
% length and window choice below) is unchanged for a given seed.
randn(1);

t = 0:(1/Fs):dur;

% Amplitude envelope; interp1 accepts the unsorted breakpoints.
envel = interp1([0 dur/2 dur/4 dur/8 dur/16 dur/9 dur/7 dur/3 dur], [0 1 .9 .8 .7 .8 .9 1 0], t);
I_env = 5.*envel; % modulation index follows the envelope
s = 0.9;          % running sum of partial weights, used to normalize

% Fundamental
y = s*envel.*sin(2.*pi.*freq.*t + I_env.*sin(2.*pi.*freq.*t));

%% Harmonic Parts %%
row = [2,4,8,16,32,64,128];

for h = row
    randn(1); % unused draw, kept to preserve the random stream (see above)
    y = y + (1/h)*envel.*(sin(2.*pi.*h*freq.*t + I_env.*sin(2.*pi.*h*freq.*t)));
    s = s + (1/h);
end
y = y/s;

% Smooth the edges with a window of roughly 90% of the note length
% (randomly jittered), applied once at the start and once at the end.
if (dur > .015)
    L = fix((9+randn(1)/10)*dur/10*Fs)+1;
    % Keep the window inside the signal even for an extreme random draw.
    L = max(1, min(L, numel(y)));
    if rand(1)>0.1
        ramp = kaiser(L)';
    else
        ramp = bartlett(L)';
    end
    y(1:L) = y(1:L) .* ramp;
    y(end-L+1:end) = y(end-L+1:end) .* ramp;
end

y = filter(filt.b,filt.a,y);

dataset_creation/getTempoChanges.m

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
function [tempos,tempos_time]=getTempoChanges(midi)
% [tempos,tempos_time]=getTempoChanges(midi)
%
% Collect every Set Tempo meta event in a parsed MIDI file.
%
% input: a midi struct from readmidi.m
% output:
%  tempos = tempo values indexed by tempos_time
%  tempos_time is in units of ticks
%
% When the file has no tempo event, the MIDI default (tempos = 500000,
% tempos_time = 0) is returned.
%
% should tempo changes effect across tracks? across channels?
%

% Copyright (c) 2009 Ken Schutte
% more info at: http://www.kenschutte.com/midi

tempos = [];
tempos_time = [];

for trackIdx = 1:length(midi.track)
    msgs = midi.track(trackIdx).messages;
    elapsed = 0; % ticks since the start of this track
    for msgIdx = 1:length(msgs)
        msg = msgs(msgIdx);
        elapsed = elapsed + msg.deltatime;

        % Meta event of type 81 (0x51) is 'Set Tempo'.
        isSetTempo = (msg.midimeta==0 && msg.type==81);
        if ~isSetTempo
            continue;
        end

        tempos_time(end+1) = elapsed;
        bytes = msg.data;
        % Three data bytes form one 24-bit big-endian value.
        tempos(end+1) = bytes(1)*16^4 + bytes(2)*16^2 + bytes(3);
    end
end

if isempty(tempos)
    tempos = 500000; % default value for midi
    tempos_time = 0;
end
34+
35+
36+

dataset_creation/labelsCreation.m

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
% labelsCreation
%
% For each MIDI file in pickMidiformats: synthesize audio, save it as a
% .wav, then cut the audio into one-second chunks. For every chunk the
% script saves the CQT magnitude as a .png image and a one-hot label
% matrix (MIDI note number per frame) as a .mat file.
close all;
clear all;
minFreq = 8.1757989156;      % lower CQT frequency limit in Hz
maxFreq = 1760.0000000000;   % upper CQT frequency limit in Hz
Fs = 44100;
labelRate = 216;  % label frames per second; assumes the CQT of a
                  % one-second chunk has this many columns - TODO confirm
maxTracks = 26;   % upper bound on the number of MIDI files processed

saveSpectrogam = 'Enter the path';
saveAudio = 'Enter the path';
saveMidi='Enter the path';

pickMidiformats = 'Enter the path';
filePattern = fullfile(pickMidiformats, '*.mid');
filesList = dir(filePattern);

% Not used further down in this script.
pickAudioformats = 'Enter the path';
filePattern1 = fullfile(pickAudioformats, '*.wav');
filesList1 = dir(filePattern1);

num = 0;

% Do not index past the files that actually exist.
nTracks = min(maxTracks, numel(filesList));

for k=1:nTracks
    %% Convert to time domain samples
    midi = readmidi((fullfile(pickMidiformats, filesList(k).name)));
    Notes = midiInfo(midi,0);
    Notes = ReduceSilence(Notes);
    n1 = Notes(:,5);   % note onsets in seconds
    n2 = Notes(:,6);   % note offsets in seconds
    N = Notes(:,3);    % MIDI note numbers

    [y,Fs]=midi2audio(Notes(1:end,:),Fs);

    %% Save the time domain samples
    trackFile = fullfile(saveAudio,['track' num2str(num) '.wav']);
    audiowrite(trackFile,y,Fs);

    %% Read the corresponding audio for duration
    aud=audioinfo(trackFile);

    %% Build the per-frame note labels once for the whole track
    % Starting from an empty vector for every track keeps labels of the
    % previous track from leaking into silent frames of this one.
    trackLabels = [];
    for i = 1:numel(n1)
        t1 = ceil(n1(i)*labelRate)+1;
        t2 = ceil(n2(i)*labelRate);
        trackLabels(1,t1:t2) = N(i);
    end

    %% Find CQT and form Midi labels for chunks of audio
    for j = 1:floor(aud.Duration)
        num = num+1;
        y_=y((j-1)*Fs+1:j*Fs);
        [cfs,f,g,fshifts] = cqt(y_,'SamplingFrequency',Fs,'FrequencyLimits',[minFreq maxFreq],'Window','hamming','BinsPerOctave',12);
        cfss = cfs.c;
        % Keep the first half (plus one) of the CQT rows.
        acfs = abs(cfss(1:size(cfss,1)/2+1,:));
        nFrames = size(acfs,2);

        % Zero-pad the labels when the audio outlasts the last note.
        lastFrame = j*nFrames;
        if numel(trackLabels) < lastFrame
            trackLabels(1,lastFrame) = 0;
        end
        tempvec = trackLabels(1,(j-1)*nFrames+1:lastFrame);

        imwrite(flipud(acfs),fullfile(saveSpectrogam,['image' num2str(num) '.png']))
        figure(1);
        imagesc(flipud(acfs))

        % One-hot label matrix: row (note number + 1) is set in each frame.
        temp = zeros(size(acfs,1),nFrames);
        for i = 1:nFrames
            temp(tempvec(i)+1,i) = 1;
        end
        figure(2);imagesc(flipud(temp))
        variable = flipud(temp);
        save(fullfile(saveMidi,['midi' num2str(num) '.mat']),'variable');
    end
end
72+
73+

0 commit comments

Comments
 (0)