Praveenk8051
diff --git a/‎README.md
Lines changed: 64 additions & 0 deletions b/‎README.md
Lines changed: 64 additions & 0 deletions
diff --git a/‎dataset_creation/Normalize.m
Lines changed: 5 additions & 0 deletions b/‎dataset_creation/Normalize.m
Lines changed: 5 additions & 0 deletions
diff --git a/‎dataset_creation/ReduceSilence.m
Lines changed: 23 additions & 0 deletions b/‎dataset_creation/ReduceSilence.m
Lines changed: 23 additions & 0 deletions
diff --git a/‎dataset_creation/addNoise.m
Lines changed: 43 additions & 0 deletions b/‎dataset_creation/addNoise.m
Lines changed: 43 additions & 0 deletions
diff --git a/‎dataset_creation/createTimesteps_nonoverlapping.py
Lines changed: 48 additions & 0 deletions b/‎dataset_creation/createTimesteps_nonoverlapping.py
Lines changed: 48 additions & 0 deletions
diff --git a/‎dataset_creation/fmsynth.m
Lines changed: 72 additions & 0 deletions b/‎dataset_creation/fmsynth.m
Lines changed: 72 additions & 0 deletions
diff --git a/‎dataset_creation/getTempoChanges.m
Lines changed: 36 additions & 0 deletions b/‎dataset_creation/getTempoChanges.m
Lines changed: 36 additions & 0 deletions
diff --git a/‎dataset_creation/labelsCreation.m
Lines changed: 73 additions & 0 deletions b/‎dataset_creation/labelsCreation.m
Lines changed: 73 additions & 0 deletions
@@ -1,2 +1,66 @@
 # deep-learning-based-pitch-detection
 Deep learning based pitch detection
+
+The folder consists of 5 directories
+
+	dataset_creation:  contains,
+		Files used to convert MIDI data to time domain signals 
+		LSTM Pickle file creation
+		LSTM Non-Overlapping time steps
+		LSTM Overlapping time steps
+		
+	evaluation_matlabFiles: contains,
+		script used to reconstruct the audio files
+		Visualize the files created for real audio
+		Visualize the files created for synthetic audio
+		
+	test_files: contains 2 directories,
+		cnn, contains
+			feed spectrogram as .png file
+			feed spectrogram as .mat file 
+		lstm, contains
+			file to create Pickle file
+			file to create Timesteps 
+			test using trained LSTM network
+	
+	trained_nets: contains,
+		overlapping 
+		non-overlapping
+		cnn network
+		
+	training_files:
+		cnn architecture
+		lstm architecture
+		
+		
+Procedure:
+CNN
+Create datasets Inputs and Labels as .png file
+Provide the path to CNN architecture file
+Train the network
+Test the network: in Matlab, apply the CQT to the audio and save the result as an image, then pass the saved image through the CNN network.
+
+
+LSTM
+Create the dataset inputs and labels. (Matlab)
+Inputs are the spectrogram as .png file and labels are mat files.
+Create a pickle of 96xN by concatenating all spectrograms and mat files.(python)
+Now create the timesteps 96x216 using python.
+Provide the path of timesteps inputs and labels to LSTM architecture
+Train the network
+
+Test the network by passing spectrograms 
+	Create 96xN of spectrograms using pickle
+	Convert this to 96x216 timesteps
+	Pass the file to test LSTM script
+	save the network outputs
+	Visualize the network outputs using Matlab.
+	
+	
+	
+	
+
+
+
+
+
@@ -0,0 +1,5 @@
+function [x,t]=Normalize(x,f)
+% NORMALIZE  Peak-normalize a signal and build its time axis.
+%
+%   [x,t] = Normalize(x,f) scales x so that its largest absolute value
+%   becomes 1 and returns the time of every sample.
+%
+%   Inputs:
+%     x - signal samples (vector)
+%     f - sampling frequency in Hz
+%
+%   Outputs:
+%     x - x divided by its peak absolute value; returned unchanged when
+%         the peak is 0 (or x is empty), because dividing would give NaN
+%     t - row vector with one entry per sample, t(k) = (k-1)/f seconds
+
+peak = max(abs(x(:)));
+% Only scale when there is a non-zero peak; 0/0 would turn a silent
+% signal into NaN.
+if ~isempty(peak) && peak > 0
+    x = x/peak;
+end
+O = length(x);
+% Sample k is taken at (k-1)/f seconds. linspace(0,O/f,O) stretched the
+% axis so that the last sample landed one sample period too late.
+t = (0:O-1)/f;
+
@@ -0,0 +1,23 @@
+function Notes = ReduceSilence(Notes)
+% REDUCESILENCE  Shrink the gaps between consecutive rows of a note matrix.
+%
+%   Notes = ReduceSilence(Notes) compares column 5 of every row (from the
+%   second row on) with column 6 of the row before it and moves column 5
+%   closer when the difference (the "gap") is positive:
+%     * 0.0001 < gap < 0.5 : column 5 is set to the previous row's
+%                            column 6 (gap removed)
+%     * gap >= 0.5         : column 5 is set to the previous row's
+%                            column 6 plus 0.1 (gap shortened to 0.1)
+%   Only column 5 is modified; later rows are not shifted.
+%
+%   Input/Output:
+%     Notes - numeric matrix with at least 6 columns, one row per note.
+%             NOTE(review): columns 5 and 6 are presumably note start and
+%             end times in seconds (as produced by midiInfo) - confirm.
+
+% Fewer than two rows means there is no pair of notes to compare.
+if size(Notes,1) < 2
+    return;
+end
+
+n1 = Notes(2:end,5);      % column 5 of rows 2..end
+n2 = Notes(1:end-1,6);    % column 6 of rows 1..end-1
+gap = n1 - n2;
+
+% Every gap is evaluated, including the last one (the former loop
+% condition j < length(n1) skipped it), and a gap of exactly 0.5 is
+% treated as a long gap instead of matching neither case.
+shortGap = gap > 0.0001 & gap < 0.5;
+longGap  = gap >= 0.5;
+
+n1(shortGap) = n2(shortGap);
+n1(longGap)  = n2(longGap) + 0.1;
+
+Notes(2:end,5) = n1;
+end
@@ -0,0 +1,43 @@
+function y_noise = addNoise(Signal, noiseLevel)
+% ADDNOISE  Add uniform random noise to a signal at a requested SNR.
+%
+%   y_noise = addNoise(Signal, noiseLevel) draws noise with rand, scales
+%   it so that 10*log10(signal power / noise power) equals noiseLevel,
+%   and returns Signal plus that noise.
+%
+%   Inputs:
+%     Signal     - signal samples (row or column vector)
+%     noiseLevel - target signal-to-noise ratio in dB
+%
+%   Output:
+%     y_noise - noisy signal, same size as Signal. An empty or all-zero
+%               Signal is returned unchanged because its SNR is undefined.
+%
+%   Side effects: plots Signal in figure 98 and displays the SNR before
+%   and after scaling the noise.
+%
+%   NOTE(review): rand draws from [0,1), so the noise has a non-zero mean
+%   (DC offset) - confirm whether zero-mean noise was intended.
+
+%% Create uniform noise along the length of the signal
+Npts = numel(Signal);
+% Match the shape of Signal. A fixed 1-by-Npts row vector made
+% Signal + Noise fail (or expand to an Npts-by-Npts matrix) for column
+% vector input.
+Noise = rand(size(Signal));
+
+figure(98)
+[~,t]=Normalize(Signal,44100);
+plot(t,Signal);
+
+%% Calculate the power and add noise
+Signal_Power = sum(abs(Signal(:)).^2)/Npts;
+if Npts == 0 || Signal_Power == 0
+    % No signal power: the scale factors below would be 0/0 = NaN.
+    y_noise = Signal;
+    return;
+end
+Noise_Power = sum(abs(Noise(:)).^2)/Npts;
+% Bring the noise to the same power as the signal (0 dB SNR) ...
+Noise = Noise.*sqrt(Signal_Power/Noise_Power);
+Noise_Power = sum(abs(Noise(:)).^2)/Npts;
+
+% ... then K is the power gain that moves it to the requested SNR.
+K = (Signal_Power/Noise_Power)*10^((-noiseLevel)/10);
+%% Calculate the OLD SNR
+Old_SNR = 10*(log10(Signal_Power/Noise_Power));
+disp(Old_SNR)
+
+%% Calculate the NEW SNR (verification)
+New_Noise = sqrt(K)*Noise;
+New_Noise_Power = sum(abs(New_Noise(:)).^2)/Npts;
+New_SNR = 10*(log10(Signal_Power/New_Noise_Power));
+disp(New_SNR)
+
+%% Return the noisy signal
+y_noise = Signal + New_Noise;
+end
@@ -0,0 +1,48 @@
+'''
+Forms 96x216 pickle file if the pickle format of dimension 96xN is supplied 
+'''
+
+
+import os
+import pickle
+import numpy as np
+
+
+timeSteps=216
+value=0
+num=0
+saveImagefile=r'**Enter the path**'
+
+
+
+f = open(r'xx.pckl', 'rb')
+imageObj = pickle.load(f)
+f.close()
+
+
+
+
+
+for j in range(int(len(imageObj)/timeSteps)):
+    num=num+1
+    image_data = np.array([])
+    
+    for i in range(216):
+        
+        image_data=imageObj[value,:]
+        image_data_expandDims=np.expand_dims(image_data,axis=0)
+        if i==0:
+            image_data_=image_data_expandDims
+            value=value+1
+        else:    
+            image_data_ = np.concatenate((image_data_, image_data_expandDims), axis=0)
+            value=value+1
+            
+        if value % timeSteps==0:
+            break
+    
+    print(value)
+    imageFile = ("imageFile%d" % (num))
+    f = open((os.path.join(saveImagefile,imageFile + "." + 'pckl')), 'wb')
+    pickle.dump(image_data_,f)
+    f.close()
@@ -0,0 +1,72 @@
+function y=fmsynth(freq,dur,amp,Fs,filt)
+% y=fmsynth(freq,dur,amp,Fs,filt)
+%
+% Synthesize a single note: a frequency-modulated sine at freq plus
+% frequency-modulated partials at 2,4,8,...,128 times freq, shaped by an
+% amplitude envelope, windowed at both ends and finally passed through
+% filter(filt.b,filt.a,...).
+%
+% Inputs:
+%  freq - frequency in Hz
+%  dur - duration in seconds
+%  amp - Amplitude in range [0,1]
+%        NOTE(review): amp is not used by any active line below (it only
+%        appears in commented-out code) - confirm this is intended.
+%  Fs -  sampling frequency in Hz
+%  filt - struct with fields b and a, used as filter(filt.b,filt.a,y)
+%
+% Output:
+%  y - synthesized samples; [] (with a warning) when floor(dur*Fs) <= 0
+%
+% The result is not deterministic: randn/rand are drawn several times
+% below, so the order of those calls matters for seeded runs.
+
+% All five arguments are mandatory.
+if nargin<5
+  error('Five arguments required for synth()');
+end
+
+% Number of whole samples that fit in the requested duration.
+N = floor(dur*Fs);
+
+if N == 0
+  warning('Note with zero duration.');
+  y = [];
+  return;
+
+elseif N < 0
+  warning('Note with negative duration. Skipping.');
+  y = [];
+  return;
+end
+
+% NOTE(review): n is not used by any line below.
+n=0:N-1;
+
+
+% rn is drawn here but not used by the active code (it is only referenced
+% in the commented-out tail of the y = ... line); the call still consumes
+% one random number.
+rn = randn(1);
+% Time axis from 0 to dur in steps of 1/Fs.
+t = 0:(1/Fs):dur;
+% Amplitude envelope: piecewise-linear interpolation through the listed
+% (time, value) breakpoints, evaluated on the same grid as t.
+% NOTE(review): the breakpoint times are not listed in increasing order
+% (dur/2, dur/4, dur/8, dur/16, dur/9, ...) - confirm interp1 handles this
+% the way the envelope was intended.
+envel = interp1([0 dur/2 dur/4 dur/8 dur/16 dur/9 dur/7 dur/3 dur], [0 1 .9 .8 .7 .8 .9 1 0], 0:(1/Fs):dur);
+% Modulation index follows the envelope, scaled by 5.
+I_env = 5.*envel;%5
+% s is the weight of the fundamental; it later accumulates the weights of
+% all partials so the sum can be normalized.
+s = 0.9;%0.7+randn(1)/5;
+%y = s.*sin(2.*pi.*freq.*t);% 
+% Fundamental: carrier and modulator both run at freq.
+y = s*envel.*sin(2.*pi.*freq.*t + I_env.*sin(2.*pi.*freq.*t));% + (amp/10)*sin(2*pi.*(freq+rn).*t) + (amp/10)*sin(2*pi.*(freq-rn).*t);
+%y = s*envel.*sin(2.*pi.*freq.*t);
+%% Harmonic Parts %%
+% Multiples of freq at which extra partials are added.
+row = [2,4,8,16,32,64,128];
+%     row = [2,4,6,8,12,16,20,24,32,48,64,80,96,128,256];
+
+for i = row(1:end)
+    % rn (clamped to [-1,1]) and hars are assigned but not used by the
+    % lines below; the randn call still consumes one random number.
+    rn = max(-1,randn(1));
+    rn = min(1,rn);
+    hars = 0;
+    % Add the FM partial at i*freq with weight 1/i.
+    y = y + (1/i)*envel.*(sin(2.*pi.*i*freq.*t + I_env.*sin(2.*pi.*i*freq.*t)));
+    s = s + (1/i);
+end
+% Normalize by the sum of all weights (0.9 plus every 1/i).
+y = y/s;
+
+% Smooth the edges with a window (only for notes longer than 15 ms).
+% NOTE(review): this used to be described as a "10ms ramp", but L below is
+% roughly 9/10 of dur*Fs (with a small random jitter), so the first-L and
+% last-L sample ranges overlap over most of the note - confirm intent.
+if (dur > .015)
+  % NOTE(review): the "L odd" remark is not enforced by this expression.
+  L = fix((9+randn(1)/10)*dur/10*Fs)+1;  % L odd
+  % Kaiser window when rand(1) exceeds 0.1, Bartlett window otherwise.
+  if rand(1)>0.1
+      ramp = kaiser(L)';  % odd length
+  else
+      ramp = bartlett(L)';
+  end
+%   L = ceil(L/2);
+  % Multiply the first L samples, then the last L samples, by the window.
+  y(1:L) = y(1:L) .* ramp(1:L);
+  y(end-L+1:end) = y(end-L+1:end) .* ramp(end-L+1:end);
+end
+% figure;plot(y);
+% y = filtfilt(filt.b,filt.a,y);
+% figure(1);plot(y);
+
+% Final filtering with the caller-supplied coefficients.
+y = filter(filt.b,filt.a,y);
+% figure(31);plot(y);
+% figure(30);freqz([filt.b,filt.a]);
+% disp(1);
@@ -0,0 +1,36 @@
+function [tempos,tempos_time]=getTempoChanges(midi)
+% [tempos,tempos_time]=getTempoChanges(midi)
+%
+% Collect the tempo changes stored in a midi struct.
+%
+% input:
+%   midi - a midi struct from readmidi.m
+% output:
+%   tempos      - tempo values, one entry per matching message
+%                 (500000 when the file contains none)
+%   tempos_time - position of each entry of tempos, in units of ticks,
+%                 counted from the start of the track it was found in
+%                 (0 when the file contains none)
+%
+% Open question: should tempo changes take effect across tracks? across
+% channels?
+%
+
+% Copyright (c) 2009 Ken Schutte
+% more info at: http://www.kenschutte.com/midi
+
+tempos = [];
+tempos_time = [];
+
+for trk = 1:length(midi.track)
+  msgs = midi.track(trk).messages;
+  ticks = 0;                         % running tick count of this track
+  for m = 1:length(msgs)
+    msg = msgs(m);
+    ticks = ticks + msg.deltatime;
+
+    % Only messages with midimeta==0 and type 81 carry a tempo
+    % (presumably the 'Set Tempo' meta event - see readmidi.m).
+    isTempoMsg = (msg.midimeta==0) && (msg.type==81);
+    if ~isTempoMsg
+      continue;
+    end
+
+    % The three data bytes form one 24-bit value, most significant first.
+    payload = msg.data;
+    tempos_time(end+1) = ticks;
+    tempos(end+1) = payload(1)*16^4 + payload(2)*16^2 + payload(3);
+  end
+end
+
+% No tempo message anywhere: fall back to the midi default.
+if isempty(tempos)
+    tempos = 500000;
+    tempos_time = 0;
+end
+
+
+
@@ -0,0 +1,73 @@
+% labelsCreation  Build CQT spectrogram images and per-frame MIDI labels.
+%
+% For every MIDI file found in pickMidiformats (at most maxTracks files):
+%   1. read the notes, shorten silences and render them to audio,
+%   2. cut the audio into one-second chunks,
+%   3. save the magnitude CQT of each chunk as imageK.png,
+%   4. save a one-hot label matrix of the same size as midiK.mat, with a
+%      1 in row (note number + 1) for every frame (row 1 where no note is
+%      active), flipped upside down like the image.
+%
+% Requires readmidi, midiInfo, midi2audio and ReduceSilence on the path.
+close all;
+clear all;
+% Frequency limits passed to cqt.
+% NOTE(review): 8.1757989156 Hz is presumably MIDI note 0, chosen so that
+% the 12-bins-per-octave CQT rows line up with MIDI note numbers - confirm.
+minFreq = 8.1757989156;
+maxFreq = 1760.0000000000;
+Fs = 44100;
+% Label frames per second of audio.
+% NOTE(review): assumed to equal the number of CQT columns produced for a
+% one-second chunk (size(acfs,2)) - confirm.
+framesPerSecond = 216;
+% Upper bound on the number of MIDI files processed.
+maxTracks = 26;
+
+saveSpectrogam = 'Enter the path';
+saveAudio = 'Enter the path';
+saveMidi='Enter the path';
+
+pickMidiformats = 'Enter the path';
+filePattern = fullfile(pickMidiformats, '*.mid');
+filesList = dir(filePattern);
+
+% NOTE(review): the audio file list is not used below.
+pickAudioformats = 'Enter the path';
+filePattern1 = fullfile(pickAudioformats, '*.wav');
+filesList1 = dir(filePattern1);
+
+num = 0;   % running index of the chunks written so far
+
+% Never index past the files that actually exist.
+for k=1:min(maxTracks,numel(filesList))
+    %% Convert to time domain samples
+    midi = readmidi((fullfile(pickMidiformats, filesList(k).name)));
+    Notes = midiInfo(midi,0);
+    Notes = ReduceSilence(Notes);
+    n1 = Notes(:,5);   % presumably note start times in seconds
+    n2 = Notes(:,6);   % presumably note end times in seconds
+    N = Notes(:,3);    % presumably MIDI note numbers
+
+    [y,Fs]=midi2audio(Notes(1:end,:),Fs);
+
+    %% Save the time domain samples
+    audiowrite(fullfile(saveAudio,['track' num2str(num) '.wav']),y,Fs);
+
+    %% Read the corresponding audio for duration
+    aud=audioinfo(fullfile(saveAudio,['track' num2str(num) '.wav']));
+
+    %% Per-frame note labels for the whole track
+    % Rebuilt from scratch for every track so that no labels of a
+    % previous track (or previous chunk) can leak into this one.
+    trackLabels = zeros(1,0);
+    for i = 1:numel(n1)
+        t1 = ceil(n1(i)*framesPerSecond)+1;
+        t2 = ceil(n2(i)*framesPerSecond);
+        trackLabels(1,t1:t2) = N(i);
+    end
+
+    %% Find CQT and form Midi labels for chunks of audio
+    for j = 1:floor(aud.Duration)
+        num = num+1;
+        % One second of audio. The CQT is taken of this chunk; the
+        % previous code passed an undefined variable (yNoise) instead.
+        y_=y((j-1)*Fs+1:j*Fs);
+        [cfs,f,g,fshifts] = cqt(y_,'SamplingFrequency',Fs,'FrequencyLimits',[minFreq maxFreq],'Window','hamming','BinsPerOctave',12);
+        cfss = cfs.c;
+        acfs = abs(cfss(1:size(cfss,1)/2+1,:));
+
+        % Labels belonging to this chunk. Zero-pad when the notes end
+        % before the chunk does, so the slice never runs out of bounds.
+        nFrames = size(acfs,2);
+        lastFrame = j*nFrames;
+        if numel(trackLabels) < lastFrame
+            trackLabels(1,lastFrame) = 0;
+        end
+        tempvec = trackLabels(1,lastFrame-nFrames+1:lastFrame);
+
+        imwrite(flipud(acfs),fullfile(saveSpectrogam,['image' num2str(num) '.png']))
+        figure(1);
+        imagesc(flipud(acfs))
+
+        % One-hot encode: frame i gets a 1 in row (label + 1).
+        temp = zeros(size(acfs,1),size(acfs,2));
+        for i = 1:size(acfs,2)
+            temp(tempvec(i)+1,i) = 1;
+        end
+        figure(2);imagesc(flipud(temp))
+        variable = flipud(temp);
+        save(fullfile(saveMidi,['midi' num2str(num) '.mat']),'variable');
+    end
+end
+
+