Skip to content

Commit

Permalink
adding parallel cifar10
Browse files Browse the repository at this point in the history
  • Loading branch information
jtchilders committed Mar 16, 2020
1 parent 55733dc commit 1806084
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 24 deletions.
4 changes: 2 additions & 2 deletions cifar10/problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
Problem.add_dim('pool_size',[2])
Problem.add_dim('conv2_out_chan',(3,64))
Problem.add_dim('conv2_kern',(3,8))
Problem.add_dim('fc1_out',(64,256))
Problem.add_dim('fc2_out',(32,128))
Problem.add_dim('fc1_out',(64,16384))
Problem.add_dim('fc2_out',(32,16384))
Problem.add_dim('fc3_out',[10])
Problem.add_dim('omp_num_threads',[64])

Expand Down
139 changes: 139 additions & 0 deletions cifar10_parallel/cifar10_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import time
import numpy as np


def run(point):
    """Time the forward pass of a small two-conv / three-linear CNN.

    Args:
        point: Mapping of hyperparameters. Keys read: 'batch_size',
            'image_size', 'conv1_in_chan', 'conv1_out_chan', 'conv1_kern',
            'pool_size', 'conv2_out_chan', 'conv2_kern', 'fc1_out',
            'fc2_out', 'fc3_out', 'n_conv_block' and 'omp_num_threads'.

    Returns:
        The estimated FLOP count of one forward pass divided by the mean
        wall-clock time of 5 timed forward passes. Returns 0. if anything
        raised (missing key, impossible layer geometry, torch error, ...);
        the exception and traceback are printed, not propagated.
    """
    start = time.time()
    try:
        batch_size = point['batch_size']
        image_size = point['image_size']
        conv1_in_chan = point['conv1_in_chan']
        conv1_out_chan = point['conv1_out_chan']
        conv1_kern = point['conv1_kern']
        pool_size = point['pool_size']
        conv2_out_chan = point['conv2_out_chan']
        conv2_kern = point['conv2_kern']
        fc1_out = point['fc1_out']
        fc2_out = point['fc2_out']
        fc3_out = point['fc3_out']
        n_conv_block = point['n_conv_block']

        omp_num_threads = point['omp_num_threads']

        # The threading environment is configured before torch is imported,
        # which is why both imports live inside the function.
        import os
        os.environ['OMP_NUM_THREADS'] = str(omp_num_threads)
        os.environ['MKL_NUM_THREADS'] = str(omp_num_threads)
        os.environ['KMP_HW_SUBSET'] = '1s,%sc,2t' % str(omp_num_threads)
        os.environ['KMP_AFFINITY'] = 'granularity=fine,verbose,compact,1,0'
        os.environ['KMP_BLOCKTIME'] = str(0)
        #os.environ['MKLDNN_VERBOSE'] = str(1)
        import torch

        print('torch version: ',torch.__version__,' torch file: ',torch.__file__)

        class Net(torch.nn.Module):
            """conv1 -> pool -> conv2 -> pool per block, then fc1 -> fc2 -> fc3.

            ``self.flop`` accumulates a rough FLOP estimate for one forward
            pass. The convolution/pooling terms are approximations based on
            the layer *input* spatial size, kept as in the original code.
            """

            def __init__(self, batch_size,
                         image_size,
                         conv1_in_chan, conv1_out_chan, conv1_kern,
                         pool_size,
                         conv2_out_chan, conv2_kern,
                         fc1_out,
                         fc2_out,
                         fc3_out,
                         n_conv_block,
                         ):
                super(Net, self).__init__()

                self.flop = 0
                self.n_conv_block = n_conv_block
                self.batch_size = batch_size

                self.conv1 = torch.nn.Conv2d(conv1_in_chan, conv1_out_chan, conv1_kern)
                self.flop += conv1_kern**2 * conv1_in_chan * conv1_out_chan * image_size**2 * batch_size
                self.pool = torch.nn.MaxPool2d(pool_size, pool_size)
                self.flop += image_size**2 * conv1_out_chan * batch_size
                self.conv2 = torch.nn.Conv2d(conv1_out_chan, conv2_out_chan, conv2_kern)
                self.flop += conv2_kern**2 * conv1_out_chan * conv2_out_chan * int(image_size/pool_size)**2 * batch_size

                # Spatial size of the feature map that reaches the linear
                # layers: unpadded stride-1 convolutions shrink by (kern - 1),
                # each pooling stage floor-divides by pool_size.  The previous
                # code used conv2_kern here, which is only right when the two
                # happen to coincide (e.g. 32 / 5 / 2 / 5 -> 5).
                pooled1 = (image_size - conv1_kern + 1) // pool_size
                pooled2 = (pooled1 - conv2_kern + 1) // pool_size
                if pooled2 < 1:
                    raise ValueError(
                        'kernel/pool sizes leave no spatial output for image_size=%s' % image_size)
                self.view_size = conv2_out_chan * pooled2 * pooled2

                self.fc1 = torch.nn.Linear(self.view_size, fc1_out)
                self.flop += (2*self.view_size - 1) * fc1_out * batch_size
                self.fc2 = torch.nn.Linear(fc1_out, fc2_out)
                self.flop += (2*fc1_out - 1) * fc2_out * batch_size
                self.fc3 = torch.nn.Linear(fc2_out, fc3_out)
                self.flop += (2*fc2_out - 1) * fc3_out * batch_size

            def forward(self, inputs):
                # Run the convolutional stages one batch_size-sized slice at a
                # time, gather the flattened features, then apply the linear
                # layers to all slices at once.
                block_output = torch.zeros(inputs.shape[0], self.view_size, dtype=torch.float)
                for i in range(self.n_conv_block):
                    lo = i * self.batch_size
                    hi = (i + 1) * self.batch_size
                    batch = inputs[lo:hi]

                    x = self.pool(torch.nn.functional.relu(self.conv1(batch)))
                    x = self.pool(torch.nn.functional.relu(self.conv2(x)))
                    x = x.view(-1, self.view_size)
                    block_output[lo:hi] = x

                x = torch.nn.functional.relu(self.fc1(block_output))
                x = torch.nn.functional.relu(self.fc2(x))
                x = self.fc3(x)
                return x

        inputs = torch.arange(batch_size * n_conv_block * image_size**2 * conv1_in_chan,dtype=torch.float)
        inputs = inputs.view((batch_size * n_conv_block,conv1_in_chan,image_size,image_size))
        net = Net(batch_size,
                  image_size,
                  conv1_in_chan,conv1_out_chan,conv1_kern,
                  pool_size,
                  conv2_out_chan,conv2_kern,
                  fc1_out,
                  fc2_out,
                  fc3_out,
                  n_conv_block)
        # Untimed first pass so one-off setup cost is excluded from the average.
        net(inputs)

        total_flop = net.flop

        runs = 5
        # perf_counter is monotonic and high resolution, unlike time.time().
        tic = time.perf_counter()
        for _ in range(runs):
            net(inputs)
        tot_time = time.perf_counter() - tic

        ave_time = tot_time / runs

        print('total_flop = ',total_flop,'ave_time = ',ave_time)

        ave_flops = total_flop / ave_time
        runtime = time.time() - start
        print('runtime=',runtime,'ave_flops=',ave_flops)

        return ave_flops
    except Exception as e:
        # Deliberate best-effort: a failing point scores 0 instead of
        # aborting the caller.
        import traceback
        print('received exception: ',str(e),'for point: ',point)
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print().
        traceback.print_exc()
        print('runtime=',time.time() - start)
        return 0.


if __name__ == '__main__':
    # Example hyperparameter point evaluated when the script is run directly.
    point = dict(
        batch_size=10,
        image_size=32,
        conv1_in_chan=3,
        conv1_out_chan=6,
        conv1_kern=5,
        pool_size=2,
        conv2_out_chan=16,
        conv2_kern=5,
        fc1_out=120,
        fc2_out=84,
        fc3_out=10,
        omp_num_threads=64,
        n_conv_block=6,
    )

    measured = run(point)
    print('flops for this setting =', measured)

23 changes: 23 additions & 0 deletions cifar10_parallel/problem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from deephyper.benchmark import HpProblem

# Hyperparameter search space for the parallel CIFAR10 benchmark.
# NOTE(review): presumably a tuple is a (low, high) range and a list is an
# explicit set of choices -- confirm against the deephyper documentation.
Problem = HpProblem()
for _dim_name, _dim_values in (
    ('batch_size', (1, 32)),
    ('image_size', [32]),
    ('conv1_in_chan', [3]),
    ('conv1_out_chan', (3, 64)),
    ('conv1_kern', (3, 8)),
    ('pool_size', [2]),
    ('conv2_out_chan', (3, 64)),
    ('conv2_kern', (3, 8)),
    ('fc1_out', (64, 512)),
    ('fc2_out', (32, 512)),
    ('fc3_out', [10]),
    ('omp_num_threads', [64]),
    ('n_conv_block', (1, 10)),
):
    Problem.add_dim(_dim_name, _dim_values)
# Keep the module namespace limited to `Problem`.
del _dim_name, _dim_values

# Initial point handed to the search; every value lies inside the
# corresponding dimension declared above.
Problem.add_starting_point(
    batch_size=10,
    image_size=32,
    conv1_in_chan=3,
    conv1_out_chan=16,
    conv1_kern=5,
    pool_size=2,
    conv2_out_chan=16,
    conv2_kern=5,
    fc1_out=128,
    fc2_out=84,
    fc3_out=10,
    omp_num_threads=64,
    n_conv_block=3,
)

if __name__ == '__main__':
    print(Problem)
67 changes: 46 additions & 21 deletions conv3d/conv3d_run.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,20 @@
import time
import time,psutil,os
import multiprocessing as mp

def print_mem_cpu():
    """Print one memory/CPU usage line per second, forever.

    Each line shows the seconds elapsed since this function started, the
    current process id, total system memory, free memory as a percentage of
    total, and the CPU utilisation reported by psutil.  The loop has no exit
    condition, so the function never returns; the caller in this file runs
    it as a ``multiprocessing.Process`` target and terminates that process.
    """
    t0 = time.time()
    line_format = '[%010d] pid=%010d total_mem=%010d free_mem=%05.2f cpu_usage=%05.2f'
    while True:
        vm = psutil.virtual_memory()
        elapsed = time.time() - t0
        pid = os.getpid()
        free_pct = vm.free / vm.total * 100.
        cpu_pct = psutil.cpu_percent()
        print(line_format % (elapsed, pid, vm.total, free_pct, cpu_pct))
        time.sleep(1)



def run(point):
print(point)
start = time.time()
memorymon = mp.Process(target=print_mem_cpu)
memorymon.start()
try:
batch_size = point['batch_size']
image_size = point['image_size']
Expand All @@ -21,48 +34,60 @@ def run(point):

print('torch version: ',torch.__version__,' torch file: ',torch.__file__)


inputs = torch.arange(batch_size * image_size**3 * in_channels,dtype=torch.float).view((batch_size,in_channels,image_size,image_size,image_size))

layer = torch.nn.Conv3d(in_channels,out_channels,kernel_size,stride=1,padding=1)
outputs = layer(inputs)

total_flop = kernel_size**3 * in_channels * out_channels * outputs.shape[-1] * outputs.shape[-2] * outputs.shape[-3] * batch_size

runs = 5
tot_time = 0.
tt = time.time()
for _ in range(runs):
with torch.no_grad():
inputs = torch.arange(batch_size * image_size**3 * in_channels,dtype=torch.float).view((batch_size,in_channels,image_size,image_size,image_size))
print('creating layer')
layer = torch.nn.Conv3d(in_channels,out_channels,kernel_size,stride=1,padding=1)
layer.eval()
print('first execution')
outputs = layer(inputs)
tot_time += time.time() - tt
tt = time.time()

ave_time = tot_time / runs

print('total_flop = ',total_flop,'ave_time = ',ave_time)
total_flop = kernel_size**3 * in_channels * out_channels * outputs.shape[-1] * outputs.shape[-2] * outputs.shape[-3] * batch_size

runs = 25
tot_time = 0.
tt = time.time()
print('loop')
for i in range(runs):
print('step',i)
outputs = layer(inputs)
tot_time += time.time() - tt
tt = time.time()

ave_time = tot_time / runs

ave_flops = total_flop / ave_time
runtime = time.time() - start
print('runtime=',runtime,'ave_flops=',ave_flops)
print('total_flop = ',total_flop,'ave_time = ',ave_time)

ave_flops = total_flop / ave_time
runtime = time.time() - start
print('runtime=',runtime,'ave_flops=',ave_flops)
memorymon.terminate()
memorymon.join()
return ave_flops
except Exception as e:
import traceback
print('received exception: ',str(e),'for point: ',point)
print(traceback.print_exc())
print('runtime=',time.time() - start)
memorymon.terminate()
memorymon.join()

return 0.


if __name__ == '__main__':
point = {
'batch_size': 10,
'image_size': 128,
'image_size': 64,
'in_channels': 3,
'out_channels': 3,
'kernel_size': 4,
'kernel_size': 3,
'omp_num_threads':64,
}

#point = {'batch_size': 4, 'image_size': 88, 'in_channels': 56, 'kernel_size': 10, 'omp_num_threads': 64, 'out_channels': 47}

print('flops for this setting =',run(point))

2 changes: 1 addition & 1 deletion conv3d/problem.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from deephyper.benchmark import HpProblem

Problem = HpProblem()
Problem.add_dim('batch_size',(1,32))
Problem.add_dim('batch_size',(1,64))
Problem.add_dim('image_size',(16,128))
Problem.add_dim('in_channels',(2,64))
Problem.add_dim('out_channels',(2,64))
Expand Down

0 comments on commit 1806084

Please sign in to comment.