-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyze-data.py
226 lines (187 loc) · 8.03 KB
/
analyze-data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
"""
@file analyze-data.py
@authors James Simmons, Leslie Horace
@brief script to automate data gathering, analysis, and plot creation
@version 1.0
"""
import glob
import os
import subprocess
import sys

import matplotlib.pyplot as plt
import seaborn as sns
def parse_cmd(chunk):
    """Extract the process/thread count and matrix size from a run's command line.

    chunk: list of output lines for one run; chunk[0] is the command that was run.
    Returns (procs, size) as ints.
    """
    tokens = chunk[0].split(' ')
    if "mpirun" in chunk[0]:
        # mpirun layout: ['mpirun', '-np', '<p>', '<prog>', ..., 'mat-<size>.bin', ...]
        proc = tokens[2]
        mat_str = tokens[5]
    else:
        # pthread layout: matrix file is the 4th-from-last token, thread count is last
        proc = tokens[-1]
        mat_str = tokens[-4]
    # assumes the matrix filename looks like 'mat-<size>.bin': strip the
    # 4-char prefix and 4-char suffix to isolate the size digits.
    # (The original converted size to int here in one branch and again on
    # return — the single conversion below is equivalent for both branches.)
    size = mat_str[4:-4]
    return int(proc), int(size)
# parse one run's output section and record its timings in the times table
def get_times(section, procs, times):
    """Parse a single run's output and store its overall/compute times.

    section: text block for one run; line 0 is the command line,
    lines 1 and 3 carry the overall and compute timing lines.
    Mutates `times` in place and returns it.
    """
    out_lines = section.split('\n')
    run_procs, run_size = parse_cmd(out_lines)
    col = procs.index(run_procs)
    # the timing value is the second-to-last space-separated token on its line
    for key, line_no in (('overall', 1), ('compute', 3)):
        times[key][run_size][col][1] = float(out_lines[line_no].split(' ')[-2])
    return times
# reads the gathered timing data file and builds the timing tables
def read_data(inFile):
    """Read a gathered-data file and collect sizes, proc counts, and times.

    inFile: open text file whose run sections are separated by a 60-dash line.
    Returns (sizes, procs, times) where times maps
    'overall'/'compute' -> size -> [[proc, time], ...].
    Closes inFile before returning.
    """
    sizes, procs = [], []
    separator = f"\n{'-'*60}\n"
    sections = inFile.read().split(separator)
    # first pass: discover every matrix size and process count present
    # (the final two split pieces are skipped, matching the data file's tail)
    for sec in sections[:-2]:
        p, s = parse_cmd(sec.split('\n'))
        if s not in sizes:
            sizes.append(s)
        if p not in procs:
            procs.append(p)
    times = {
        key: {s: [[p, 0.0] for p in procs] for s in sizes}
        for key in ('overall', 'compute')
    }
    # second pass: fill in the times; assumes sections are ordered by size
    # then by proc count (as produced by the gather script) — TODO confirm
    idx = 0
    for _ in sizes:
        for _ in range(len(procs)):
            times = get_times(sections[idx], procs, times)
            idx += 1
    inFile.close()
    return sizes, procs, times
# calculates and returns speedup data set
def calculate_speedup(time_data, procs, size):
    """Compute speedup relative to the serial (index 0) run for each size.

    Returns a dict shaped like time_data with times replaced by speedups;
    the serial entry is defined as 1.0.
    """
    speedup_data = {
        key: {s: [[p, 0.0] for p in procs] for s in size}
        for key in ('overall', 'compute')
    }
    for key, per_size in speedup_data.items():
        for s in size:
            baseline = time_data[key][s][0][1]
            per_size[s][0][1] = 1.0
            # speedup = serial time / parallel time
            for k in range(1, len(procs)):
                per_size[s][k][1] = baseline / time_data[key][s][k][1]
    return speedup_data
# calculates and returns efficiency data set
def calculate_efficiency(speedup_data, procs, size):
    """Compute parallel efficiency (speedup / proc count) per size and procs.

    Returns (efficiency_data, eff_max) where eff_max is the largest
    efficiency seen, floored at 1.05 (used downstream as a plot y-limit).
    """
    efficiency_data = {
        key: {s: [[p, 0.0] for p in procs] for s in size}
        for key in ('overall', 'compute')
    }
    eff_max = 1.05
    for key in efficiency_data:
        for s in size:
            efficiency_data[key][s][0][1] = 1.0  # serial efficiency is 1 by definition
            for k in range(1, len(procs)):
                eff = speedup_data[key][s][k][1] / procs[k]
                efficiency_data[key][s][k][1] = eff
                eff_max = max(eff_max, eff)
    return efficiency_data, eff_max
# calculates and returns karp flatt metric data set
def calculate_karp_flatt(speedup_data, procs, size):
    """Compute the Karp-Flatt experimentally determined serial fraction.

    e = ((1/speedup) - (1/p)) / (1 - (1/p)) for each parallel run.
    Returns (karp_flatt_data, karp_min) where karp_min is the smallest
    metric seen, capped at 0 (used downstream as a plot y-limit).
    """
    karp_flatt_data = {
        key: {s: [[p, 0.0] for p in procs] for s in size}
        for key in ('overall', 'compute')
    }
    karp_min = 0
    for key in karp_flatt_data:
        for s in size:
            # index 0 (serial) stays 0.0; the metric only applies to parallel runs
            for k in range(1, len(procs)):
                e_metric = (((1 / speedup_data[key][s][k][1]) - (1 / procs[k]))
                            / (1 - (1 / procs[k])))
                karp_flatt_data[key][s][k][1] = e_metric
                karp_min = min(karp_min, e_metric)
    return karp_flatt_data, karp_min
# generates overall and computation plots for each plot types
def create_plots(plot_data, outlier, sizes, procs, plotPath, plotType, yTitle):
    """Render side-by-side 'Overall' and 'Computation' line plots and save a PNG.

    plot_data: dict 'overall'/'compute' -> size -> [[proc, value], ...]
    outlier: extreme value used to pick a y-axis limit (recomputed locally
             for "Time" plots; ignored for "Speedup")
    sizes, procs: matrix sizes (one line each) and proc/thread counts (x-axis)
    plotPath: output path prefix; also selects MPI vs Pthreads labeling
    plotType: one of "Time", "Speedup", "Efficiency", "e" (Karp-Flatt)
    yTitle: y-axis label text
    Saves the figure to '<plotPath>-<plotType>.png' at 600 dpi.
    """
    sns.set_style("whitegrid")
    # Adjust the figsize and the space between plots
    fig, axes = plt.subplots(ncols=2, figsize=(18, 6))
    plot_subtype = ["Overall", "Computation"]
    # old-style and/or conditional; works because "OpenMPI" is truthy
    compType = "mpi" in plotPath and "OpenMPI" or "Pthreads"
    # data_set[0] = overall values, data_set[1] = compute values, keyed by size
    data_set = [
        {s: [plot_data['overall'][s][k][1] for k in range(len(procs))] for s in sizes},
        {s: [plot_data['compute'][s][k][1] for k in range(len(procs))] for s in sizes}
    ]
    for t, ax in enumerate(axes):
        # one line per matrix size on each subplot
        for s in sizes:
            sns.lineplot(x=procs, y=data_set[t][s], ax=ax, label=f'{s}x{s}', marker='o')
            if plotType == "Time":
                # NOTE(review): overwritten every size, so the y-limit below is
                # based only on the last size's max — confirm this is intended
                outlier = max(data_set[t][s])
        # per-type y-limits and the dashed "Ideal" reference line
        if plotType == "Speedup":
            ax.set_ylim(1, procs[-1])
            sns.lineplot(x=procs, y=procs, ax=ax, label=f'Ideal', linestyle="--")
        elif plotType == "Efficiency":
            ax.set_ylim(0, (outlier+0.01))
            sns.lineplot(x=procs, y=1, ax=ax, label=f'Ideal', linestyle="--")
        elif plotType == "e":
            ax.set_ylim((outlier-0.01), 1.01)
            sns.lineplot(x=procs, y=0, ax=ax, label=f'Ideal', linestyle="--")
        else:
            # "Time" plots: leave 5 units of headroom above the largest value
            ax.set_ylim(0, (outlier+5))
        ax.set_ylabel(yTitle)
        ax.set_xlabel("mpi" in plotPath and "#p (processes)" or "#t (threads)" )
        ax.set_xticks(range(procs[0],procs[-1]+1))
        ax.set_xlim(procs[0],procs[-1])
        ax.set_title(f"{compType}: {plotType}_{plot_subtype[t]}")
        ax.minorticks_on()
        ax.grid(True, which='minor', color='gray', linewidth=0.1)
        ax.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
    plt.tight_layout()
    fname = f'{plotPath}-{plotType}.png'
    plt.savefig(fname, dpi=600)
def main():
    """CLI entry point: read stencil timing data, derive speedup, efficiency,
    and Karp-Flatt metrics, then save the corresponding plots.

    Usage: python3 analyze-data.py <mpi | pthread>
    Reads './data/<prefix>_stencil_data.txt'; writes plots to './plots/' and
    a progress log to './logs/analyze_data_output.log'.
    Exits with status 1 on bad arguments or a missing data file.
    """
    log_dir = "./logs/"
    os.makedirs(log_dir, exist_ok=True)
    script_log_path = f"{log_dir}analyze_data_output.log"
    logFile = open(script_log_path, 'w')
    data_dir = "./data/"
    # validate argument count BEFORE indexing sys.argv[1]: the original read
    # sys.argv[1] first, so running with no argument raised IndexError and
    # the usage message was never written
    if len(sys.argv) == 2 and sys.argv[1].lower() in ('pthread', 'mpi'):
        # normalize case for both modes (previously only 'pthread' was lowered)
        prog_prefix = sys.argv[1].lower()
        data_path = f'{data_dir}{prog_prefix}_stencil_data.txt'
    else:
        logFile.write(f'Usage: python3 {sys.argv[0]} <mpi | pthread>')
        logFile.write("\nNote: files generated from scripts 'generate-matrix.py' and 'gather-data.py' are prerequisites")
        logFile.close()
        exit(1)
    plot_dir = "./plots/"
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    else:
        # clear stale plots; the old subprocess.run(['rm', '-r', '-f',
        # './plots/*.png']) never matched anything because shell globs are not
        # expanded when shell=False, and '-f' silenced the failure
        for old_png in glob.glob(f"{plot_dir}*.png"):
            os.remove(old_png)
    plot_path_prefix = f'{plot_dir}{prog_prefix}'
    if not os.path.isfile(data_path):
        logFile.write(f"Error [open]: '{data_path}' does not exist\nNote: 'generate-matrix.py' => 'gather-data.py' are prerequisites scripts ")
        logFile.close()
        exit(1)
    dataFile = open(data_path, 'r')
    logFile.write(f"> reading in time data from '{data_path}'...\n")
    mat_size, node_count, times = read_data(dataFile)  # read_data closes dataFile
    plot_type = ["Time", "Speedup", "Efficiency", "e"]
    y_title = ["Time (sec)","Sp (x's)", "Eff (%)", "e (%)"]
    logFile.write(f"\n> calculating {prog_prefix} overall and computation speedup...")
    speedup = calculate_speedup(times, node_count, mat_size)
    logFile.write(f"\n> calculating {prog_prefix} overall and computation efficiency...")
    efficiency, eff_max = calculate_efficiency(speedup, node_count, mat_size)
    logFile.write(f"\n> calculating {prog_prefix} overall and computation karp_flatt metric...")
    karp_flatt, karp_min = calculate_karp_flatt(speedup, node_count, mat_size)
    # generate one figure per metric; order mirrors plot_type/y_title
    logFile.write(f"\n> generating {prog_prefix} time plots...")
    create_plots(times, 0, mat_size, node_count, plot_path_prefix, plot_type[0], y_title[0])
    logFile.write(f"\n> generating {prog_prefix} speedup plots...")
    create_plots(speedup, 0, mat_size, node_count, plot_path_prefix, plot_type[1], y_title[1])
    logFile.write(f"\n> generating {prog_prefix} efficiency plots...")
    create_plots(efficiency, eff_max, mat_size, node_count, plot_path_prefix, plot_type[2], y_title[2])
    logFile.write(f"\n> generating {prog_prefix} karp flatt metric plots...")
    create_plots(karp_flatt, karp_min, mat_size, node_count, plot_path_prefix, plot_type[3], y_title[3])
    logFile.write(f"\n# saved all plots in directory '{plot_dir}'")
    logFile.close()

if __name__ == "__main__":
    main()