diff --git a/amgx_precon_postprocess-base.py b/amgx_precon_postprocess-base.py new file mode 100644 index 0000000..18ea9a0 --- /dev/null +++ b/amgx_precon_postprocess-base.py @@ -0,0 +1,145 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supportted by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. + +from sys import stdout as out +import fileinput +import pprint + +##### Read all input files specified on the command line, or stdin and parse +##### the content, storing it as a list of dictionaries - one dictionary for +##### each test run. + +it=fileinput.input() +state=0 +line='' +i=0 +mesh_p=1 +config='unknown' +compiler='unknown' +test='unknown' +num_procs=0 +num_procs_node=0 +lnfmt='%05i' +data={} +runs=[] + +while True: + ## + if state%2==0: + ## + try: + line=it.next() + i=i+1 + except StopIteration: + break + state=state+1 + ## + elif state==1: + ## + state=0 + if 'Reading configuration' in line: + ## + ## This is the beginning of a new file. + ## + # out.write('\n'+lnfmt%i+': %s'%line) + config=line.split()[2] + num_procs=0 + num_procs_node=0 + elif 'Setting up compiler' in line: + # out.write(lnfmt%i+': %s'%line) + compiler=line.split()[3] + elif 'Reading test file: ' in line: + # out.write(lnfmt%i+': %s'%line) + test=line.strip().split('Reading test file: ',1)[-1] + elif 'Running the tests using a total of' in line: + # out.write(lnfmt%i+': %s'%line) + num_procs=int(line.split('a total of ',1)[1].split(None,1)[0]) + elif 'tasks per node' in line: + # out.write(lnfmt%i+': %s'%line) + num_procs_node=int(line.split(' tasks per',1)[0].rsplit(None,1)[1]) + elif line == 'Options used:\n': + ## + ## This is the beginning of a new run. 
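+            ## The previous run (if any) is committed to 'runs' only if it
+            ## produced a '"DOFs/sec" in CG' line, i.e. only if
+            ## 'cg-iteration-dps' was parsed; incomplete runs are dropped.
+            ## Each stored run is a plain dict, e.g. (illustrative values):
+            ##   {'config': ..., 'compiler': ..., 'test': ..., 'num-procs': 4,
+            ##    'order': 2, 'num-unknowns': 2097152, 'amg-setup': 0.12,
+            ##    'total-cg-time': 1.5, 'cg-iteration-dps': 3.2e8, ...}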
+ ## + if 'cg-iteration-dps' in data: + runs.append(data) + # print + # pprint.pprint(data) + data={} + data['file']=fileinput.filename() + data['config']=config + data['compiler']=compiler + data['test']=test + data['num-procs']=num_procs + data['num-procs-node']=num_procs_node + data['mesh-order']=mesh_p + data['code']="MFEM" + test_=test.rsplit('/',1)[-1] + data['case']='scalar' if (('bp1' in test_) or ('bp3' in test_) or + ('bp5' in test_)) else 'vector' + # out.write('\n'+lnfmt%i+': %s'%line) + # out.write('*'*len(lnfmt%i)+': --mesh-p %i\n'%mesh_p) + state=2 + elif 'setup' in line: + data['amg-setup'] = float(line.split()[1]) + #Used when using AmgX as a solver + elif 'solve(per iteration)' in line: + data['cost-of-precon'] = float(line.split()[2]) + elif 'Total iterations' in line: + data['iterations'] = float(line.split()[2]) + elif 'Time per CG step' in line: + data['time-per-cg-step'] = float(line.split()[4]) + elif 'Total CG time' in line: + data['total-cg-time'] = float(line.split()[3]) + elif '"DOFs/sec" in CG' in line: + # out.write(lnfmt%i+': %s'%line) + data['cg-iteration-dps']=1e6*float(line.split(' ')[3]) + elif 'Number of finite element unknowns' in line: + # out.write(lnfmt%i+': %s'%line) + data['num-unknowns']=long(line.rsplit(None,1)[1]) + elif 'Number of qudrature points per element' in line: + # out.write(lnfmt%i+': %s'%line) + data['quadrature-pts']=int(line.split(' = ',1)[1]) + elif 'Total number of elements:' in line: + # out.write(lnfmt%i+': %s'%line) + data['num-elem']=int(line.rsplit(None,1)[1]) + ## + elif state==3: + ## + if line[:5] == ' --': + # out.write(lnfmt%i+': %s'%line) + state=2 + opt=line[5:].strip().split(' ',1) + if opt[0] in ('order'): + data[opt[0]]=int(opt[1]) + elif opt[0] in ('device'): + data['mfem-device']=opt[1] + else: + state=1 + +if 'cg-iteration-dps' in data: + runs.append(data) + # print + # pprint.pprint(data) +for run in runs: + if not 'quadrature-pts' in run: + run['quadrature-pts']=0 +# print +# print +# pprint.pprint(runs) + +print 'Number of test runs read: %i'%len(runs) diff --git a/amgx_precon_postprocess-plot-2.py b/amgx_precon_postprocess-plot-2.py new file mode 100644 index 0000000..d217186 --- /dev/null +++ b/amgx_precon_postprocess-plot-2.py @@ -0,0 +1,220 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supported by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. + +#my_y_data = 'amg-setup' +#my_y_data = 'time-per-cg-step' +my_y_data = 'total-cg-time' + +##### Adjustable plot parameters: +log_y=0 # use log scale on the y-axis? 
+#x_range=(1e3,6e6) # plot range for the x-axis; comment out for auto +#y_range=(0,2.2e7) # plot range for the y-axis; comment out for auto +#y_range=(0,2.5e6) # plot range for the y-axis; comment out for auto +draw_iter_lines=0 # draw the "iter/s" lines? +ymin_iter_lines=3e5 # minimal y value for the "iter/s" lines +ymax_iter_lines=8e8 # maximal y value for the "iter/s" lines +legend_ncol=(2 if log_y else 1) # number of columns in the legend +write_figures=1 # save the figures to files? +show_figures=0 # display the figures on the screen? + + +##### Load the data +execfile('amgx_precon_postprocess-base.py') + + +##### Sample plot output + +from matplotlib import use +if not show_figures: + use('pdf') +from pylab import * + +rcParams['font.sans-serif'].insert(0,'Noto Sans') +rcParams['font.sans-serif'].insert(1,'Open Sans') +rcParams['figure.figsize']=[10, 8] # default: 8 x 6 + +cm=get_cmap('Set1') # 'Accent', 'Dark2', 'Set1', 'Set2', 'Set3' +if '_segmentdata' in cm.__dict__: + cm_size=len(cm.__dict__['_segmentdata']['red']) +elif 'colors' in cm.__dict__: + cm_size=len(cm.__dict__['colors']) +colors=[cm(1.*i/(cm_size-1)) for i in range(cm_size)] + +# colors=['blue','green','crimson','turquoise','m','gold','cornflowerblue', +# 'darkorange'] + +sel_runs=runs +configs=list(set([run['config'].rsplit('/',1)[-1].rsplit('.sh',1)[0] + for run in sel_runs])) +# print 'Present configurations:', configs +config=configs[0] +print 'Using configuration:', config +config_short=config +sel_runs=[run for run in sel_runs if + run['config'].rsplit('/',1)[-1].rsplit('.sh',1)[0]==config] + +tests=list(set([run['test'].rsplit('/',1)[-1].rsplit('.sh',1)[0] + for run in sel_runs])) +# print 'Present tests:', tests +test=tests[0] +print 'Using test:', test +test_short=test +sel_runs=[run for run in sel_runs if + run['test'].rsplit('/',1)[-1].rsplit('.sh',1)[0]==test] + +vdim=1 +prob=test + +codes = list(set([run['code'] for run in sel_runs])) +code = codes[0] +sel_runs=[run for run in sel_runs if run['code']==code] + +pl_set=[(run['compiler'],run['num-procs'],run['num-procs-node'], + run['mfem-device']) + for run in sel_runs] +pl_set=sorted(set(pl_set)) +print +pprint.pprint(pl_set) + +for plt in pl_set: + compiler=plt[0] + num_procs=plt[1] + num_procs_node=plt[2] + mfem_dev=plt[3] + num_nodes=num_procs/num_procs_node + pl_runs=[run for run in sel_runs + if run['compiler']==compiler and + run['num-procs']==num_procs and + run['num-procs-node']==num_procs_node and + run['mfem-device']==mfem_dev] + pl_runs=sorted(pl_runs) + if len(pl_runs)==0: + continue + + print + print 'compiler: %s, compute nodes: %i, number of MPI tasks = %i'%( + compiler,num_nodes,num_procs) + print 'mfem device: %s'%mfem_dev + + figure() + i=0 + sol_p_set=sorted(set([run['order'] for run in pl_runs])) + for sol_p in sol_p_set: + qpts=sorted(list(set([run['quadrature-pts'] for run in pl_runs + if run['order']==sol_p]))) + qpts.reverse() + print 'Order: %i, quadrature points:'%sol_p, qpts + qpts_1d=[int(q**(1./3)+0.5) for q in qpts] + + d=[[run['order'],run['num-elem'],1.*run['num-unknowns']/num_nodes/vdim, + (run['num-unknowns']/run[my_y_data])/1e6] +# run['cg-iteration-dps']/num_nodes] + for run in pl_runs + if run['order']==sol_p and + run['quadrature-pts']==qpts[0]] + # print + # print 'order = %i'%sol_p + # pprint.pprint(sorted(d)) + d=[[e[2],e[3]] for e in d if e[0]==sol_p] + # (DOFs/[sec/iter]/node)/(DOFs/node) = iter/sec + d=[[nun, + min([e[1] for e in d if e[0]==nun]), + max([e[1] for e in d if e[0]==nun])] + for nun in set([e[0] for e 
in d])] + d=asarray(sorted(d)) + ## Legend with 'q=...' + # plot(d[:,0],d[:,2],'o-',color=colors[i%cm_size], + # label='p=%i, q=p+%i'%(sol_p,qpts_1d[0]-sol_p)) + ## Legend without 'q=...' + print(d) + plot(d[:,0],d[:,2],'o-',color=colors[i%cm_size], + label='p=%i'%sol_p) + if list(d[:,1]) != list(d[:,2]): + plot(d[:,0],d[:,1],'o-',color=colors[i]) + fill_between(d[:,0],d[:,1],d[:,2],facecolor=colors[i],alpha=0.2) + ## + if len(qpts)==1: + i=i+1 + continue + d=[[run['order'],run['num-elem'],1.*run['num-unknowns']/num_nodes/vdim, + run['cg-iteration-dps']/num_nodes] + for run in pl_runs + if run['order']==sol_p and (run['quadrature-pts']==qpts[1])] + d=[[e[2],e[3]] for e in d if e[0]==sol_p] + if len(d)==0: + i=i+1 + continue + d=[[nun, + min([e[1] for e in d if e[0]==nun]), + max([e[1] for e in d if e[0]==nun])] + for nun in set([e[0] for e in d])] + d=asarray(sorted(d)) + plot(d[:,0],d[:,2],'s--',color=colors[i], + label='p=%i, q=p+%i'%(sol_p,qpts_1d[1]-sol_p)) + if list(d[:,1]) != list(d[:,2]): + plot(d[:,0],d[:,1],'s--',color=colors[i]) + ## + i=i+1 + ## + if draw_iter_lines: + y0,y1=ymin_iter_lines,ymax_iter_lines + y=asarray([y0,y1]) if log_y else exp(linspace(log(y0), log(y1))) + slope1=600. + slope2=6000. + plot(y/slope1,y,'k--',label='%g iter/s'%(slope1/vdim)) + plot(y/slope2,y,'k-',label='%g iter/s'%(slope2/vdim)) + + title('Config: %s/%s, host: %s (%i node%s, %i task%s/node), %s, %s'%( + code,mfem_dev,config_short,num_nodes,'' if num_nodes==1 else 's', + num_procs_node,'' if num_procs_node==1 else 's', compiler, + test)) + xscale('log') # subsx=[2,4,6,8] + if log_y: + yscale('log') + if 'x_range' in vars() and len(x_range)==2: + xlim(x_range) + if 'y_range' in vars() and len(y_range)==2: + ylim(y_range) + # rng=arange(1e7,1.02e8,1e7) + # yticks(rng,['%i'%int(v/1e6) for v in rng]) + # ylim(min(rng),max(rng)) + # xlim(0.5,max([run['order'] for run in pl_runs])+0.5) + grid('on', color='gray', ls='dotted') + grid('on', axis='both', which='minor', color='gray', ls='dotted') + gca().set_axisbelow(True) + xlabel('Points per compute node') + my_y_label = 'DOFS /' + my_y_data + ylabel(my_y_label) + legend(ncol=legend_ncol, loc='best') + + if write_figures: # write .pdf file? + pdf_file='plot2_%s_%s_%s_%s_%s_N%03i_pn%i.pdf'%( + code,mfem_dev,test,config_short,compiler, + num_nodes,num_procs_node) + print 'saving figure --> %s'%pdf_file + savefig(pdf_file, format='pdf', bbox_inches='tight') + + if write_figures: # write .png file? + png_file='plot2_%s_%s_%s_%s_%s_N%03i_pn%i.png'%( + code,mfem_dev,test,config_short,compiler, + num_nodes,num_procs_node) + print 'saving figure --> %s'%png_file + savefig(png_file, format='png', bbox_inches='tight') + +if show_figures: # show the figures? + print '\nshowing figures ...' + show() diff --git a/amgx_precon_postprocess-table.py b/amgx_precon_postprocess-table.py new file mode 100644 index 0000000..59dd8b4 --- /dev/null +++ b/amgx_precon_postprocess-table.py @@ -0,0 +1,45 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. 
+# +# The CEED research is supportted by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. + + +##### Load the data +execfile('amgx_precon_postprocess-base.py') + +def safe_div(x,y): + if y == 0: + return 0 + return x / y + +##### Sample data output + +set1=sorted( + [(run['mfem-device'], + run['order'],run['compiler'],run['num-procs'], + run['num-unknowns'], safe_div(run['num-unknowns'],run['cost-of-precon'])/1e6 , run['iterations'], + safe_div(run['num-unknowns'],run['time-per-cg-step'])/1e6, safe_div(run['num-unknowns'], + run['total-cg-time'])/1e6, safe_div(run['num-unknowns'],run['amg-setup'])/1e6 + ,run['cg-iteration-dps']/1e6) + for run in runs]) + +out.write('''\ + mfem | | comp | |number of | precon dps | number of | cg-1-it dps | cg-a-it dps | amg-set dps |cg-iter dps + device | p | iler | np |unknowns | millions | iterations | millions | millions | millions |millions +-----------+----+--------+-----+---------+-------------+-------------+-------------+--------------+--------------+------------- +''') +line_fmt=' %9s | %2i | %6s | %3i | %6i | %11.6f | %7i | %11.8f | %11.6f | %11.6f |%11.6f\n' +for run in set1: + out.write(line_fmt%run) diff --git a/amgx_solver_postprocess-base.py b/amgx_solver_postprocess-base.py new file mode 100644 index 0000000..e01095b --- /dev/null +++ b/amgx_solver_postprocess-base.py @@ -0,0 +1,143 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supportted by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. + +from sys import stdout as out +import fileinput +import pprint + +##### Read all input files specified on the command line, or stdin and parse +##### the content, storing it as a list of dictionaries - one dictionary for +##### each test run. + +it=fileinput.input() +state=0 +line='' +i=0 +mesh_p=1 +config='unknown' +compiler='unknown' +test='unknown' +num_procs=0 +num_procs_node=0 +lnfmt='%05i' +data={} +runs=[] + +while True: + ## + if state%2==0: + ## + try: + line=it.next() + i=i+1 + except StopIteration: + break + state=state+1 + ## + elif state==1: + ## + state=0 + if 'Reading configuration' in line: + ## + ## This is the beginning of a new file. 
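+            ## The configuration name is read from this line and the per-file
+            ## MPI counters are reset; they are filled in again when the
+            ## 'Running the tests using a total of ...' and '... tasks per
+            ## node' lines are seen.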
+ ## + # out.write('\n'+lnfmt%i+': %s'%line) + config=line.split()[2] + num_procs=0 + num_procs_node=0 + elif 'Setting up compiler' in line: + # out.write(lnfmt%i+': %s'%line) + compiler=line.split()[3] + elif 'Reading test file: ' in line: + # out.write(lnfmt%i+': %s'%line) + test=line.strip().split('Reading test file: ',1)[-1] + elif 'Running the tests using a total of' in line: + # out.write(lnfmt%i+': %s'%line) + num_procs=int(line.split('a total of ',1)[1].split(None,1)[0]) + elif 'tasks per node' in line: + # out.write(lnfmt%i+': %s'%line) + num_procs_node=int(line.split(' tasks per',1)[0].rsplit(None,1)[1]) + elif line == 'Options used:\n': + ## + ## This is the beginning of a new run. + ## + if 'cg-iteration-dps' in data: + runs.append(data) + # print + # pprint.pprint(data) + data={} + data['file']=fileinput.filename() + data['config']=config + data['compiler']=compiler + data['test']=test + data['num-procs']=num_procs + data['num-procs-node']=num_procs_node + data['mesh-order']=mesh_p + data['code']="MFEM" + test_=test.rsplit('/',1)[-1] + data['case']='scalar' if (('bp1' in test_) or ('bp3' in test_) or + ('bp5' in test_)) else 'vector' + # out.write('\n'+lnfmt%i+': %s'%line) + # out.write('*'*len(lnfmt%i)+': --mesh-p %i\n'%mesh_p) + state=2 + elif 'setup' in line: + data['amg-setup'] = float(line.split()[1]) + #Used when using AmgX as a solver + elif 'solve(per iteration)' in line: + data['time-per-cg-step'] = float(line.split()[2]) + elif 'Total iterations' in line: + data['iterations'] = float(line.split()[2]) + elif 'Total CG time' in line: + data['total-cg-time'] = float(line.split()[3]) + elif '"DOFs/sec" in CG' in line: + # out.write(lnfmt%i+': %s'%line) + data['cg-iteration-dps']=1e6*float(line.split(' ')[3]) + elif 'Number of finite element unknowns' in line: + # out.write(lnfmt%i+': %s'%line) + data['num-unknowns']=long(line.rsplit(None,1)[1]) + elif 'Number of qudrature points per element' in line: + # out.write(lnfmt%i+': %s'%line) + data['quadrature-pts']=int(line.split(' = ',1)[1]) + elif 'Total number of elements:' in line: + # out.write(lnfmt%i+': %s'%line) + data['num-elem']=int(line.rsplit(None,1)[1]) + ## + elif state==3: + ## + if line[:5] == ' --': + # out.write(lnfmt%i+': %s'%line) + state=2 + opt=line[5:].strip().split(' ',1) + if opt[0] in ('order'): + data[opt[0]]=int(opt[1]) + elif opt[0] in ('device'): + data['mfem-device']=opt[1] + else: + state=1 + +if 'cg-iteration-dps' in data: + runs.append(data) + # print + # pprint.pprint(data) +for run in runs: + if not 'quadrature-pts' in run: + run['quadrature-pts']=0 +# print +# print +# pprint.pprint(runs) + +print 'Number of test runs read: %i'%len(runs) diff --git a/amgx_solver_postprocess-plot-2.py b/amgx_solver_postprocess-plot-2.py new file mode 100644 index 0000000..3f83c01 --- /dev/null +++ b/amgx_solver_postprocess-plot-2.py @@ -0,0 +1,225 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supported by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. 
Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. + +#my_y_data = 'amg-setup' +#my_y_data = 'time-per-cg-step' +my_y_data = 'total-cg-time' + +def safe_div(x,y): + if y == 0: + return 0 + return x / y + +##### Adjustable plot parameters: +log_y=0 # use log scale on the y-axis? +#x_range=(1e3,6e6) # plot range for the x-axis; comment out for auto +#y_range=(0,2.2e7) # plot range for the y-axis; comment out for auto +#y_range=(0,2.5e6) # plot range for the y-axis; comment out for auto +draw_iter_lines=0 # draw the "iter/s" lines? +ymin_iter_lines=3e5 # minimal y value for the "iter/s" lines +ymax_iter_lines=8e8 # maximal y value for the "iter/s" lines +legend_ncol=(2 if log_y else 1) # number of columns in the legend +write_figures=1 # save the figures to files? +show_figures=0 # display the figures on the screen? + + +##### Load the data +execfile('amgx_solver_postprocess-base.py') + + +##### Sample plot output + +from matplotlib import use +if not show_figures: + use('pdf') +from pylab import * + +rcParams['font.sans-serif'].insert(0,'Noto Sans') +rcParams['font.sans-serif'].insert(1,'Open Sans') +rcParams['figure.figsize']=[10, 8] # default: 8 x 6 + +cm=get_cmap('Set1') # 'Accent', 'Dark2', 'Set1', 'Set2', 'Set3' +if '_segmentdata' in cm.__dict__: + cm_size=len(cm.__dict__['_segmentdata']['red']) +elif 'colors' in cm.__dict__: + cm_size=len(cm.__dict__['colors']) +colors=[cm(1.*i/(cm_size-1)) for i in range(cm_size)] + +# colors=['blue','green','crimson','turquoise','m','gold','cornflowerblue', +# 'darkorange'] + +sel_runs=runs +configs=list(set([run['config'].rsplit('/',1)[-1].rsplit('.sh',1)[0] + for run in sel_runs])) +# print 'Present configurations:', configs +config=configs[0] +print 'Using configuration:', config +config_short=config +sel_runs=[run for run in sel_runs if + run['config'].rsplit('/',1)[-1].rsplit('.sh',1)[0]==config] + +tests=list(set([run['test'].rsplit('/',1)[-1].rsplit('.sh',1)[0] + for run in sel_runs])) +# print 'Present tests:', tests +test=tests[0] +print 'Using test:', test +test_short=test +sel_runs=[run for run in sel_runs if + run['test'].rsplit('/',1)[-1].rsplit('.sh',1)[0]==test] + +vdim=1 +prob=test + +codes = list(set([run['code'] for run in sel_runs])) +code = codes[0] +sel_runs=[run for run in sel_runs if run['code']==code] + +pl_set=[(run['compiler'],run['num-procs'],run['num-procs-node'], + run['mfem-device']) + for run in sel_runs] +pl_set=sorted(set(pl_set)) +print +pprint.pprint(pl_set) + +for plt in pl_set: + compiler=plt[0] + num_procs=plt[1] + num_procs_node=plt[2] + mfem_dev=plt[3] + num_nodes=num_procs/num_procs_node + pl_runs=[run for run in sel_runs + if run['compiler']==compiler and + run['num-procs']==num_procs and + run['num-procs-node']==num_procs_node and + run['mfem-device']==mfem_dev] + pl_runs=sorted(pl_runs) + if len(pl_runs)==0: + continue + + print + print 'compiler: %s, compute nodes: %i, number of MPI tasks = %i'%( + compiler,num_nodes,num_procs) + print 'mfem device: %s'%mfem_dev + + figure() + i=0 + sol_p_set=sorted(set([run['order'] for run in pl_runs])) + for sol_p in sol_p_set: + qpts=sorted(list(set([run['quadrature-pts'] for run in pl_runs + if run['order']==sol_p]))) + qpts.reverse() + print 'Order: %i, quadrature 
points:'%sol_p, qpts + qpts_1d=[int(q**(1./3)+0.5) for q in qpts] + + d=[[run['order'],run['num-elem'],1.*run['num-unknowns']/num_nodes/vdim, + safe_div(run['num-unknowns'],run[my_y_data])/1e6] +# run['cg-iteration-dps']/num_nodes] + for run in pl_runs + if run['order']==sol_p and + run['quadrature-pts']==qpts[0]] + # print + # print 'order = %i'%sol_p + # pprint.pprint(sorted(d)) + d=[[e[2],e[3]] for e in d if e[0]==sol_p] + # (DOFs/[sec/iter]/node)/(DOFs/node) = iter/sec + d=[[nun, + min([e[1] for e in d if e[0]==nun]), + max([e[1] for e in d if e[0]==nun])] + for nun in set([e[0] for e in d])] + d=asarray(sorted(d)) + ## Legend with 'q=...' + # plot(d[:,0],d[:,2],'o-',color=colors[i%cm_size], + # label='p=%i, q=p+%i'%(sol_p,qpts_1d[0]-sol_p)) + ## Legend without 'q=...' + print(d) + plot(d[:,0],d[:,2],'o-',color=colors[i%cm_size], + label='p=%i'%sol_p) + if list(d[:,1]) != list(d[:,2]): + plot(d[:,0],d[:,1],'o-',color=colors[i]) + fill_between(d[:,0],d[:,1],d[:,2],facecolor=colors[i],alpha=0.2) + ## + if len(qpts)==1: + i=i+1 + continue + d=[[run['order'],run['num-elem'],1.*run['num-unknowns']/num_nodes/vdim, + run['cg-iteration-dps']/num_nodes] + for run in pl_runs + if run['order']==sol_p and (run['quadrature-pts']==qpts[1])] + d=[[e[2],e[3]] for e in d if e[0]==sol_p] + if len(d)==0: + i=i+1 + continue + d=[[nun, + min([e[1] for e in d if e[0]==nun]), + max([e[1] for e in d if e[0]==nun])] + for nun in set([e[0] for e in d])] + d=asarray(sorted(d)) + plot(d[:,0],d[:,2],'s--',color=colors[i], + label='p=%i, q=p+%i'%(sol_p,qpts_1d[1]-sol_p)) + if list(d[:,1]) != list(d[:,2]): + plot(d[:,0],d[:,1],'s--',color=colors[i]) + ## + i=i+1 + ## + if draw_iter_lines: + y0,y1=ymin_iter_lines,ymax_iter_lines + y=asarray([y0,y1]) if log_y else exp(linspace(log(y0), log(y1))) + slope1=600. + slope2=6000. + plot(y/slope1,y,'k--',label='%g iter/s'%(slope1/vdim)) + plot(y/slope2,y,'k-',label='%g iter/s'%(slope2/vdim)) + + title('Config: %s/%s, host: %s (%i node%s, %i task%s/node), %s, %s'%( + code,mfem_dev,config_short,num_nodes,'' if num_nodes==1 else 's', + num_procs_node,'' if num_procs_node==1 else 's', compiler, + test)) + xscale('log') # subsx=[2,4,6,8] + if log_y: + yscale('log') + if 'x_range' in vars() and len(x_range)==2: + xlim(x_range) + if 'y_range' in vars() and len(y_range)==2: + ylim(y_range) + # rng=arange(1e7,1.02e8,1e7) + # yticks(rng,['%i'%int(v/1e6) for v in rng]) + # ylim(min(rng),max(rng)) + # xlim(0.5,max([run['order'] for run in pl_runs])+0.5) + grid('on', color='gray', ls='dotted') + grid('on', axis='both', which='minor', color='gray', ls='dotted') + gca().set_axisbelow(True) + xlabel('Points per compute node') + my_y_label = 'DOFS /' + my_y_data + ylabel(my_y_label) + legend(ncol=legend_ncol, loc='best') + + if write_figures: # write .pdf file? + pdf_file='plot2_%s_%s_%s_%s_%s_N%03i_pn%i.pdf'%( + code,mfem_dev,test,config_short,compiler, + num_nodes,num_procs_node) + print 'saving figure --> %s'%pdf_file + savefig(pdf_file, format='pdf', bbox_inches='tight') + + if write_figures: # write .png file? + png_file='plot2_%s_%s_%s_%s_%s_N%03i_pn%i.png'%( + code,mfem_dev,test,config_short,compiler, + num_nodes,num_procs_node) + print 'saving figure --> %s'%png_file + savefig(png_file, format='png', bbox_inches='tight') + +if show_figures: # show the figures? + print '\nshowing figures ...' 
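+    # show() blocks until all open figure windows are closed.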
+ show() diff --git a/amgx_solver_postprocess-table.py b/amgx_solver_postprocess-table.py new file mode 100644 index 0000000..995bd2a --- /dev/null +++ b/amgx_solver_postprocess-table.py @@ -0,0 +1,44 @@ + +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supportted by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. + + +##### Load the data +execfile('amgx_solver_postprocess-base.py') + +def safe_div(x,y): + if y == 0: + return 0 + return x / y + +##### Sample data output + +set1=sorted( + [(run['mfem-device'], + run['order'],run['compiler'],run['num-procs'], + run['num-unknowns'], run['iterations'], + safe_div(run['num-unknowns'],run['time-per-cg-step'])/1e6, safe_div(run['num-unknowns'],run['total-cg-time'])/1e6, safe_div(run['num-unknowns'],run['amg-setup'])/1e6, run['cg-iteration-dps']/1e6) + for run in runs]) + +out.write('''\ + mfem | | comp | |number of | number of | cg-1-it dps | cg-a-it dps | amg-set dps |cg-iter dps + device | p | iler | np |unknowns | iterations | millions | millions | millions | millions +-----------+----+--------+-----+----------+-------------+--------------+--------------+--------------+------------- +''') +line_fmt=' %9s | %2i | %6s | %3i | %6i | %11i | %11.6f| %11.6f |%11.6f | %11.6f \n' +for run in set1: + out.write(line_fmt%run) diff --git a/run_all_amgx.sh b/run_all_amgx.sh new file mode 100644 index 0000000..0703554 --- /dev/null +++ b/run_all_amgx.sh @@ -0,0 +1,5 @@ +#Using Block Jacobi Smoother + ./go.sh -c lassen -m xlc -r tests/mfem_amgx/ex1p_amgx_solver.sh num_proc_build=36 >> amgx_solver.txt && + ./go.sh -c lassen -m xlc -r tests/mfem_amgx/ex1p_amgx_precon.sh num_proc_build=36 >> amgx_precon.txt && + ./go.sh -c lassen -m xlc -r tests/mfem_amgx/ex1p_amgx_agg_precon.sh num_proc_build=36 >> amgx_agg_precon.txt && + ./go.sh -c lassen -m xlc -r tests/mfem_amgx/ex1p_amgx_agg_solver.sh num_proc_build=36 >> amgx_agg_solver.txt diff --git a/run_all_amgx_l1.sh b/run_all_amgx_l1.sh new file mode 100644 index 0000000..0786ec2 --- /dev/null +++ b/run_all_amgx_l1.sh @@ -0,0 +1,5 @@ +#Using L1 Jacobi Smoother +./go.sh -c lassen -m xlc -r tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.sh num_proc_build=36 >> amgx_solver_l1_jacobi.txt && +./go.sh -c lassen -m xlc -r tests/mfem_amgx/ex1p_amgx_precon_l1_jacobi.sh num_proc_build=36 >> amgx_precon_l1_jacobi.txt && +./go.sh -c lassen -m xlc -r tests/mfem_amgx/ex1p_amgx_agg_precon_l1_jacobi.sh num_proc_build=36 >> amgx_agg_precon_l1_jacobi.txt && +./go.sh -c lassen -m xlc -r tests/mfem_amgx/ex1p_amgx_agg_solver_l1_jacobi.sh num_proc_build=36 >> amgx_agg_solver_l1_jacobi.txt diff --git a/tests/mfem_amgx/ex1p_amgx_agg_precon.cpp b/tests/mfem_amgx/ex1p_amgx_agg_precon.cpp new 
file mode 100644 index 0000000..2b510b1 --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_agg_precon.cpp @@ -0,0 +1,323 @@ +// MFEM Example 1 - Parallel Version +// +// Compile with: make ex1p +// +// Description: This example code demonstrates the use of MFEM to define a +// simple finite element discretization of the Laplace problem +// -Delta u = 1 with homogeneous Dirichlet boundary conditions. +// Specifically, we discretize using a FE space of the specified +// order, or if order < 1 using an isoparametric/isogeometric +// space (i.e. quadratic for quadratic curvilinear mesh, NURBS for +// NURBS mesh, etc.) + +#include "mfem.hpp" +#include +#include + +using namespace std; +using namespace mfem; + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz); + +int main(int argc, char *argv[]) +{ + // 1. Initialize MPI. + int num_procs, myid; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + + // 2. Parse command-line options. + int dim = 3; + int level = 0; + int order = 1; + const char *device_config = "cuda"; + + OptionsParser args(argc, argv); + args.AddOption(&dim, "-dim", "--mesh-dimension", + "Solve 2D or 3D problem."); + args.AddOption(&level, "-l", "--refinement-level", + "Set the problem size: 2^level mesh elements per processor."); + args.AddOption(&order, "-o", "--order", + "Finite element order (polynomial degree)."); + args.AddOption(&device_config, "-d", "--device", + "Device configuration string, see Device::Configure()."); + args.Parse(); + if (!args.Good()) + { + if (myid == 0) + { + args.PrintUsage(cout); + } + MPI_Finalize(); + return 1; + } + if (myid == 0) + { + args.PrintOptions(cout); + } + + // 3. Enable hardware devices such as GPUs, and programming models such as + // CUDA, OCCA, RAJA and OpenMP based on command line options. + Device device(device_config); + if (myid == 0) { device.Print(); } + + // 4. Read the (serial) mesh from the given mesh file on all processors. We + // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface + // and volume meshes with the same code. + int par_ref_levels; + Array nxyz; + Mesh *mesh = make_mesh(myid, num_procs, dim, level, par_ref_levels, nxyz); + + // 5. Refine the serial mesh on all processors to increase the resolution. In + // this example we do 'ref_levels' of uniform refinement. We choose + // 'ref_levels' to be the largest number that gives a final mesh with no + // more than 10,000 elements. + { + // Disabled + } + + // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine + // this mesh further in parallel to increase the resolution. Once the + // parallel mesh is defined, the serial mesh can be deleted. + int *partitioning = nxyz.Size() ? mesh->CartesianPartitioning(nxyz) : NULL; + ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh, partitioning); + delete [] partitioning; + delete mesh; + for (int l = 0; l < par_ref_levels; l++) + { + pmesh->UniformRefinement(); + } + // pmesh->PrintInfo(); + long global_ne = pmesh->ReduceInt(pmesh->GetNE()); + if (myid == 0) + { + cout << "Total number of elements: " << global_ne << endl; + } + + // 7. Define a parallel finite element space on the parallel mesh. Here we + // use continuous Lagrange finite elements of the specified order. If + // order < 1, we instead use an isoparametric/isogeometric space. 
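+   //    (This benchmark variant only accepts order >= 1; the isoparametric
+   //    fallback mentioned above is not implemented here, see the
+   //    MFEM_VERIFY below.)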
+ FiniteElementCollection *fec; + MFEM_VERIFY(order > 0, "invalid 'order': " << order); + fec = new H1_FECollection(order, dim); + ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec); + HYPRE_Int size = fespace->GlobalTrueVSize(); + if (myid == 0) + { + cout << "Number of finite element unknowns: " << size << endl; + } + + // 8. Determine the list of true (i.e. parallel conforming) essential + // boundary dofs. In this example, the boundary conditions are defined + // by marking all the boundary attributes from the mesh as essential + // (Dirichlet) and converting them to a list of true dofs. + Array ess_tdof_list; + if (pmesh->bdr_attributes.Size()) + { + Array ess_bdr(pmesh->bdr_attributes.Max()); + ess_bdr = 1; + fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list); + } + + // 9. Set up the parallel linear form b(.) which corresponds to the + // right-hand side of the FEM linear system, which in this case is + // (1,phi_i) where phi_i are the basis functions in fespace. + ParLinearForm *b = new ParLinearForm(fespace); + ConstantCoefficient one(1.0); + b->AddDomainIntegrator(new DomainLFIntegrator(one)); + b->Assemble(); + + // 10. Define the solution vector x as a parallel finite element grid function + // corresponding to fespace. Initialize x with initial guess of zero, + // which satisfies the boundary conditions. + ParGridFunction x(fespace); + x = 0.0; + + // 11. Set up the parallel bilinear form a(.,.) on the finite element space + // corresponding to the Laplacian operator -Delta, by adding the Diffusion + // domain integrator. + ParBilinearForm *a = new ParBilinearForm(fespace); + a->AddDomainIntegrator(new DiffusionIntegrator(one)); + + // 12. Assemble the parallel bilinear form and the corresponding linear + // system, applying any necessary transformations such as: parallel + // assembly, eliminating boundary conditions, applying conforming + // constraints for non-conforming AMR, static condensation, etc. + a->Assemble(); + + OperatorPtr A; + Vector B, X; + a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B); + + // 13. Solve the linear system A X = B. + // * With full assembly, use the preconditioner from AmgX. 
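+   //     The JSON string assembled below configures AmgX as a single
+   //     aggregation-AMG V-cycle (selector SIZE_2, interpolator D2, one
+   //     pre/post sweep of damped block-Jacobi with relaxation factor 0.7,
+   //     "max_iters": 1), which is then attached as a preconditioner to
+   //     MFEM's CGSolver (rel. tol. 1e-12, at most 200 iterations).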
+ + bool amgx_verbose = true; + + string amgx_config = "{\n" + " \"config_version\": 2, \n" + " \"solver\": { \n" + " \"algorithm\": \"AGGREGATION\", \n" + " \"solver\": \"AMG\", \n" + " \"smoother\": { \n" + " \"scope\": \"jacobi\", \n" + " \"solver\": \"BLOCK_JACOBI\", \n" + " \"relaxation_factor\": 0.7 \n" + " }, \n" + " \"presweeps\": 1, \n" + " \"interpolator\": \"D2\", \n" + " \"selector\": \"SIZE_2\", \n" + " \"max_row_sum\" : 0.9, \n" + " \"strength_threshold\" : 0.25, \n" + " \"postsweeps\": 1, \n" + " \"max_iters\": 1, \n" + " \"cycle\": \"V\""; + if (amgx_verbose) + { + amgx_config = amgx_config + ",\n" + " \"obtain_timings\": 1 \n"; + } + else + { + amgx_config = amgx_config + "\n"; + } + amgx_config = amgx_config + " }\n" + "}\n"; + + AmgXSolver *prec = new AmgXSolver; + prec->SetConvergenceCheck(false); + prec->ReadParameters(amgx_config, AmgXSolver::INTERNAL); + prec->InitExclusiveGPU(MPI_COMM_WORLD); + + CGSolver cg(MPI_COMM_WORLD); + const int max_cg_iter = 200; + const int cg_print_level = 3; + cg.SetRelTol(1e-12); + cg.SetMaxIter(max_cg_iter); + cg.SetPrintLevel(cg_print_level); + if (prec) { cg.SetPreconditioner(*prec); } + cg.SetOperator(*A); + + // Warm-up CG solve (in case of JIT to avoid timing it) + { + Vector Xtmp(X); + cg.SetMaxIter(2); + cg.SetPrintLevel(-1); + cg.Mult(B, Xtmp); + cg.SetMaxIter(max_cg_iter); + cg.SetPrintLevel(cg_print_level); + } + + // Sync all ranks + MPI_Barrier(pmesh->GetComm()); + + // Start CG timing. + tic_toc.Clear(); + + // Start & Stop CG timing. + tic_toc.Start(); + cg.Mult(B, X); + tic_toc.Stop(); + double rt_min, rt_max, my_rt; + my_rt = tic_toc.RealTime(); + MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, pmesh->GetComm()); + MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, pmesh->GetComm()); + + // Print timing results. + if (myid == 0) + { + int cg_iter = cg.GetNumIterations(); + // Note: In the pcg algorithm, the number of operator Mult() calls is + // N_iter and the number of preconditioner Mult() calls is N_iter+1. + cout << '\n' + << "Total iterations "<< cg_iter; + cout << '\n' + << "Total CG time: " << rt_max << " (" << rt_min << ") sec." + << endl; + cout << "Time per CG step: " + << rt_max / cg_iter << " (" + << rt_min / cg_iter << ") sec." << endl; + cout << "\n\"DOFs/sec\" in CG: " + << 1e-6*size*cg_iter/rt_max << " (" + << 1e-6*size*cg_iter/rt_min << ") million.\n" + << endl; + } + delete prec; + + // 14. Recover the parallel grid function corresponding to X. This is the + // local finite element solution on each processor. + a->RecoverFEMSolution(X, *b, x); + + // 15. Save the refined mesh and the solution in parallel. This output can + // be viewed later using GLVis: "glvis -np -m mesh -g sol". + { + //disabled + } + + // 16. Send the solution by socket to a GLVis server. + //if (visualization) + { + //disabled + } + + // 17. Free the used memory. + delete a; + delete b; + delete fespace; + if (order > 0) { delete fec; } + delete pmesh; + + MPI_Finalize(); + + return 0; +} + + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz) +{ + int log_p = (int)floor(log((double)num_procs)/log(2.0) + 0.5); + MFEM_VERIFY((1 << log_p) == num_procs, + "number of processor is not a power of 2: " << num_procs); + MFEM_VERIFY(dim == 3, "dim = " << dim << " is NOT implemented!"); + + // Determine processor decomposition. + int s[3]; + s[0] = log_p/3 + (log_p%3 > 0 ? 1 : 0); + s[1] = log_p/3 + (log_p%3 > 1 ? 
1 : 0); + s[2] = log_p/3; + nxyz.SetSize(dim); + nxyz[0] = 1 << s[0]; + nxyz[1] = 1 << s[1]; + nxyz[2] = 1 << s[2]; + + // Determine mesh size. + int ser_level = level%3; + par_ref_levels = level/3; + int log_n = log_p + ser_level; + int t[3]; + t[0] = log_n/3 + (log_n%3 > 0 ? 1 : 0); + t[1] = log_n/3 + (log_n%3 > 1 ? 1 : 0); + t[2] = log_n/3; + + // Create the Mesh. + const bool gen_edges = true; + const bool sfc_ordering = true; + Mesh *mesh = new Mesh(1 << t[0], 1 << t[1], 1 << t[2], + Element::HEXAHEDRON, gen_edges, + 1.0, 1.0, 1.0, sfc_ordering); + if (myid == 0) + { + cout << "Processor partitioning: "; + nxyz.Print(cout, dim); + + // Mesh dimensions AFTER parallel refinement: + cout << "Mesh dimensions: " + << (1 << (t[0]+par_ref_levels)) << ' ' + << (1 << (t[1]+par_ref_levels)) << ' ' + << (1 << (t[2]+par_ref_levels)) << endl; + } + + return mesh; +} diff --git a/tests/mfem_amgx/ex1p_amgx_agg_precon.sh b/tests/mfem_amgx/ex1p_amgx_agg_precon.sh new file mode 100644 index 0000000..595d577 --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_agg_precon.sh @@ -0,0 +1,85 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supported by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. 
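+
+# This test builds the ex1p_amgx_agg_precon driver and sweeps it over
+# polynomial orders p = min_p..max_p (default 1..4) and refinement levels l
+# for as long as roughly p^3 * 2^l DOFs per MPI task stays below
+# max_dofs_proc (default 4200000), for every device in 'mfem_devs'
+# (default: cuda). Hypothetical example overriding the sweep bounds, passed
+# the same way as num_proc_build in run_all_amgx.sh:
+#   ./go.sh -c lassen -m xlc -r tests/mfem_amgx/ex1p_amgx_agg_precon.sh \
+#       num_proc_build=36 min_p=2 max_p=3 max_dofs_proc=1000000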
+ +problem=${problem:-0} + +function build_tests() +{ + local make_opts=( + "-j" "$num_proc_build" + "MFEM_DIR=$MFEM_DIR" + "BLD=$test_exe_dir/") + quoted_echo make "${make_opts[@]}" + [[ -n "$dry_run" ]] || make "${make_opts[@]}" +} + + +function run_tests() +{ + local test_name="ex1p_amgx_agg_precon" + set_mpi_options + # 'min_p' can be set on the command line + local l_min_p=${min_p:-1} + # 'max_p' can be set on the command line + local l_max_p=${max_p:-4} + # 'max_dofs_proc' can be set on the command line + local l_max_dofs_proc=${max_dofs_proc:-4200000} + # 'mfem_devs' can be set on the command line + local l_mfem_devs=(${mfem_devs:-cuda}) + local dim=3 + local args= + for dev in ${l_mfem_devs[@]}; do + for ((p = l_min_p; p <= l_max_p; p++)) do + for ((l = 0; (p**dim)*(2**l) <= l_max_dofs_proc; l++)) do + args=(-o $p -l $l -d $dev) + if [ -z "$dry_run" ]; then + echo "Running test:" + quoted_echo $mpi_run ./$test_name "${args[@]}" + $mpi_run ./$test_name "${args[@]}" || \ + printf "\nError in the test, error code: $?\n\n" + else + $dry_run $mpi_run ./$test_name "${args[@]}" + fi + done + done + done +} + + +function build_and_run_tests() +{ + $dry_run cd "$test_dir" + + build_tests || return 1 + echo + + [[ -n "$build_only" ]] && return + + $dry_run cd "$test_exe_dir" + run_tests +} + + +mfem_branch=${mfem_branch:-master} +libceed_branch=${libceed_branch:-master} + +# Uncomment the next line to enable 64-bit HYPRE_Int: +# hypre_big_int=1 + +packages=${packages:-cuda metis amgx hypre mfem} + +test_required_packages=${packages} diff --git a/tests/mfem_amgx/ex1p_amgx_agg_precon_l1_jacobi.cpp b/tests/mfem_amgx/ex1p_amgx_agg_precon_l1_jacobi.cpp new file mode 100644 index 0000000..9fb62f3 --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_agg_precon_l1_jacobi.cpp @@ -0,0 +1,322 @@ +// MFEM Example 1 - Parallel Version +// +// Compile with: make ex1p +// +// Description: This example code demonstrates the use of MFEM to define a +// simple finite element discretization of the Laplace problem +// -Delta u = 1 with homogeneous Dirichlet boundary conditions. +// Specifically, we discretize using a FE space of the specified +// order, or if order < 1 using an isoparametric/isogeometric +// space (i.e. quadratic for quadratic curvilinear mesh, NURBS for +// NURBS mesh, etc.) + +#include "mfem.hpp" +#include +#include + +using namespace std; +using namespace mfem; + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz); + +int main(int argc, char *argv[]) +{ + // 1. Initialize MPI. + int num_procs, myid; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + + // 2. Parse command-line options. + int dim = 3; + int level = 0; + int order = 1; + const char *device_config = "cuda"; + + OptionsParser args(argc, argv); + args.AddOption(&dim, "-dim", "--mesh-dimension", + "Solve 2D or 3D problem."); + args.AddOption(&level, "-l", "--refinement-level", + "Set the problem size: 2^level mesh elements per processor."); + args.AddOption(&order, "-o", "--order", + "Finite element order (polynomial degree)."); + args.AddOption(&device_config, "-d", "--device", + "Device configuration string, see Device::Configure()."); + args.Parse(); + if (!args.Good()) + { + if (myid == 0) + { + args.PrintUsage(cout); + } + MPI_Finalize(); + return 1; + } + if (myid == 0) + { + args.PrintOptions(cout); + } + + // 3. 
Enable hardware devices such as GPUs, and programming models such as + // CUDA, OCCA, RAJA and OpenMP based on command line options. + Device device(device_config); + if (myid == 0) { device.Print(); } + + // 4. Read the (serial) mesh from the given mesh file on all processors. We + // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface + // and volume meshes with the same code. + int par_ref_levels; + Array nxyz; + Mesh *mesh = make_mesh(myid, num_procs, dim, level, par_ref_levels, nxyz); + + // 5. Refine the serial mesh on all processors to increase the resolution. In + // this example we do 'ref_levels' of uniform refinement. We choose + // 'ref_levels' to be the largest number that gives a final mesh with no + // more than 10,000 elements. + { + // Disabled + } + + // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine + // this mesh further in parallel to increase the resolution. Once the + // parallel mesh is defined, the serial mesh can be deleted. + int *partitioning = nxyz.Size() ? mesh->CartesianPartitioning(nxyz) : NULL; + ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh, partitioning); + delete [] partitioning; + delete mesh; + for (int l = 0; l < par_ref_levels; l++) + { + pmesh->UniformRefinement(); + } + // pmesh->PrintInfo(); + long global_ne = pmesh->ReduceInt(pmesh->GetNE()); + if (myid == 0) + { + cout << "Total number of elements: " << global_ne << endl; + } + + // 7. Define a parallel finite element space on the parallel mesh. Here we + // use continuous Lagrange finite elements of the specified order. If + // order < 1, we instead use an isoparametric/isogeometric space. + FiniteElementCollection *fec; + MFEM_VERIFY(order > 0, "invalid 'order': " << order); + fec = new H1_FECollection(order, dim); + ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec); + HYPRE_Int size = fespace->GlobalTrueVSize(); + if (myid == 0) + { + cout << "Number of finite element unknowns: " << size << endl; + } + + // 8. Determine the list of true (i.e. parallel conforming) essential + // boundary dofs. In this example, the boundary conditions are defined + // by marking all the boundary attributes from the mesh as essential + // (Dirichlet) and converting them to a list of true dofs. + Array ess_tdof_list; + if (pmesh->bdr_attributes.Size()) + { + Array ess_bdr(pmesh->bdr_attributes.Max()); + ess_bdr = 1; + fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list); + } + + // 9. Set up the parallel linear form b(.) which corresponds to the + // right-hand side of the FEM linear system, which in this case is + // (1,phi_i) where phi_i are the basis functions in fespace. + ParLinearForm *b = new ParLinearForm(fespace); + ConstantCoefficient one(1.0); + b->AddDomainIntegrator(new DomainLFIntegrator(one)); + b->Assemble(); + + // 10. Define the solution vector x as a parallel finite element grid function + // corresponding to fespace. Initialize x with initial guess of zero, + // which satisfies the boundary conditions. + ParGridFunction x(fespace); + x = 0.0; + + // 11. Set up the parallel bilinear form a(.,.) on the finite element space + // corresponding to the Laplacian operator -Delta, by adding the Diffusion + // domain integrator. + ParBilinearForm *a = new ParBilinearForm(fespace); + a->AddDomainIntegrator(new DiffusionIntegrator(one)); + + // 12. 
Assemble the parallel bilinear form and the corresponding linear + // system, applying any necessary transformations such as: parallel + // assembly, eliminating boundary conditions, applying conforming + // constraints for non-conforming AMR, static condensation, etc. + a->Assemble(); + + OperatorPtr A; + Vector B, X; + a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B); + + // 13. Solve the linear system A X = B. + // * With full assembly, use the preconditioner from AmgX. + + bool amgx_verbose = true; + + string amgx_config = "{\n" + " \"config_version\": 2, \n" + " \"solver\": { \n" + " \"algorithm\": \"AGGREGATION\", \n" + " \"solver\": \"AMG\", \n" + " \"smoother\": { \n" + " \"scope\": \"jacobi\", \n" + " \"solver\": \"JACOBI_L1\" \n" + " }, \n" + " \"presweeps\": 1, \n" + " \"interpolator\": \"D2\", \n" + " \"selector\": \"SIZE_2\", \n" + " \"max_row_sum\" : 0.9, \n" + " \"strength_threshold\" : 0.25, \n" + " \"postsweeps\": 1, \n" + " \"max_iters\": 1, \n" + " \"cycle\": \"V\""; + if (amgx_verbose) + { + amgx_config = amgx_config + ",\n" + " \"obtain_timings\": 1 \n"; + } + else + { + amgx_config = amgx_config + "\n"; + } + amgx_config = amgx_config + " }\n" + "}\n"; + + AmgXSolver *prec = new AmgXSolver; + prec->SetConvergenceCheck(false); + prec->ReadParameters(amgx_config, AmgXSolver::INTERNAL); + prec->InitExclusiveGPU(MPI_COMM_WORLD); + + CGSolver cg(MPI_COMM_WORLD); + const int max_cg_iter = 200; + const int cg_print_level = 3; + cg.SetRelTol(1e-12); + cg.SetMaxIter(max_cg_iter); + cg.SetPrintLevel(cg_print_level); + if (prec) { cg.SetPreconditioner(*prec); } + cg.SetOperator(*A); + + // Warm-up CG solve (in case of JIT to avoid timing it) + { + Vector Xtmp(X); + cg.SetMaxIter(2); + cg.SetPrintLevel(-1); + cg.Mult(B, Xtmp); + cg.SetMaxIter(max_cg_iter); + cg.SetPrintLevel(cg_print_level); + } + + // Sync all ranks + MPI_Barrier(pmesh->GetComm()); + + // Start CG timing. + tic_toc.Clear(); + + // Start & Stop CG timing. + tic_toc.Start(); + cg.Mult(B, X); + tic_toc.Stop(); + double rt_min, rt_max, my_rt; + my_rt = tic_toc.RealTime(); + MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, pmesh->GetComm()); + MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, pmesh->GetComm()); + + // Print timing results. + if (myid == 0) + { + int cg_iter = cg.GetNumIterations(); + // Note: In the pcg algorithm, the number of operator Mult() calls is + // N_iter and the number of preconditioner Mult() calls is N_iter+1. + cout << '\n' + << "Total iterations "<< cg_iter; + cout << '\n' + << "Total CG time: " << rt_max << " (" << rt_min << ") sec." + << endl; + cout << "Time per CG step: " + << rt_max / cg_iter << " (" + << rt_min / cg_iter << ") sec." << endl; + cout << "\n\"DOFs/sec\" in CG: " + << 1e-6*size*cg_iter/rt_max << " (" + << 1e-6*size*cg_iter/rt_min << ") million.\n" + << endl; + } + delete prec; + + // 14. Recover the parallel grid function corresponding to X. This is the + // local finite element solution on each processor. + a->RecoverFEMSolution(X, *b, x); + + // 15. Save the refined mesh and the solution in parallel. This output can + // be viewed later using GLVis: "glvis -np -m mesh -g sol". + { + //disabled + } + + // 16. Send the solution by socket to a GLVis server. + //if (visualization) + { + //disabled + } + + // 17. Free the used memory. 
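+   //     (The AmgX preconditioner 'prec' was already deleted right after the
+   //     timed CG solve above.)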
+ delete a; + delete b; + delete fespace; + if (order > 0) { delete fec; } + delete pmesh; + + MPI_Finalize(); + + return 0; +} + + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz) +{ + int log_p = (int)floor(log((double)num_procs)/log(2.0) + 0.5); + MFEM_VERIFY((1 << log_p) == num_procs, + "number of processor is not a power of 2: " << num_procs); + MFEM_VERIFY(dim == 3, "dim = " << dim << " is NOT implemented!"); + + // Determine processor decomposition. + int s[3]; + s[0] = log_p/3 + (log_p%3 > 0 ? 1 : 0); + s[1] = log_p/3 + (log_p%3 > 1 ? 1 : 0); + s[2] = log_p/3; + nxyz.SetSize(dim); + nxyz[0] = 1 << s[0]; + nxyz[1] = 1 << s[1]; + nxyz[2] = 1 << s[2]; + + // Determine mesh size. + int ser_level = level%3; + par_ref_levels = level/3; + int log_n = log_p + ser_level; + int t[3]; + t[0] = log_n/3 + (log_n%3 > 0 ? 1 : 0); + t[1] = log_n/3 + (log_n%3 > 1 ? 1 : 0); + t[2] = log_n/3; + + // Create the Mesh. + const bool gen_edges = true; + const bool sfc_ordering = true; + Mesh *mesh = new Mesh(1 << t[0], 1 << t[1], 1 << t[2], + Element::HEXAHEDRON, gen_edges, + 1.0, 1.0, 1.0, sfc_ordering); + if (myid == 0) + { + cout << "Processor partitioning: "; + nxyz.Print(cout, dim); + + // Mesh dimensions AFTER parallel refinement: + cout << "Mesh dimensions: " + << (1 << (t[0]+par_ref_levels)) << ' ' + << (1 << (t[1]+par_ref_levels)) << ' ' + << (1 << (t[2]+par_ref_levels)) << endl; + } + + return mesh; +} diff --git a/tests/mfem_amgx/ex1p_amgx_agg_precon_l1_jacobi.sh b/tests/mfem_amgx/ex1p_amgx_agg_precon_l1_jacobi.sh new file mode 100644 index 0000000..e331554 --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_agg_precon_l1_jacobi.sh @@ -0,0 +1,85 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supported by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. 
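+
+# Same parameter sweep as ex1p_amgx_agg_precon.sh; the driver built here uses
+# the JACOBI_L1 smoother in its AmgX configuration instead of the damped
+# BLOCK_JACOBI smoother.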
+ +problem=${problem:-0} + +function build_tests() +{ + local make_opts=( + "-j" "$num_proc_build" + "MFEM_DIR=$MFEM_DIR" + "BLD=$test_exe_dir/") + quoted_echo make "${make_opts[@]}" + [[ -n "$dry_run" ]] || make "${make_opts[@]}" +} + + +function run_tests() +{ + local test_name="ex1p_amgx_agg_precon_l1_jacobi" + set_mpi_options + # 'min_p' can be set on the command line + local l_min_p=${min_p:-1} + # 'max_p' can be set on the command line + local l_max_p=${max_p:-4} + # 'max_dofs_proc' can be set on the command line + local l_max_dofs_proc=${max_dofs_proc:-4200000} + # 'mfem_devs' can be set on the command line + local l_mfem_devs=(${mfem_devs:-cuda}) + local dim=3 + local args= + for dev in ${l_mfem_devs[@]}; do + for ((p = l_min_p; p <= l_max_p; p++)) do + for ((l = 0; (p**dim)*(2**l) <= l_max_dofs_proc; l++)) do + args=(-o $p -l $l -d $dev) + if [ -z "$dry_run" ]; then + echo "Running test:" + quoted_echo $mpi_run ./$test_name "${args[@]}" + $mpi_run ./$test_name "${args[@]}" || \ + printf "\nError in the test, error code: $?\n\n" + else + $dry_run $mpi_run ./$test_name "${args[@]}" + fi + done + done + done +} + + +function build_and_run_tests() +{ + $dry_run cd "$test_dir" + + build_tests || return 1 + echo + + [[ -n "$build_only" ]] && return + + $dry_run cd "$test_exe_dir" + run_tests +} + + +mfem_branch=${mfem_branch:-master} +libceed_branch=${libceed_branch:-master} + +# Uncomment the next line to enable 64-bit HYPRE_Int: +# hypre_big_int=1 + +packages=${packages:-cuda metis amgx hypre mfem} + +test_required_packages=${packages} diff --git a/tests/mfem_amgx/ex1p_amgx_agg_solver.cpp b/tests/mfem_amgx/ex1p_amgx_agg_solver.cpp new file mode 100644 index 0000000..8b10142 --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_agg_solver.cpp @@ -0,0 +1,323 @@ +// MFEM Example 1 - Parallel Version +// +// Compile with: make ex1p +// +// Description: This example code demonstrates the use of MFEM to define a +// simple finite element discretization of the Laplace problem +// -Delta u = 1 with homogeneous Dirichlet boundary conditions. +// Specifically, we discretize using a FE space of the specified +// order, or if order < 1 using an isoparametric/isogeometric +// space (i.e. quadratic for quadratic curvilinear mesh, NURBS for +// NURBS mesh, etc.) + +#include "mfem.hpp" +#include +#include + +using namespace std; +using namespace mfem; + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz); + +int main(int argc, char *argv[]) +{ + // 1. Initialize MPI. + int num_procs, myid; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + + // 2. Parse command-line options. + int dim = 3; + int level = 0; + int order = 1; + const char *device_config = "cuda"; + + OptionsParser args(argc, argv); + args.AddOption(&dim, "-dim", "--mesh-dimension", + "Solve 2D or 3D problem."); + args.AddOption(&level, "-l", "--refinement-level", + "Set the problem size: 2^level mesh elements per processor."); + args.AddOption(&order, "-o", "--order", + "Finite element order (polynomial degree)."); + args.AddOption(&device_config, "-d", "--device", + "Device configuration string, see Device::Configure()."); + args.Parse(); + if (!args.Good()) + { + if (myid == 0) + { + args.PrintUsage(cout); + } + MPI_Finalize(); + return 1; + } + if (myid == 0) + { + args.PrintOptions(cout); + } + + // 3. Enable hardware devices such as GPUs, and programming models such as + // CUDA, OCCA, RAJA and OpenMP based on command line options. 
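+   //    (The '-d' default in this benchmark is "cuda", so the driver runs on
+   //    the GPU unless the test script overrides 'mfem_devs'.)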
+ Device device(device_config); + if (myid == 0) { device.Print(); } + + // 4. Read the (serial) mesh from the given mesh file on all processors. We + // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface + // and volume meshes with the same code. + int par_ref_levels; + Array nxyz; + Mesh *mesh = make_mesh(myid, num_procs, dim, level, par_ref_levels, nxyz); + + // 5. Refine the serial mesh on all processors to increase the resolution. In + // this example we do 'ref_levels' of uniform refinement. We choose + // 'ref_levels' to be the largest number that gives a final mesh with no + // more than 10,000 elements. + { + // Disabled + } + + // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine + // this mesh further in parallel to increase the resolution. Once the + // parallel mesh is defined, the serial mesh can be deleted. + int *partitioning = nxyz.Size() ? mesh->CartesianPartitioning(nxyz) : NULL; + ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh, partitioning); + delete [] partitioning; + delete mesh; + for (int l = 0; l < par_ref_levels; l++) + { + pmesh->UniformRefinement(); + } + // pmesh->PrintInfo(); + long global_ne = pmesh->ReduceInt(pmesh->GetNE()); + if (myid == 0) + { + cout << "Total number of elements: " << global_ne << endl; + } + + // 7. Define a parallel finite element space on the parallel mesh. Here we + // use continuous Lagrange finite elements of the specified order. If + // order < 1, we instead use an isoparametric/isogeometric space. + FiniteElementCollection *fec; + MFEM_VERIFY(order > 0, "invalid 'order': " << order); + fec = new H1_FECollection(order, dim); + ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec); + HYPRE_Int size = fespace->GlobalTrueVSize(); + if (myid == 0) + { + cout << "Number of finite element unknowns: " << size << endl; + } + + // 8. Determine the list of true (i.e. parallel conforming) essential + // boundary dofs. In this example, the boundary conditions are defined + // by marking all the boundary attributes from the mesh as essential + // (Dirichlet) and converting them to a list of true dofs. + Array ess_tdof_list; + if (pmesh->bdr_attributes.Size()) + { + Array ess_bdr(pmesh->bdr_attributes.Max()); + ess_bdr = 1; + fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list); + } + + // 9. Set up the parallel linear form b(.) which corresponds to the + // right-hand side of the FEM linear system, which in this case is + // (1,phi_i) where phi_i are the basis functions in fespace. + ParLinearForm *b = new ParLinearForm(fespace); + ConstantCoefficient one(1.0); + b->AddDomainIntegrator(new DomainLFIntegrator(one)); + b->Assemble(); + + // 10. Define the solution vector x as a parallel finite element grid function + // corresponding to fespace. Initialize x with initial guess of zero, + // which satisfies the boundary conditions. + ParGridFunction x(fespace); + x = 0.0; + + // 11. Set up the parallel bilinear form a(.,.) on the finite element space + // corresponding to the Laplacian operator -Delta, by adding the Diffusion + // domain integrator. + ParBilinearForm *a = new ParBilinearForm(fespace); + a->AddDomainIntegrator(new DiffusionIntegrator(one)); + + // 12. Assemble the parallel bilinear form and the corresponding linear + // system, applying any necessary transformations such as: parallel + // assembly, eliminating boundary conditions, applying conforming + // constraints for non-conforming AMR, static condensation, etc. 
+ a->Assemble(); + + OperatorPtr A; + Vector B, X; + a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B); + + // 13. Solve the linear system A X = B. + // * With full assembly, use the preconditioner from AmgX. + + bool amgx_verbose = true; + + string amgx_config = "{ \n" + " \"config_version\": 2, \n" + " \"solver\": { \n" + " \"preconditioner\": { \n" + " \"algorithm\": \"AGGREGATION\", \n" + " \"solver\": \"AMG\", \n" + " \"smoother\": { \n" + " \"scope\": \"jacobi\", \n" + " \"solver\": \"BLOCK_JACOBI\", \n" + " \"relaxation_factor\": 0.7 \n" + " }, \n" + " \"presweeps\": 1, \n" + " \"max_iters\": 1, \n" + " \"interpolator\": \"D2\", \n" + " \"selector\": \"SIZE_2\", \n" + " \"scope\": \"amg\", \n" + " \"max_levels\": 100, \n" + " \"cycle\": \"V\", \n" + " \"postsweeps\": 1 \n" + " }, \n" + " \"solver\": \"PCG\", \n" + " \"max_iters\": 100, \n" + " \"convergence\": \"RELATIVE_INI_CORE\", \n" + " \"scope\": \"main\", \n" + " \"tolerance\": 1e-12, \n" + " \"norm\": \"L2\" "; + if (amgx_verbose) + { + amgx_config = amgx_config + ", \n" + " \"obtain_timings\": 1, \n" + " \"monitor_residual\": 1, \n" + " \"print_grid_stats\": 1, \n" + " \"print_solve_stats\": 1 \n"; + } + else + { + amgx_config = amgx_config + "\n"; + } + amgx_config = amgx_config + " } \n" + "} \n"; + + AmgXSolver *amgx = new AmgXSolver; + amgx->SetConvergenceCheck(true); + amgx->ReadParameters(amgx_config, AmgXSolver::INTERNAL); + amgx->InitExclusiveGPU(MPI_COMM_WORLD); + + amgx->SetOperator(*A.As()); + + // Warm-up CG solve (in case of JIT to avoid timing it) + { + Vector Xtmp(X); + amgx->Mult(B, Xtmp); + } + + // Sync all ranks + MPI_Barrier(pmesh->GetComm()); + + // Start CG timing. + tic_toc.Clear(); + + // Start & Stop CG timing. + tic_toc.Start(); + amgx->Mult(B, X); + tic_toc.Stop(); + double rt_min, rt_max, my_rt; + my_rt = tic_toc.RealTime(); + MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, pmesh->GetComm()); + MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, pmesh->GetComm()); + + // Print timing results. + if (myid == 0) + { + int cg_iter = amgx->GetNumIterations(); + // Note: In the pcg algorithm, the number of operator Mult() calls is + // N_iter and the number of preconditioner Mult() calls is N_iter+1. + cout << '\n' + << "Total iterations "<< cg_iter; + cout << '\n' + << "Total CG time: " << rt_max << " (" << rt_min << ") sec." + << endl; + cout << "Time per CG step: " + << rt_max / cg_iter << " (" + << rt_min / cg_iter << ") sec." << endl; + cout << "\n\"DOFs/sec\" in CG: " + << 1e-6*size*cg_iter/rt_max << " (" + << 1e-6*size*cg_iter/rt_min << ") million.\n" + << endl; + } + delete amgx; + + // 14. Recover the parallel grid function corresponding to X. This is the + // local finite element solution on each processor. + a->RecoverFEMSolution(X, *b, x); + + // 15. Save the refined mesh and the solution in parallel. This output can + // be viewed later using GLVis: "glvis -np -m mesh -g sol". + { + //disabled + } + + // 16. Send the solution by socket to a GLVis server. + //if (visualization) + { + //disabled + } + + // 17. Free the used memory. 
+ delete a; + delete b; + delete fespace; + if (order > 0) { delete fec; } + delete pmesh; + + MPI_Finalize(); + + return 0; +} + + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz) +{ + int log_p = (int)floor(log((double)num_procs)/log(2.0) + 0.5); + MFEM_VERIFY((1 << log_p) == num_procs, + "number of processor is not a power of 2: " << num_procs); + MFEM_VERIFY(dim == 3, "dim = " << dim << " is NOT implemented!"); + + // Determine processor decomposition. + int s[3]; + s[0] = log_p/3 + (log_p%3 > 0 ? 1 : 0); + s[1] = log_p/3 + (log_p%3 > 1 ? 1 : 0); + s[2] = log_p/3; + nxyz.SetSize(dim); + nxyz[0] = 1 << s[0]; + nxyz[1] = 1 << s[1]; + nxyz[2] = 1 << s[2]; + + // Determine mesh size. + int ser_level = level%3; + par_ref_levels = level/3; + int log_n = log_p + ser_level; + int t[3]; + t[0] = log_n/3 + (log_n%3 > 0 ? 1 : 0); + t[1] = log_n/3 + (log_n%3 > 1 ? 1 : 0); + t[2] = log_n/3; + + // Create the Mesh. + const bool gen_edges = true; + const bool sfc_ordering = true; + Mesh *mesh = new Mesh(1 << t[0], 1 << t[1], 1 << t[2], + Element::HEXAHEDRON, gen_edges, + 1.0, 1.0, 1.0, sfc_ordering); + if (myid == 0) + { + cout << "Processor partitioning: "; + nxyz.Print(cout, dim); + + // Mesh dimensions AFTER parallel refinement: + cout << "Mesh dimensions: " + << (1 << (t[0]+par_ref_levels)) << ' ' + << (1 << (t[1]+par_ref_levels)) << ' ' + << (1 << (t[2]+par_ref_levels)) << endl; + } + + return mesh; +} diff --git a/tests/mfem_amgx/ex1p_amgx_agg_solver.sh b/tests/mfem_amgx/ex1p_amgx_agg_solver.sh new file mode 100644 index 0000000..8a8524c --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_agg_solver.sh @@ -0,0 +1,85 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supported by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. 
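
For readers comparing the drivers: in the *_solver variants above, AmgX owns the entire PCG iteration (the outer "solver": "PCG" scope of the JSON config) and MFEM only hands it the assembled parallel operator. Below is a condensed sketch of that call sequence, assuming a parallel MFEM build with AmgX enabled; the helper name SolveWithAmgX is mine and the sketch is illustrative only, not part of the patch.

#include "mfem.hpp"
#include <string>

// 'config' is a JSON string such as the aggregation/PCG one assembled above.
void SolveWithAmgX(const std::string &config, mfem::HypreParMatrix &A,
                   const mfem::Vector &B, mfem::Vector &X)
{
   mfem::AmgXSolver amgx;
   amgx.SetConvergenceCheck(true);                          // AmgX monitors convergence itself
   amgx.ReadParameters(config, mfem::AmgXSolver::INTERNAL);
   amgx.InitExclusiveGPU(MPI_COMM_WORLD);                   // one GPU per MPI rank
   amgx.SetOperator(A);
   amgx.Mult(B, X);                                         // full PCG solve inside AmgX
   // amgx.GetNumIterations() then supplies the iteration count that the
   // drivers print for post-processing.
}
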
+ +problem=${problem:-0} + +function build_tests() +{ + local make_opts=( + "-j" "$num_proc_build" + "MFEM_DIR=$MFEM_DIR" + "BLD=$test_exe_dir/") + quoted_echo make "${make_opts[@]}" + [[ -n "$dry_run" ]] || make "${make_opts[@]}" +} + + +function run_tests() +{ + local test_name="ex1p_amgx_agg_solver" + set_mpi_options + # 'min_p' can be set on the command line + local l_min_p=${min_p:-1} + # 'max_p' can be set on the command line + local l_max_p=${max_p:-4} + # 'max_dofs_proc' can be set on the command line + local l_max_dofs_proc=${max_dofs_proc:-4200000} + # 'mfem_devs' can be set on the command line + local l_mfem_devs=(${mfem_devs:-cuda}) + local dim=3 + local args= + for dev in ${l_mfem_devs[@]}; do + for ((p = l_min_p; p <= l_max_p; p++)) do + for ((l = 0; (p**dim)*(2**l) <= l_max_dofs_proc; l++)) do + args=(-o $p -l $l -d $dev) + if [ -z "$dry_run" ]; then + echo "Running test:" + quoted_echo $mpi_run ./$test_name "${args[@]}" + $mpi_run ./$test_name "${args[@]}" || \ + printf "\nError in the test, error code: $?\n\n" + else + $dry_run $mpi_run ./$test_name "${args[@]}" + fi + done + done + done +} + + +function build_and_run_tests() +{ + $dry_run cd "$test_dir" + + build_tests || return 1 + echo + + [[ -n "$build_only" ]] && return + + $dry_run cd "$test_exe_dir" + run_tests +} + + +mfem_branch=${mfem_branch:-master} +libceed_branch=${libceed_branch:-master} + +# Uncomment the next line to enable 64-bit HYPRE_Int: +# hypre_big_int=1 + +packages=${packages:-cuda metis amgx hypre mfem} + +test_required_packages=${packages} diff --git a/tests/mfem_amgx/ex1p_amgx_agg_solver_l1_jacobi.cpp b/tests/mfem_amgx/ex1p_amgx_agg_solver_l1_jacobi.cpp new file mode 100644 index 0000000..38cd996 --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_agg_solver_l1_jacobi.cpp @@ -0,0 +1,322 @@ +// MFEM Example 1 - Parallel Version +// +// Compile with: make ex1p +// +// Description: This example code demonstrates the use of MFEM to define a +// simple finite element discretization of the Laplace problem +// -Delta u = 1 with homogeneous Dirichlet boundary conditions. +// Specifically, we discretize using a FE space of the specified +// order, or if order < 1 using an isoparametric/isogeometric +// space (i.e. quadratic for quadratic curvilinear mesh, NURBS for +// NURBS mesh, etc.) + +#include "mfem.hpp" +#include +#include + +using namespace std; +using namespace mfem; + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz); + +int main(int argc, char *argv[]) +{ + // 1. Initialize MPI. + int num_procs, myid; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + + // 2. Parse command-line options. + int dim = 3; + int level = 0; + int order = 1; + const char *device_config = "cuda"; + + OptionsParser args(argc, argv); + args.AddOption(&dim, "-dim", "--mesh-dimension", + "Solve 2D or 3D problem."); + args.AddOption(&level, "-l", "--refinement-level", + "Set the problem size: 2^level mesh elements per processor."); + args.AddOption(&order, "-o", "--order", + "Finite element order (polynomial degree)."); + args.AddOption(&device_config, "-d", "--device", + "Device configuration string, see Device::Configure()."); + args.Parse(); + if (!args.Good()) + { + if (myid == 0) + { + args.PrintUsage(cout); + } + MPI_Finalize(); + return 1; + } + if (myid == 0) + { + args.PrintOptions(cout); + } + + // 3. 
Enable hardware devices such as GPUs, and programming models such as + // CUDA, OCCA, RAJA and OpenMP based on command line options. + Device device(device_config); + if (myid == 0) { device.Print(); } + + // 4. Read the (serial) mesh from the given mesh file on all processors. We + // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface + // and volume meshes with the same code. + int par_ref_levels; + Array nxyz; + Mesh *mesh = make_mesh(myid, num_procs, dim, level, par_ref_levels, nxyz); + + // 5. Refine the serial mesh on all processors to increase the resolution. In + // this example we do 'ref_levels' of uniform refinement. We choose + // 'ref_levels' to be the largest number that gives a final mesh with no + // more than 10,000 elements. + { + // Disabled + } + + // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine + // this mesh further in parallel to increase the resolution. Once the + // parallel mesh is defined, the serial mesh can be deleted. + int *partitioning = nxyz.Size() ? mesh->CartesianPartitioning(nxyz) : NULL; + ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh, partitioning); + delete [] partitioning; + delete mesh; + for (int l = 0; l < par_ref_levels; l++) + { + pmesh->UniformRefinement(); + } + // pmesh->PrintInfo(); + long global_ne = pmesh->ReduceInt(pmesh->GetNE()); + if (myid == 0) + { + cout << "Total number of elements: " << global_ne << endl; + } + + // 7. Define a parallel finite element space on the parallel mesh. Here we + // use continuous Lagrange finite elements of the specified order. If + // order < 1, we instead use an isoparametric/isogeometric space. + FiniteElementCollection *fec; + MFEM_VERIFY(order > 0, "invalid 'order': " << order); + fec = new H1_FECollection(order, dim); + ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec); + HYPRE_Int size = fespace->GlobalTrueVSize(); + if (myid == 0) + { + cout << "Number of finite element unknowns: " << size << endl; + } + + // 8. Determine the list of true (i.e. parallel conforming) essential + // boundary dofs. In this example, the boundary conditions are defined + // by marking all the boundary attributes from the mesh as essential + // (Dirichlet) and converting them to a list of true dofs. + Array ess_tdof_list; + if (pmesh->bdr_attributes.Size()) + { + Array ess_bdr(pmesh->bdr_attributes.Max()); + ess_bdr = 1; + fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list); + } + + // 9. Set up the parallel linear form b(.) which corresponds to the + // right-hand side of the FEM linear system, which in this case is + // (1,phi_i) where phi_i are the basis functions in fespace. + ParLinearForm *b = new ParLinearForm(fespace); + ConstantCoefficient one(1.0); + b->AddDomainIntegrator(new DomainLFIntegrator(one)); + b->Assemble(); + + // 10. Define the solution vector x as a parallel finite element grid function + // corresponding to fespace. Initialize x with initial guess of zero, + // which satisfies the boundary conditions. + ParGridFunction x(fespace); + x = 0.0; + + // 11. Set up the parallel bilinear form a(.,.) on the finite element space + // corresponding to the Laplacian operator -Delta, by adding the Diffusion + // domain integrator. + ParBilinearForm *a = new ParBilinearForm(fespace); + a->AddDomainIntegrator(new DiffusionIntegrator(one)); + + // 12. 
Assemble the parallel bilinear form and the corresponding linear + // system, applying any necessary transformations such as: parallel + // assembly, eliminating boundary conditions, applying conforming + // constraints for non-conforming AMR, static condensation, etc. + a->Assemble(); + + OperatorPtr A; + Vector B, X; + a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B); + + // 13. Solve the linear system A X = B. + // * With full assembly, use the preconditioner from AmgX. + + bool amgx_verbose = true; + + string amgx_config = "{ \n" + " \"config_version\": 2, \n" + " \"solver\": { \n" + " \"preconditioner\": { \n" + " \"algorithm\": \"AGGREGATION\", \n" + " \"solver\": \"AMG\", \n" + " \"smoother\": { \n" + " \"scope\": \"jacobi\", \n" + " \"solver\": \"JACOBI_L1\" \n" + " }, \n" + " \"presweeps\": 1, \n" + " \"max_iters\": 1, \n" + " \"interpolator\": \"D2\", \n" + " \"selector\": \"SIZE_2\", \n" + " \"scope\": \"amg\", \n" + " \"max_levels\": 100, \n" + " \"cycle\": \"V\", \n" + " \"postsweeps\": 1 \n" + " }, \n" + " \"solver\": \"PCG\", \n" + " \"max_iters\": 100, \n" + " \"convergence\": \"RELATIVE_INI_CORE\", \n" + " \"scope\": \"main\", \n" + " \"tolerance\": 1e-12, \n" + " \"norm\": \"L2\" "; + if (amgx_verbose) + { + amgx_config = amgx_config + ", \n" + " \"obtain_timings\": 1, \n" + " \"monitor_residual\": 1, \n" + " \"print_grid_stats\": 1, \n" + " \"print_solve_stats\": 1 \n"; + } + else + { + amgx_config = amgx_config + "\n"; + } + amgx_config = amgx_config + " } \n" + "} \n"; + + AmgXSolver *amgx = new AmgXSolver; + amgx->SetConvergenceCheck(true); + amgx->ReadParameters(amgx_config, AmgXSolver::INTERNAL); + amgx->InitExclusiveGPU(MPI_COMM_WORLD); + + amgx->SetOperator(*A.As()); + + // Warm-up CG solve (in case of JIT to avoid timing it) + { + Vector Xtmp(X); + amgx->Mult(B, Xtmp); + } + + // Sync all ranks + MPI_Barrier(pmesh->GetComm()); + + // Start CG timing. + tic_toc.Clear(); + + // Start & Stop CG timing. + tic_toc.Start(); + amgx->Mult(B, X); + tic_toc.Stop(); + double rt_min, rt_max, my_rt; + my_rt = tic_toc.RealTime(); + MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, pmesh->GetComm()); + MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, pmesh->GetComm()); + + // Print timing results. + if (myid == 0) + { + int cg_iter = amgx->GetNumIterations(); + // Note: In the pcg algorithm, the number of operator Mult() calls is + // N_iter and the number of preconditioner Mult() calls is N_iter+1. + cout << '\n' + << "Total iterations "<< cg_iter; + cout << '\n' + << "Total CG time: " << rt_max << " (" << rt_min << ") sec." + << endl; + cout << "Time per CG step: " + << rt_max / cg_iter << " (" + << rt_min / cg_iter << ") sec." << endl; + cout << "\n\"DOFs/sec\" in CG: " + << 1e-6*size*cg_iter/rt_max << " (" + << 1e-6*size*cg_iter/rt_min << ") million.\n" + << endl; + } + delete amgx; + + // 14. Recover the parallel grid function corresponding to X. This is the + // local finite element solution on each processor. + a->RecoverFEMSolution(X, *b, x); + + // 15. Save the refined mesh and the solution in parallel. This output can + // be viewed later using GLVis: "glvis -np -m mesh -g sol". + { + //disabled + } + + // 16. Send the solution by socket to a GLVis server. + //if (visualization) + { + //disabled + } + + // 17. Free the used memory. 
+ delete a; + delete b; + delete fespace; + if (order > 0) { delete fec; } + delete pmesh; + + MPI_Finalize(); + + return 0; +} + + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz) +{ + int log_p = (int)floor(log((double)num_procs)/log(2.0) + 0.5); + MFEM_VERIFY((1 << log_p) == num_procs, + "number of processor is not a power of 2: " << num_procs); + MFEM_VERIFY(dim == 3, "dim = " << dim << " is NOT implemented!"); + + // Determine processor decomposition. + int s[3]; + s[0] = log_p/3 + (log_p%3 > 0 ? 1 : 0); + s[1] = log_p/3 + (log_p%3 > 1 ? 1 : 0); + s[2] = log_p/3; + nxyz.SetSize(dim); + nxyz[0] = 1 << s[0]; + nxyz[1] = 1 << s[1]; + nxyz[2] = 1 << s[2]; + + // Determine mesh size. + int ser_level = level%3; + par_ref_levels = level/3; + int log_n = log_p + ser_level; + int t[3]; + t[0] = log_n/3 + (log_n%3 > 0 ? 1 : 0); + t[1] = log_n/3 + (log_n%3 > 1 ? 1 : 0); + t[2] = log_n/3; + + // Create the Mesh. + const bool gen_edges = true; + const bool sfc_ordering = true; + Mesh *mesh = new Mesh(1 << t[0], 1 << t[1], 1 << t[2], + Element::HEXAHEDRON, gen_edges, + 1.0, 1.0, 1.0, sfc_ordering); + if (myid == 0) + { + cout << "Processor partitioning: "; + nxyz.Print(cout, dim); + + // Mesh dimensions AFTER parallel refinement: + cout << "Mesh dimensions: " + << (1 << (t[0]+par_ref_levels)) << ' ' + << (1 << (t[1]+par_ref_levels)) << ' ' + << (1 << (t[2]+par_ref_levels)) << endl; + } + + return mesh; +} diff --git a/tests/mfem_amgx/ex1p_amgx_agg_solver_l1_jacobi.sh b/tests/mfem_amgx/ex1p_amgx_agg_solver_l1_jacobi.sh new file mode 100644 index 0000000..d41dea0 --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_agg_solver_l1_jacobi.sh @@ -0,0 +1,85 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supported by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. 
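
The figure of merit printed by all of these drivers is derived the same way: each rank times its own solve, the root reduces the min/max across ranks, and "DOFs/sec" is unknowns times CG iterations divided by the slowest rank's time (the value in parentheses uses the fastest rank). The sketch below mirrors that reporting step with plain MPI and printf; the helper name ReportCG is mine and the code is illustrative only.

#include <mpi.h>
#include <cstdio>

void ReportCG(MPI_Comm comm, double my_rt, long long ndofs, int cg_iter)
{
   int myid;
   MPI_Comm_rank(comm, &myid);
   double rt_min, rt_max;
   MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, comm);
   MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
   if (myid == 0)
   {
      std::printf("Total iterations %d\n", cg_iter);
      std::printf("Total CG time: %g (%g) sec.\n", rt_max, rt_min);
      std::printf("Time per CG step: %g (%g) sec.\n", rt_max/cg_iter, rt_min/cg_iter);
      std::printf("\"DOFs/sec\" in CG: %g (%g) million.\n",
                  1e-6*ndofs*cg_iter/rt_max, 1e-6*ndofs*cg_iter/rt_min);
   }
}
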
+ +problem=${problem:-0} + +function build_tests() +{ + local make_opts=( + "-j" "$num_proc_build" + "MFEM_DIR=$MFEM_DIR" + "BLD=$test_exe_dir/") + quoted_echo make "${make_opts[@]}" + [[ -n "$dry_run" ]] || make "${make_opts[@]}" +} + + +function run_tests() +{ + local test_name="ex1p_amgx_agg_solver_l1_jacobi" + set_mpi_options + # 'min_p' can be set on the command line + local l_min_p=${min_p:-1} + # 'max_p' can be set on the command line + local l_max_p=${max_p:-4} + # 'max_dofs_proc' can be set on the command line + local l_max_dofs_proc=${max_dofs_proc:-4200000} + # 'mfem_devs' can be set on the command line + local l_mfem_devs=(${mfem_devs:-cuda}) + local dim=3 + local args= + for dev in ${l_mfem_devs[@]}; do + for ((p = l_min_p; p <= l_max_p; p++)) do + for ((l = 0; (p**dim)*(2**l) <= l_max_dofs_proc; l++)) do + args=(-o $p -l $l -d $dev) + if [ -z "$dry_run" ]; then + echo "Running test:" + quoted_echo $mpi_run ./$test_name "${args[@]}" + $mpi_run ./$test_name "${args[@]}" || \ + printf "\nError in the test, error code: $?\n\n" + else + $dry_run $mpi_run ./$test_name "${args[@]}" + fi + done + done + done +} + + +function build_and_run_tests() +{ + $dry_run cd "$test_dir" + + build_tests || return 1 + echo + + [[ -n "$build_only" ]] && return + + $dry_run cd "$test_exe_dir" + run_tests +} + + +mfem_branch=${mfem_branch:-master} +libceed_branch=${libceed_branch:-master} + +# Uncomment the next line to enable 64-bit HYPRE_Int: +# hypre_big_int=1 + +packages=${packages:-cuda metis amgx hypre mfem} + +test_required_packages=${packages} diff --git a/tests/mfem_amgx/ex1p_amgx_precon.cpp b/tests/mfem_amgx/ex1p_amgx_precon.cpp index 3c69958..29ef60f 100644 --- a/tests/mfem_amgx/ex1p_amgx_precon.cpp +++ b/tests/mfem_amgx/ex1p_amgx_precon.cpp @@ -154,7 +154,7 @@ int main(int argc, char *argv[]) // 13. Solve the linear system A X = B. // * With full assembly, use the preconditioner from AmgX. - bool amgx_verbose = false; + bool amgx_verbose = true; string amgx_config = "{\n" " \"config_version\": 2, \n" @@ -171,15 +171,11 @@ int main(int argc, char *argv[]) " \"strength_threshold\" : 0.25, \n" " \"postsweeps\": 1, \n" " \"max_iters\": 1, \n" - " \"convergence\": \"ABSOLUTE\", \n" " \"cycle\": \"V\""; if (amgx_verbose) { amgx_config = amgx_config + ",\n" - " \"obtain_timings\": 1, \n" - " \"monitor_residual\": 1, \n" - " \"print_grid_stats\": 1, \n" - " \"print_solve_stats\": 1 \n"; + " \"obtain_timings\": 1 \n"; } else { @@ -188,7 +184,7 @@ int main(int argc, char *argv[]) amgx_config = amgx_config + " }\n" + "}\n"; AmgXSolver *prec = new AmgXSolver; - prec->ConfigureAs(AmgXSolver::PRECONDITIONER); + prec->SetConvergenceCheck(false); prec->ReadParameters(amgx_config, AmgXSolver::INTERNAL); prec->InitExclusiveGPU(MPI_COMM_WORLD); @@ -232,6 +228,8 @@ int main(int argc, char *argv[]) int cg_iter = cg.GetNumIterations(); // Note: In the pcg algorithm, the number of operator Mult() calls is // N_iter and the number of preconditioner Mult() calls is N_iter+1. + cout << '\n' + << "Total iterations "<< cg_iter; cout << '\n' << "Total CG time: " << rt_max << " (" << rt_min << ") sec." 
<< endl; diff --git a/tests/mfem_amgx/ex1p_amgx_precon.sh b/tests/mfem_amgx/ex1p_amgx_precon.sh index a16279d..f2c6e02 100644 --- a/tests/mfem_amgx/ex1p_amgx_precon.sh +++ b/tests/mfem_amgx/ex1p_amgx_precon.sh @@ -30,7 +30,7 @@ function build_tests() function run_tests() { - local test_name="ex1p_amgx" + local test_name="ex1p_amgx_precon" set_mpi_options # 'min_p' can be set on the command line local l_min_p=${min_p:-1} @@ -74,7 +74,7 @@ function build_and_run_tests() } -mfem_branch=${mfem_branch:-amgx/artv3/configure} +mfem_branch=${mfem_branch:-master} libceed_branch=${libceed_branch:-master} # Uncomment the next line to enable 64-bit HYPRE_Int: diff --git a/tests/mfem_amgx/ex1p_amgx_precon_l1_jacobi.cpp b/tests/mfem_amgx/ex1p_amgx_precon_l1_jacobi.cpp new file mode 100644 index 0000000..174373d --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_precon_l1_jacobi.cpp @@ -0,0 +1,320 @@ +// MFEM Example 1 - Parallel Version +// +// Compile with: make ex1p +// +// Description: This example code demonstrates the use of MFEM to define a +// simple finite element discretization of the Laplace problem +// -Delta u = 1 with homogeneous Dirichlet boundary conditions. +// Specifically, we discretize using a FE space of the specified +// order, or if order < 1 using an isoparametric/isogeometric +// space (i.e. quadratic for quadratic curvilinear mesh, NURBS for +// NURBS mesh, etc.) + +#include "mfem.hpp" +#include +#include + +using namespace std; +using namespace mfem; + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz); + +int main(int argc, char *argv[]) +{ + // 1. Initialize MPI. + int num_procs, myid; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + + // 2. Parse command-line options. + int dim = 3; + int level = 0; + int order = 1; + const char *device_config = "cuda"; + + OptionsParser args(argc, argv); + args.AddOption(&dim, "-dim", "--mesh-dimension", + "Solve 2D or 3D problem."); + args.AddOption(&level, "-l", "--refinement-level", + "Set the problem size: 2^level mesh elements per processor."); + args.AddOption(&order, "-o", "--order", + "Finite element order (polynomial degree)."); + args.AddOption(&device_config, "-d", "--device", + "Device configuration string, see Device::Configure()."); + args.Parse(); + if (!args.Good()) + { + if (myid == 0) + { + args.PrintUsage(cout); + } + MPI_Finalize(); + return 1; + } + if (myid == 0) + { + args.PrintOptions(cout); + } + + // 3. Enable hardware devices such as GPUs, and programming models such as + // CUDA, OCCA, RAJA and OpenMP based on command line options. + Device device(device_config); + if (myid == 0) { device.Print(); } + + // 4. Read the (serial) mesh from the given mesh file on all processors. We + // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface + // and volume meshes with the same code. + int par_ref_levels; + Array nxyz; + Mesh *mesh = make_mesh(myid, num_procs, dim, level, par_ref_levels, nxyz); + + // 5. Refine the serial mesh on all processors to increase the resolution. In + // this example we do 'ref_levels' of uniform refinement. We choose + // 'ref_levels' to be the largest number that gives a final mesh with no + // more than 10,000 elements. + { + // Disabled + } + + // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine + // this mesh further in parallel to increase the resolution. Once the + // parallel mesh is defined, the serial mesh can be deleted. 
+ int *partitioning = nxyz.Size() ? mesh->CartesianPartitioning(nxyz) : NULL; + ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh, partitioning); + delete [] partitioning; + delete mesh; + for (int l = 0; l < par_ref_levels; l++) + { + pmesh->UniformRefinement(); + } + // pmesh->PrintInfo(); + long global_ne = pmesh->ReduceInt(pmesh->GetNE()); + if (myid == 0) + { + cout << "Total number of elements: " << global_ne << endl; + } + + // 7. Define a parallel finite element space on the parallel mesh. Here we + // use continuous Lagrange finite elements of the specified order. If + // order < 1, we instead use an isoparametric/isogeometric space. + FiniteElementCollection *fec; + MFEM_VERIFY(order > 0, "invalid 'order': " << order); + fec = new H1_FECollection(order, dim); + ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec); + HYPRE_Int size = fespace->GlobalTrueVSize(); + if (myid == 0) + { + cout << "Number of finite element unknowns: " << size << endl; + } + + // 8. Determine the list of true (i.e. parallel conforming) essential + // boundary dofs. In this example, the boundary conditions are defined + // by marking all the boundary attributes from the mesh as essential + // (Dirichlet) and converting them to a list of true dofs. + Array ess_tdof_list; + if (pmesh->bdr_attributes.Size()) + { + Array ess_bdr(pmesh->bdr_attributes.Max()); + ess_bdr = 1; + fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list); + } + + // 9. Set up the parallel linear form b(.) which corresponds to the + // right-hand side of the FEM linear system, which in this case is + // (1,phi_i) where phi_i are the basis functions in fespace. + ParLinearForm *b = new ParLinearForm(fespace); + ConstantCoefficient one(1.0); + b->AddDomainIntegrator(new DomainLFIntegrator(one)); + b->Assemble(); + + // 10. Define the solution vector x as a parallel finite element grid function + // corresponding to fespace. Initialize x with initial guess of zero, + // which satisfies the boundary conditions. + ParGridFunction x(fespace); + x = 0.0; + + // 11. Set up the parallel bilinear form a(.,.) on the finite element space + // corresponding to the Laplacian operator -Delta, by adding the Diffusion + // domain integrator. + ParBilinearForm *a = new ParBilinearForm(fespace); + a->AddDomainIntegrator(new DiffusionIntegrator(one)); + + // 12. Assemble the parallel bilinear form and the corresponding linear + // system, applying any necessary transformations such as: parallel + // assembly, eliminating boundary conditions, applying conforming + // constraints for non-conforming AMR, static condensation, etc. + a->Assemble(); + + OperatorPtr A; + Vector B, X; + a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B); + + // 13. Solve the linear system A X = B. + // * With full assembly, use the preconditioner from AmgX. 
+ + bool amgx_verbose = true; + + string amgx_config = "{\n" + " \"config_version\": 2, \n" + " \"solver\": { \n" + " \"solver\": \"AMG\", \n" + " \"smoother\": { \n" + " \"scope\": \"jacobi\", \n" + " \"solver\": \"JACOBI_L1\" \n" + " }, \n" + " \"presweeps\": 1, \n" + " \"interpolator\": \"D2\", \n" + " \"max_row_sum\" : 0.9, \n" + " \"strength_threshold\" : 0.25, \n" + " \"postsweeps\": 1, \n" + " \"max_iters\": 1, \n" + " \"cycle\": \"V\""; + if (amgx_verbose) + { + amgx_config = amgx_config + ",\n" + " \"obtain_timings\": 1 \n"; + } + else + { + amgx_config = amgx_config + "\n"; + } + amgx_config = amgx_config + " }\n" + "}\n"; + + AmgXSolver *prec = new AmgXSolver; + prec->SetConvergenceCheck(false); + prec->ReadParameters(amgx_config, AmgXSolver::INTERNAL); + prec->InitExclusiveGPU(MPI_COMM_WORLD); + + CGSolver cg(MPI_COMM_WORLD); + const int max_cg_iter = 200; + const int cg_print_level = 3; + cg.SetRelTol(1e-12); + cg.SetMaxIter(max_cg_iter); + cg.SetPrintLevel(cg_print_level); + if (prec) { cg.SetPreconditioner(*prec); } + cg.SetOperator(*A); + + // Warm-up CG solve (in case of JIT to avoid timing it) + { + Vector Xtmp(X); + cg.SetMaxIter(2); + cg.SetPrintLevel(-1); + cg.Mult(B, Xtmp); + cg.SetMaxIter(max_cg_iter); + cg.SetPrintLevel(cg_print_level); + } + + // Sync all ranks + MPI_Barrier(pmesh->GetComm()); + + // Start CG timing. + tic_toc.Clear(); + + // Start & Stop CG timing. + tic_toc.Start(); + cg.Mult(B, X); + tic_toc.Stop(); + double rt_min, rt_max, my_rt; + my_rt = tic_toc.RealTime(); + MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, pmesh->GetComm()); + MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, pmesh->GetComm()); + + // Print timing results. + if (myid == 0) + { + int cg_iter = cg.GetNumIterations(); + // Note: In the pcg algorithm, the number of operator Mult() calls is + // N_iter and the number of preconditioner Mult() calls is N_iter+1. + cout << '\n' + << "Total iterations "<< cg_iter; + cout << '\n' + << "Total CG time: " << rt_max << " (" << rt_min << ") sec." + << endl; + cout << "Time per CG step: " + << rt_max / cg_iter << " (" + << rt_min / cg_iter << ") sec." << endl; + cout << "\n\"DOFs/sec\" in CG: " + << 1e-6*size*cg_iter/rt_max << " (" + << 1e-6*size*cg_iter/rt_min << ") million.\n" + << endl; + } + delete prec; + + // 14. Recover the parallel grid function corresponding to X. This is the + // local finite element solution on each processor. + a->RecoverFEMSolution(X, *b, x); + + // 15. Save the refined mesh and the solution in parallel. This output can + // be viewed later using GLVis: "glvis -np -m mesh -g sol". + { + //disabled + } + + // 16. Send the solution by socket to a GLVis server. + //if (visualization) + { + //disabled + } + + // 17. Free the used memory. + delete a; + delete b; + delete fespace; + if (order > 0) { delete fec; } + delete pmesh; + + MPI_Finalize(); + + return 0; +} + + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz) +{ + int log_p = (int)floor(log((double)num_procs)/log(2.0) + 0.5); + MFEM_VERIFY((1 << log_p) == num_procs, + "number of processor is not a power of 2: " << num_procs); + MFEM_VERIFY(dim == 3, "dim = " << dim << " is NOT implemented!"); + + // Determine processor decomposition. + int s[3]; + s[0] = log_p/3 + (log_p%3 > 0 ? 1 : 0); + s[1] = log_p/3 + (log_p%3 > 1 ? 1 : 0); + s[2] = log_p/3; + nxyz.SetSize(dim); + nxyz[0] = 1 << s[0]; + nxyz[1] = 1 << s[1]; + nxyz[2] = 1 << s[2]; + + // Determine mesh size. 
+ int ser_level = level%3; + par_ref_levels = level/3; + int log_n = log_p + ser_level; + int t[3]; + t[0] = log_n/3 + (log_n%3 > 0 ? 1 : 0); + t[1] = log_n/3 + (log_n%3 > 1 ? 1 : 0); + t[2] = log_n/3; + + // Create the Mesh. + const bool gen_edges = true; + const bool sfc_ordering = true; + Mesh *mesh = new Mesh(1 << t[0], 1 << t[1], 1 << t[2], + Element::HEXAHEDRON, gen_edges, + 1.0, 1.0, 1.0, sfc_ordering); + if (myid == 0) + { + cout << "Processor partitioning: "; + nxyz.Print(cout, dim); + + // Mesh dimensions AFTER parallel refinement: + cout << "Mesh dimensions: " + << (1 << (t[0]+par_ref_levels)) << ' ' + << (1 << (t[1]+par_ref_levels)) << ' ' + << (1 << (t[2]+par_ref_levels)) << endl; + } + + return mesh; +} diff --git a/tests/mfem_amgx/ex1p_amgx_precon_l1_jacobi.sh b/tests/mfem_amgx/ex1p_amgx_precon_l1_jacobi.sh new file mode 100644 index 0000000..ffdb5a1 --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_precon_l1_jacobi.sh @@ -0,0 +1,85 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supported by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. 
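
In contrast, the *_precon variants above keep the Krylov iteration in MFEM: CGSolver drives PCG on the fully assembled operator and each preconditioner application is a single AmgX AMG V-cycle (max_iters: 1, convergence checking disabled). A condensed sketch of that wiring follows, assuming the same parallel MFEM+AmgX build; the helper name PCGWithAmgXPrecon is mine and the sketch is illustrative only, not part of the patch.

#include "mfem.hpp"
#include <string>

// 'config' is a JSON string such as the classical-AMG / JACOBI_L1 one above.
void PCGWithAmgXPrecon(const std::string &config, mfem::HypreParMatrix &A,
                       const mfem::Vector &B, mfem::Vector &X)
{
   mfem::AmgXSolver prec;
   prec.SetConvergenceCheck(false);   // a fixed one-cycle preconditioner
   prec.ReadParameters(config, mfem::AmgXSolver::INTERNAL);
   prec.InitExclusiveGPU(MPI_COMM_WORLD);

   mfem::CGSolver cg(MPI_COMM_WORLD);
   cg.SetRelTol(1e-12);
   cg.SetMaxIter(200);
   cg.SetPrintLevel(3);
   cg.SetPreconditioner(prec);        // set before SetOperator, matching the drivers above
   cg.SetOperator(A);
   cg.Mult(B, X);
}
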
+ +problem=${problem:-0} + +function build_tests() +{ + local make_opts=( + "-j" "$num_proc_build" + "MFEM_DIR=$MFEM_DIR" + "BLD=$test_exe_dir/") + quoted_echo make "${make_opts[@]}" + [[ -n "$dry_run" ]] || make "${make_opts[@]}" +} + + +function run_tests() +{ + local test_name="ex1p_amgx_precon_l1_jacobi" + set_mpi_options + # 'min_p' can be set on the command line + local l_min_p=${min_p:-1} + # 'max_p' can be set on the command line + local l_max_p=${max_p:-4} + # 'max_dofs_proc' can be set on the command line + local l_max_dofs_proc=${max_dofs_proc:-4200000} + # 'mfem_devs' can be set on the command line + local l_mfem_devs=(${mfem_devs:-cuda}) + local dim=3 + local args= + for dev in ${l_mfem_devs[@]}; do + for ((p = l_min_p; p <= l_max_p; p++)) do + for ((l = 0; (p**dim)*(2**l) <= l_max_dofs_proc; l++)) do + args=(-o $p -l $l -d $dev) + if [ -z "$dry_run" ]; then + echo "Running test:" + quoted_echo $mpi_run ./$test_name "${args[@]}" + $mpi_run ./$test_name "${args[@]}" || \ + printf "\nError in the test, error code: $?\n\n" + else + $dry_run $mpi_run ./$test_name "${args[@]}" + fi + done + done + done +} + + +function build_and_run_tests() +{ + $dry_run cd "$test_dir" + + build_tests || return 1 + echo + + [[ -n "$build_only" ]] && return + + $dry_run cd "$test_exe_dir" + run_tests +} + + +mfem_branch=${mfem_branch:-master} +libceed_branch=${libceed_branch:-master} + +# Uncomment the next line to enable 64-bit HYPRE_Int: +# hypre_big_int=1 + +packages=${packages:-cuda metis amgx hypre mfem} + +test_required_packages=${packages} diff --git a/tests/mfem_amgx/ex1p_amgx_solver.cpp b/tests/mfem_amgx/ex1p_amgx_solver.cpp index 37e25a9..3caddee 100644 --- a/tests/mfem_amgx/ex1p_amgx_solver.cpp +++ b/tests/mfem_amgx/ex1p_amgx_solver.cpp @@ -170,7 +170,7 @@ int main(int argc, char *argv[]) " \"interpolator\": \"D2\", \n" " \"max_row_sum\" : 0.9, \n" " \"strength_threshold\" : 0.25, \n" - " \"max_iters\": 2, \n" + " \"max_iters\": 1, \n" " \"scope\": \"amg\", \n" " \"max_levels\": 100, \n" " \"cycle\": \"V\", \n" @@ -178,7 +178,7 @@ int main(int argc, char *argv[]) " }, \n" " \"solver\": \"PCG\", \n" " \"max_iters\": 100, \n" - " \"convergence\": \"RELATIVE_MAX\", \n" + " \"convergence\": \"RELATIVE_INI_CORE\", \n" " \"scope\": \"main\", \n" " \"tolerance\": 1e-12, \n" " \"norm\": \"L2\" "; @@ -197,7 +197,7 @@ int main(int argc, char *argv[]) amgx_config = amgx_config + " } \n" + "} \n"; AmgXSolver *amgx = new AmgXSolver; - amgx->ConfigureAs(AmgXSolver::SOLVER); + amgx->SetConvergenceCheck(true); amgx->ReadParameters(amgx_config, AmgXSolver::INTERNAL); amgx->InitExclusiveGPU(MPI_COMM_WORLD); diff --git a/tests/mfem_amgx/ex1p_amgx_solver.sh b/tests/mfem_amgx/ex1p_amgx_solver.sh index 6644dba..67ef6e0 100644 --- a/tests/mfem_amgx/ex1p_amgx_solver.sh +++ b/tests/mfem_amgx/ex1p_amgx_solver.sh @@ -74,7 +74,7 @@ function build_and_run_tests() } -mfem_branch=${mfem_branch:-amgx/artv3/configure} +mfem_branch=${mfem_branch:-master} libceed_branch=${libceed_branch:-master} # Uncomment the next line to enable 64-bit HYPRE_Int: diff --git a/tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.cpp b/tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.cpp new file mode 100644 index 0000000..d28aced --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.cpp @@ -0,0 +1,322 @@ +// MFEM Example 1 - Parallel Version +// +// Compile with: make ex1p +// +// Description: This example code demonstrates the use of MFEM to define a +// simple finite element discretization of the Laplace problem +// -Delta u = 1 with 
homogeneous Dirichlet boundary conditions. +// Specifically, we discretize using a FE space of the specified +// order, or if order < 1 using an isoparametric/isogeometric +// space (i.e. quadratic for quadratic curvilinear mesh, NURBS for +// NURBS mesh, etc.) + +#include "mfem.hpp" +#include +#include + +using namespace std; +using namespace mfem; + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz); + +int main(int argc, char *argv[]) +{ + // 1. Initialize MPI. + int num_procs, myid; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + + // 2. Parse command-line options. + int dim = 3; + int level = 0; + int order = 1; + const char *device_config = "cuda"; + + OptionsParser args(argc, argv); + args.AddOption(&dim, "-dim", "--mesh-dimension", + "Solve 2D or 3D problem."); + args.AddOption(&level, "-l", "--refinement-level", + "Set the problem size: 2^level mesh elements per processor."); + args.AddOption(&order, "-o", "--order", + "Finite element order (polynomial degree)."); + args.AddOption(&device_config, "-d", "--device", + "Device configuration string, see Device::Configure()."); + args.Parse(); + if (!args.Good()) + { + if (myid == 0) + { + args.PrintUsage(cout); + } + MPI_Finalize(); + return 1; + } + if (myid == 0) + { + args.PrintOptions(cout); + } + + // 3. Enable hardware devices such as GPUs, and programming models such as + // CUDA, OCCA, RAJA and OpenMP based on command line options. + Device device(device_config); + if (myid == 0) { device.Print(); } + + // 4. Read the (serial) mesh from the given mesh file on all processors. We + // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface + // and volume meshes with the same code. + int par_ref_levels; + Array nxyz; + Mesh *mesh = make_mesh(myid, num_procs, dim, level, par_ref_levels, nxyz); + + // 5. Refine the serial mesh on all processors to increase the resolution. In + // this example we do 'ref_levels' of uniform refinement. We choose + // 'ref_levels' to be the largest number that gives a final mesh with no + // more than 10,000 elements. + { + // Disabled + } + + // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine + // this mesh further in parallel to increase the resolution. Once the + // parallel mesh is defined, the serial mesh can be deleted. + int *partitioning = nxyz.Size() ? mesh->CartesianPartitioning(nxyz) : NULL; + ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh, partitioning); + delete [] partitioning; + delete mesh; + for (int l = 0; l < par_ref_levels; l++) + { + pmesh->UniformRefinement(); + } + // pmesh->PrintInfo(); + long global_ne = pmesh->ReduceInt(pmesh->GetNE()); + if (myid == 0) + { + cout << "Total number of elements: " << global_ne << endl; + } + + // 7. Define a parallel finite element space on the parallel mesh. Here we + // use continuous Lagrange finite elements of the specified order. If + // order < 1, we instead use an isoparametric/isogeometric space. + FiniteElementCollection *fec; + MFEM_VERIFY(order > 0, "invalid 'order': " << order); + fec = new H1_FECollection(order, dim); + ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec); + HYPRE_Int size = fespace->GlobalTrueVSize(); + if (myid == 0) + { + cout << "Number of finite element unknowns: " << size << endl; + } + + // 8. Determine the list of true (i.e. parallel conforming) essential + // boundary dofs. 
In this example, the boundary conditions are defined + // by marking all the boundary attributes from the mesh as essential + // (Dirichlet) and converting them to a list of true dofs. + Array ess_tdof_list; + if (pmesh->bdr_attributes.Size()) + { + Array ess_bdr(pmesh->bdr_attributes.Max()); + ess_bdr = 1; + fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list); + } + + // 9. Set up the parallel linear form b(.) which corresponds to the + // right-hand side of the FEM linear system, which in this case is + // (1,phi_i) where phi_i are the basis functions in fespace. + ParLinearForm *b = new ParLinearForm(fespace); + ConstantCoefficient one(1.0); + b->AddDomainIntegrator(new DomainLFIntegrator(one)); + b->Assemble(); + + // 10. Define the solution vector x as a parallel finite element grid function + // corresponding to fespace. Initialize x with initial guess of zero, + // which satisfies the boundary conditions. + ParGridFunction x(fespace); + x = 0.0; + + // 11. Set up the parallel bilinear form a(.,.) on the finite element space + // corresponding to the Laplacian operator -Delta, by adding the Diffusion + // domain integrator. + ParBilinearForm *a = new ParBilinearForm(fespace); + a->AddDomainIntegrator(new DiffusionIntegrator(one)); + + // 12. Assemble the parallel bilinear form and the corresponding linear + // system, applying any necessary transformations such as: parallel + // assembly, eliminating boundary conditions, applying conforming + // constraints for non-conforming AMR, static condensation, etc. + a->Assemble(); + + OperatorPtr A; + Vector B, X; + a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B); + + // 13. Solve the linear system A X = B. + // * With full assembly, use the preconditioner from AmgX. + + bool amgx_verbose = true; + + string amgx_config = "{ \n" + " \"config_version\": 2, \n" + " \"solver\": { \n" + " \"preconditioner\": { \n" + " \"solver\": \"AMG\", \n" + " \"smoother\": { \n" + " \"scope\": \"jacobi\", \n" + " \"solver\": \"JACOBI_L1\" \n" + " }, \n" + " \"presweeps\": 1, \n" + " \"interpolator\": \"D2\", \n" + " \"max_row_sum\" : 0.9, \n" + " \"strength_threshold\" : 0.25, \n" + " \"max_iters\": 1, \n" + " \"scope\": \"amg\", \n" + " \"max_levels\": 100, \n" + " \"cycle\": \"V\", \n" + " \"postsweeps\": 1 \n" + " }, \n" + " \"solver\": \"PCG\", \n" + " \"max_iters\": 100, \n" + " \"convergence\": \"RELATIVE_INI_CORE\", \n" + " \"scope\": \"main\", \n" + " \"tolerance\": 1e-12, \n" + " \"norm\": \"L2\" "; + if (amgx_verbose) + { + amgx_config = amgx_config + ", \n" + " \"obtain_timings\": 1, \n" + " \"monitor_residual\": 1, \n" + " \"print_grid_stats\": 1, \n" + " \"print_solve_stats\": 1 \n"; + } + else + { + amgx_config = amgx_config + "\n"; + } + amgx_config = amgx_config + " } \n" + "} \n"; + + AmgXSolver *amgx = new AmgXSolver; + amgx->SetConvergenceCheck(true); + amgx->ReadParameters(amgx_config, AmgXSolver::INTERNAL); + amgx->InitExclusiveGPU(MPI_COMM_WORLD); + + amgx->SetOperator(*A.As()); + + // Warm-up CG solve (in case of JIT to avoid timing it) + { + Vector Xtmp(X); + amgx->Mult(B, Xtmp); + } + + // Sync all ranks + MPI_Barrier(pmesh->GetComm()); + + // Start CG timing. + tic_toc.Clear(); + + // Start & Stop CG timing. + tic_toc.Start(); + amgx->Mult(B, X); + tic_toc.Stop(); + double rt_min, rt_max, my_rt; + my_rt = tic_toc.RealTime(); + MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, pmesh->GetComm()); + MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, pmesh->GetComm()); + + // Print timing results. 
+ if (myid == 0) + { + int cg_iter = amgx->GetNumIterations(); + // Note: In the pcg algorithm, the number of operator Mult() calls is + // N_iter and the number of preconditioner Mult() calls is N_iter+1. + cout << '\n' + << "Total iterations "<< cg_iter; + cout << '\n' + << "Total CG time: " << rt_max << " (" << rt_min << ") sec." + << endl; + cout << "Time per CG step: " + << rt_max / cg_iter << " (" + << rt_min / cg_iter << ") sec." << endl; + cout << "\n\"DOFs/sec\" in CG: " + << 1e-6*size*cg_iter/rt_max << " (" + << 1e-6*size*cg_iter/rt_min << ") million.\n" + << endl; + } + delete amgx; + + // 14. Recover the parallel grid function corresponding to X. This is the + // local finite element solution on each processor. + a->RecoverFEMSolution(X, *b, x); + + // 15. Save the refined mesh and the solution in parallel. This output can + // be viewed later using GLVis: "glvis -np -m mesh -g sol". + { + //disabled + } + + // 16. Send the solution by socket to a GLVis server. + //if (visualization) + { + //disabled + } + + // 17. Free the used memory. + delete a; + delete b; + delete fespace; + if (order > 0) { delete fec; } + delete pmesh; + + MPI_Finalize(); + + return 0; +} + + +Mesh *make_mesh(int myid, int num_procs, int dim, int level, + int &par_ref_levels, Array &nxyz) +{ + int log_p = (int)floor(log((double)num_procs)/log(2.0) + 0.5); + MFEM_VERIFY((1 << log_p) == num_procs, + "number of processor is not a power of 2: " << num_procs); + MFEM_VERIFY(dim == 3, "dim = " << dim << " is NOT implemented!"); + + // Determine processor decomposition. + int s[3]; + s[0] = log_p/3 + (log_p%3 > 0 ? 1 : 0); + s[1] = log_p/3 + (log_p%3 > 1 ? 1 : 0); + s[2] = log_p/3; + nxyz.SetSize(dim); + nxyz[0] = 1 << s[0]; + nxyz[1] = 1 << s[1]; + nxyz[2] = 1 << s[2]; + + // Determine mesh size. + int ser_level = level%3; + par_ref_levels = level/3; + int log_n = log_p + ser_level; + int t[3]; + t[0] = log_n/3 + (log_n%3 > 0 ? 1 : 0); + t[1] = log_n/3 + (log_n%3 > 1 ? 1 : 0); + t[2] = log_n/3; + + // Create the Mesh. + const bool gen_edges = true; + const bool sfc_ordering = true; + Mesh *mesh = new Mesh(1 << t[0], 1 << t[1], 1 << t[2], + Element::HEXAHEDRON, gen_edges, + 1.0, 1.0, 1.0, sfc_ordering); + if (myid == 0) + { + cout << "Processor partitioning: "; + nxyz.Print(cout, dim); + + // Mesh dimensions AFTER parallel refinement: + cout << "Mesh dimensions: " + << (1 << (t[0]+par_ref_levels)) << ' ' + << (1 << (t[1]+par_ref_levels)) << ' ' + << (1 << (t[2]+par_ref_levels)) << endl; + } + + return mesh; +} diff --git a/tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.sh b/tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.sh new file mode 100644 index 0000000..43d31b3 --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.sh @@ -0,0 +1,85 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supported by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. 
Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. + +problem=${problem:-0} + +function build_tests() +{ + local make_opts=( + "-j" "$num_proc_build" + "MFEM_DIR=$MFEM_DIR" + "BLD=$test_exe_dir/") + quoted_echo make "${make_opts[@]}" + [[ -n "$dry_run" ]] || make "${make_opts[@]}" +} + + +function run_tests() +{ + local test_name="ex1p_amgx_solver_l1_jacobi" + set_mpi_options + # 'min_p' can be set on the command line + local l_min_p=${min_p:-1} + # 'max_p' can be set on the command line + local l_max_p=${max_p:-4} + # 'max_dofs_proc' can be set on the command line + local l_max_dofs_proc=${max_dofs_proc:-4200000} + # 'mfem_devs' can be set on the command line + local l_mfem_devs=(${mfem_devs:-cuda}) + local dim=3 + local args= + for dev in ${l_mfem_devs[@]}; do + for ((p = l_min_p; p <= l_max_p; p++)) do + for ((l = 0; (p**dim)*(2**l) <= l_max_dofs_proc; l++)) do + args=(-o $p -l $l -d $dev) + if [ -z "$dry_run" ]; then + echo "Running test:" + quoted_echo $mpi_run ./$test_name "${args[@]}" + $mpi_run ./$test_name "${args[@]}" || \ + printf "\nError in the test, error code: $?\n\n" + else + $dry_run $mpi_run ./$test_name "${args[@]}" + fi + done + done + done +} + + +function build_and_run_tests() +{ + $dry_run cd "$test_dir" + + build_tests || return 1 + echo + + [[ -n "$build_only" ]] && return + + $dry_run cd "$test_exe_dir" + run_tests +} + + +mfem_branch=${mfem_branch:-master} +libceed_branch=${libceed_branch:-master} + +# Uncomment the next line to enable 64-bit HYPRE_Int: +# hypre_big_int=1 + +packages=${packages:-cuda metis amgx hypre mfem} + +test_required_packages=${packages} diff --git a/tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.sh~ b/tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.sh~ new file mode 100644 index 0000000..9af721f --- /dev/null +++ b/tests/mfem_amgx/ex1p_amgx_solver_l1_jacobi.sh~ @@ -0,0 +1,85 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +# reserved. See files LICENSE and NOTICE for details. +# +# This file is part of CEED, a collection of benchmarks, miniapps, software +# libraries and APIs for efficient high-order finite element and spectral +# element discretizations for exascale applications. For more information and +# source code availability see http://github.com/ceed. +# +# The CEED research is supported by the Exascale Computing Project +# (17-SC-20-SC), a collaborative effort of two U.S. Department of Energy +# organizations (Office of Science and the National Nuclear Security +# Administration) responsible for the planning and preparation of a capable +# exascale ecosystem, including software, applications, hardware, advanced +# system engineering and early testbed platforms, in support of the nation's +# exascale computing imperative. 
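
The run_tests() loops in the launch scripts above sweep the polynomial order p and refinement level l, keeping a case only while p^dim * 2^l stays below max_dofs_proc (4,200,000 by default), which reads as a rough per-rank problem-size cap. The C++ sketch below simply enumerates the (p, l) pairs those defaults would launch; the "rough per-rank estimate" interpretation and the translation from the shell arithmetic are mine, and the sketch is illustrative only.

#include <cstdio>

int main()
{
   const int  dim = 3;
   const int  min_p = 1, max_p = 4;        // script defaults
   const long max_dofs_proc = 4200000;     // script default

   for (int p = min_p; p <= max_p; p++)
   {
      long p_dim = 1;
      for (int d = 0; d < dim; d++) { p_dim *= p; }        // p**dim
      for (long l = 0; p_dim*(1L << l) <= max_dofs_proc; l++)
      {
         std::printf("would run: -o %d -l %ld -d cuda   (p^dim * 2^l = %ld)\n",
                     p, l, p_dim*(1L << l));
      }
   }
   return 0;
}
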
+ +problem=${problem:-0} + +function build_tests() +{ + local make_opts=( + "-j" "$num_proc_build" + "MFEM_DIR=$MFEM_DIR" + "BLD=$test_exe_dir/") + quoted_echo make "${make_opts[@]}" + [[ -n "$dry_run" ]] || make "${make_opts[@]}" +} + + +function run_tests() +{ + local test_name="ex1p_amgx_precon_l1_jacobi" + set_mpi_options + # 'min_p' can be set on the command line + local l_min_p=${min_p:-1} + # 'max_p' can be set on the command line + local l_max_p=${max_p:-4} + # 'max_dofs_proc' can be set on the command line + local l_max_dofs_proc=${max_dofs_proc:-4200000} + # 'mfem_devs' can be set on the command line + local l_mfem_devs=(${mfem_devs:-cuda}) + local dim=3 + local args= + for dev in ${l_mfem_devs[@]}; do + for ((p = l_min_p; p <= l_max_p; p++)) do + for ((l = 0; (p**dim)*(2**l) <= l_max_dofs_proc; l++)) do + args=(-o $p -l $l -d $dev) + if [ -z "$dry_run" ]; then + echo "Running test:" + quoted_echo $mpi_run ./$test_name "${args[@]}" + $mpi_run ./$test_name "${args[@]}" || \ + printf "\nError in the test, error code: $?\n\n" + else + $dry_run $mpi_run ./$test_name "${args[@]}" + fi + done + done + done +} + + +function build_and_run_tests() +{ + $dry_run cd "$test_dir" + + build_tests || return 1 + echo + + [[ -n "$build_only" ]] && return + + $dry_run cd "$test_exe_dir" + run_tests +} + + +mfem_branch=${mfem_branch:-amgx/artv3/configure} +libceed_branch=${libceed_branch:-master} + +# Uncomment the next line to enable 64-bit HYPRE_Int: +# hypre_big_int=1 + +packages=${packages:-cuda metis amgx hypre mfem} + +test_required_packages=${packages} diff --git a/tests/mfem_amgx/makefile b/tests/mfem_amgx/makefile index 59d7e82..3ba3262 100644 --- a/tests/mfem_amgx/makefile +++ b/tests/mfem_amgx/makefile @@ -27,6 +27,12 @@ MFEM_LIB_FILE = mfem_is_not_built EX1 = $(BLD)ex1p_amgx_precon EX2 = $(BLD)ex1p_amgx_solver +EX3 = $(BLD)ex1p_amgx_agg_solver +EX4 = $(BLD)ex1p_amgx_agg_precon +EX5 = $(BLD)ex1p_amgx_precon_l1_jacobi +EX6 = $(BLD)ex1p_amgx_solver_l1_jacobi +EX7 = $(BLD)ex1p_amgx_agg_solver_l1_jacobi +EX8 = $(BLD)ex1p_amgx_agg_precon_l1_jacobi ifeq ($(MFEM_USE_MPI),NO) $(error A parallel MFEM build is required.) endif @@ -42,7 +48,7 @@ endif $(BLD)%: $(SRC)%.cpp $(MFEM_LIB_FILE) $(CONFIG_MK) $(MFEM_CXX) $(MFEM_FLAGS) $< -o $@ $(MFEM_LIBS) -all: $(EX1) $(EX2) +all: $(EX1) $(EX2) $(EX3) $(EX4) $(EX5) $(EX6) $(EX7) $(EX8) # Generate an error message if the MFEM library is not built and exit $(MFEM_LIB_FILE):