diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..07ee29c57 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +*.o +*.obj +*.a +*.so +*.out.* +*.err.* +*.err +*.out +*.perf +config.log +config.status +Makefile.config +HYPRE_config.h +autom4te.cache diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 000000000..d138bc42b --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,24 @@ +# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +# .readthedocs.yml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: src/docs/usr-manual/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.7 + install: + - requirements: src/docs/usr-manual/requirements.txt diff --git a/AUTOTEST/check-headers.filters b/AUTOTEST/check-headers.filters new file mode 100644 index 000000000..580e2dff6 --- /dev/null +++ b/AUTOTEST/check-headers.filters @@ -0,0 +1,6 @@ +mpicc +mpif77 +include/fortran_matrix.h +include/multivector.h +include/interpreter.h +include/temp_multivector.h diff --git a/AUTOTEST/check-headers.sh b/AUTOTEST/check-headers.sh new file mode 100755 index 000000000..c0db0d669 --- /dev/null +++ b/AUTOTEST/check-headers.sh @@ -0,0 +1,41 @@ +#!/bin/sh +# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +testname=`basename $0 .sh` + +# Echo usage information +case $1 in + -h|-help) + cat <&2 diff --git a/AUTOTEST/check-license.filters b/AUTOTEST/check-license.filters index 5f155e689..99d2f8ae8 100644 --- a/AUTOTEST/check-license.filters +++ b/AUTOTEST/check-license.filters @@ -2,6 +2,7 @@ ./AUTOTEST/.*.fil ./AUTOTEST/.*.err ./AUTOTEST/.*.log +./AUTOTEST/runtests-.* ./src/test/struct_migrate ./src/test/maxwell_unscaled ./src/test/sstruct_fac @@ -14,4 +15,4 @@ ./src/zerr ./src/config.log ./src/config.status -TVD.v3breakpoints \ No newline at end of file +TVD.v3breakpoints diff --git a/AUTOTEST/check-license.sh b/AUTOTEST/check-license.sh index 12a6fbb6e..d285dfd80 100755 --- a/AUTOTEST/check-license.sh +++ b/AUTOTEST/check-license.sh @@ -67,6 +67,7 @@ cat > check-license.remove < check-license.remove < check-mem.files + +egrep '(^|[^[:alnum:]_]+)malloc[[:space:]]*\(' `cat check-mem.files` >&2 +egrep '(^|[^[:alnum:]_]+)calloc[[:space:]]*\(' `cat check-mem.files` >&2 +egrep '(^|[^[:alnum:]_]+)realloc[[:space:]]*\(' `cat check-mem.files` >&2 +egrep '(^|[^[:alnum:]_]+)free[[:space:]]*\(' `cat check-mem.files` >&2 + +rm -f check-mem.files diff --git a/AUTOTEST/cmake.sh b/AUTOTEST/cmake.sh index 671828ec2..3f6ab0f7a 100755 --- a/AUTOTEST/cmake.sh +++ b/AUTOTEST/cmake.sh @@ -81,7 +81,7 @@ rm -fr hypre # Configure cd $src_dir/cmbuild -cmake $copts .. +eval cmake $copts .. 
make $mopts install # Make diff --git a/AUTOTEST/examples.sh b/AUTOTEST/examples.sh index 290db8cbc..610766faa 100755 --- a/AUTOTEST/examples.sh +++ b/AUTOTEST/examples.sh @@ -14,9 +14,10 @@ case $1 in $0 [-h] {src_dir} [options] [-rt ] - where: -h|-help prints this usage information and exits - {src_dir} is the hypre source directory - - run (test = default, bigint, maxdim, complex) + where: {src_dir} is the hypre source directory + - run (test = default, bigint, maxdim, complex) + -spack compile and link drivers to spack build + -h|-help prints this usage information and exits This script builds the hypre example codes in {src_dir}/examples and runs the example regression tests in test/TEST_examples. @@ -40,6 +41,9 @@ do shift break ;; + -spack) + shift; spackdir="$1"; shift + ;; -*) tname=`echo $1 | sed 's/-//'` tests="$tests $tname" @@ -61,9 +65,17 @@ mkdir -p $output_dir # Run make in the examples directory cd $src_dir/examples make clean +mopt="" +if [ -n "$spackdir" ]; then + mopt="HYPRE_DIR=$spackdir" +fi for tname in $tests do - make $tname + if [ "$tname" = "gpu" ]; then + make -j -f Makefile_gpu $mopt $tname + else + make $mopt $tname + fi done # Run the examples regression test diff --git a/AUTOTEST/machine-lassen.sh b/AUTOTEST/machine-lassen.sh index 1818a27c1..b9a6877b5 100755 --- a/AUTOTEST/machine-lassen.sh +++ b/AUTOTEST/machine-lassen.sh @@ -11,7 +11,7 @@ case $1 in -h|-help) cat <&2 done - diff --git a/AUTOTEST/machine-ray.sh b/AUTOTEST/machine-ray.sh index c9f2d4de9..139848792 100755 --- a/AUTOTEST/machine-ray.sh +++ b/AUTOTEST/machine-ray.sh @@ -11,7 +11,7 @@ case $1 in -h|-help) cat <&2 done - diff --git a/AUTOTEST/machine-tux-exlibs.sh b/AUTOTEST/machine-tux-exlibs.sh index 2882edae4..30e105d09 100755 --- a/AUTOTEST/machine-tux-exlibs.sh +++ b/AUTOTEST/machine-tux-exlibs.sh @@ -49,7 +49,7 @@ co="--enable-debug --with-mli --with-superlu --with-superlu-include=/home/falgou ./test.sh basic.sh $src_dir -co: $co -mo: $mo ./renametest.sh basic 
$output_dir/basic-superlu -co="--enable-debug --with-mli --with-superlu --with-superlu-include=/home/falgout2/codes/superlu/SuperLU_5.2.1/SRC --with-dsuperlu --with-dsuperlu-include=/home/falgout2/codes/superlu/SuperLU_DIST_5.2.1/SRC --with-blas-lib=\\'-L/home/falgout2/codes/blas/BLAS-3.7.1 -lblas -lgfortran\\' --with-dsuperlu-lib=\\'-L/home/falgout2/codes/superlu/SuperLU_DIST_5.2.1/lib -lsuperlu_dist -L/home/falgout2/codes/parmetis/parmetis-4.0.3/build/Linux-x86_64/libparmetis -lparmetis -L/home/falgout2/codes/parmetis/parmetis-4.0.3/build/Linux-x86_64/libmetis -lmetis\\'" +co="--enable-debug --with-mli --with-superlu --with-superlu-include=/home/falgout2/codes/superlu/SuperLU_5.2.1/SRC --with-dsuperlu --with-dsuperlu-include=/home/falgout2/codes/superlu/superlu_dist-6.3.1/SRC --with-blas-lib=\\'-L/home/falgout2/codes/blas/BLAS-3.7.1 -lblas -lgfortran\\' --with-dsuperlu-lib=\\'-L/home/falgout2/codes/superlu/superlu_dist-6.3.1/lib -lsuperlu_dist -L/home/falgout2/codes/parmetis/parmetis-4.0.3/build/Linux-x86_64/libparmetis -lparmetis -L/home/falgout2/codes/parmetis/parmetis-4.0.3/build/Linux-x86_64/libmetis -lmetis -lstdc++\\'" ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro ./renametest.sh basic $output_dir/basic-dsuperlu diff --git a/AUTOTEST/machine-tux-spack.filters b/AUTOTEST/machine-tux-spack.filters new file mode 100644 index 000000000..68c17b0df --- /dev/null +++ b/AUTOTEST/machine-tux-spack.filters @@ -0,0 +1 @@ +==> Warning: diff --git a/AUTOTEST/machine-tux-spack.sh b/AUTOTEST/machine-tux-spack.sh new file mode 100755 index 000000000..0800559fb --- /dev/null +++ b/AUTOTEST/machine-tux-spack.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +testname=`basename $0 .sh` + +# Echo usage information +case $1 in + -h|-help) + cat <&2 +done diff --git a/AUTOTEST/machine-tux.sh b/AUTOTEST/machine-tux.sh index 030a511c7..578149fad 100755 --- a/AUTOTEST/machine-tux.sh +++ b/AUTOTEST/machine-tux.sh @@ -43,13 +43,17 @@ shift ./test.sh check-license.sh $src_dir/.. mv -f check-license.??? $output_dir -# Check for 'int', 'double', and 'MPI_' +# Check usage of int, double, MPI, memory, headers ./test.sh check-int.sh $src_dir mv -f check-int.??? $output_dir ./test.sh check-double.sh $src_dir mv -f check-double.??? $output_dir ./test.sh check-mpi.sh $src_dir mv -f check-mpi.??? $output_dir +./test.sh check-mem.sh $src_dir +mv -f check-mem.??? $output_dir +./test.sh check-headers.sh $src_dir +mv -f check-headers.??? $output_dir # Basic build and run tests mo="-j test" @@ -84,10 +88,6 @@ co="--with-strict-checking" ./test.sh basic.sh $src_dir -co: $co -mo: $mo ./renametest.sh basic $output_dir/basic--with-strict-checking -co="--with-strict-checking --enable-global-partition" -./test.sh basic.sh $src_dir -co: $co -mo: $mo -./renametest.sh basic $output_dir/basic--with-strict-global - co="--enable-shared" ./test.sh basic.sh $src_dir -co: $co -mo: $mo ./renametest.sh basic $output_dir/basic--enable-shared @@ -108,16 +108,16 @@ grep -v make.err basic.err > basic.tmp mv basic.tmp basic.err ./renametest.sh basic $output_dir/basic--enable-complex -co="--enable-debug --enable-global-partition" -RO="-fac" -./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $RO -eo: $eo -./renametest.sh basic $output_dir/basic-debug2 - co="--with-openmp" RO="-ams -ij -sstruct -struct -lobpcg -rt -D HYPRE_NO_SAVED -nthreads 2" ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $RO ./renametest.sh basic $output_dir/basic--with-openmp +co="--with-openmp --enable-hopscotch" +RO="-ij -sstruct -struct -lobpcg -rt -D HYPRE_NO_SAVED -nthreads 2" +./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $RO 
+./renametest.sh basic $output_dir/basic--with-concurrent-hopscotch + co="--enable-single --enable-debug" ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: -single ./renametest.sh basic $output_dir/basic--enable-single @@ -135,16 +135,13 @@ co="--enable-bigint --enable-debug" ./renametest.sh basic $output_dir/basic--enable-bigint co="--enable-mixedint --enable-debug" -./test.sh basic.sh $src_dir -co: $co -mo: $mo +RO="-ams -ij-mixed -sstruct-mixed -struct -lobpcg-mixed" +./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $RO ./renametest.sh basic $output_dir/basic--enable-mixedint co="--enable-debug --with-print-errors" ./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro -rt -valgrind -./renametest.sh basic $output_dir/basic--valgrind1 - -co="--enable-debug --enable-global-partition" -./test.sh basic.sh $src_dir -co: $co -mo: $mo -ro: $ro -rt -valgrind -./renametest.sh basic $output_dir/basic--valgrind2 +./renametest.sh basic $output_dir/basic--valgrind # CMake build and run tests mo="-j" @@ -155,10 +152,6 @@ co="" ./test.sh cmake.sh $src_dir -co: $co -mo: $mo ./renametest.sh cmake $output_dir/cmake-default -co="-DHYPRE_NO_GLOBAL_PARTITION=OFF" -./test.sh cmake.sh $src_dir -co: $co -mo: $mo -./renametest.sh cmake $output_dir/cmake-global-partition - co="-DHYPRE_SEQUENTIAL=ON" ./test.sh cmake.sh $src_dir -co: $co -mo: $mo ./renametest.sh cmake $output_dir/cmake-sequential diff --git a/AUTOTEST/make.filters b/AUTOTEST/make.filters index 58713ceeb..2fe2ca2e6 100644 --- a/AUTOTEST/make.filters +++ b/AUTOTEST/make.filters @@ -16,3 +16,4 @@ WARNING in dReadVector: Infinite loop. Program may not stop. icc: command line warning .*: overriding WARNING: no debugging flags detected The device linker only supports static linking. Any device code placed into a shared library by the qmkshrobj option will be inaccessible. 
+tcmalloc: large alloc diff --git a/AUTOTEST/make.sh b/AUTOTEST/make.sh index 4f708f0ff..6dd7e6604 100755 --- a/AUTOTEST/make.sh +++ b/AUTOTEST/make.sh @@ -13,8 +13,9 @@ case $1 in $0 [-h] {src_dir} [options for make] - where: {src_dir} is the hypre source directory - -h|-help prints this usage information and exits + where: {src_dir} is the hypre source directory + -spack compile and link drivers to spack build + -h|-help prints this usage information and exits This script runs make clean; make [options] in {src_dir}. @@ -29,7 +30,26 @@ esac src_dir=`cd $1; pwd` shift +# Parse the rest of the command line +mopts="" +while [ "$*" ] +do + case $1 in + -spack) + shift; spackdir="$1"; shift + ;; + *) + mopts="$mopts $1"; shift + ;; + esac +done + # Run make cd $src_dir make clean -make $@ +if [ -n "$spackdir" ]; then + cd $src_dir/test + make HYPRE_BUILD_DIR="$spackdir" $mopts +else + make $mopts +fi diff --git a/AUTOTEST/run.sh b/AUTOTEST/run.sh index dbc3ae830..1ac1f7b08 100755 --- a/AUTOTEST/run.sh +++ b/AUTOTEST/run.sh @@ -14,10 +14,10 @@ case $1 in $0 [-h] {src_dir} [options] [-rt ] - where: -h|-help prints this usage information and exits - {src_dir} is the hypre source directory + where: {src_dir} is the hypre source directory - run (test = ams, fac, ij, sstruct, struct) -all run all tests (default behavior) + -h|-help prints this usage information and exits This script runs runtest.sh in {src_dir}/test with optional parameters. 
@@ -43,8 +43,7 @@ do break ;; -*) - tname=`echo $1 | sed 's/-//'` - tests="$tests $tname" + tests="$tests $1" shift ;; esac @@ -52,10 +51,11 @@ done # If no tests were specified, run all tests if [ "$tests" = "" ]; then - tests="ams fac ij sstruct struct" + tests="-ams -fac -ij -sstruct -struct" fi # Setup +test_dir=`pwd` output_dir=`pwd`/$testname.dir rm -fr $output_dir mkdir -p $output_dir @@ -65,7 +65,8 @@ cd $src_dir/test ./cleantest.sh for tname in $tests do - ./runtest.sh $@ TEST_$tname/*.sh + rtests=`cat $test_dir/runtests$tname` + ./runtest.sh $@ `echo $rtests` done # Collect all error files from the tests diff --git a/AUTOTEST/runtests-ams b/AUTOTEST/runtests-ams new file mode 100644 index 000000000..07532c46b --- /dev/null +++ b/AUTOTEST/runtests-ams @@ -0,0 +1,2 @@ +TEST_ams/*.sh + diff --git a/AUTOTEST/runtests-bench b/AUTOTEST/runtests-bench new file mode 100644 index 000000000..1c72cef65 --- /dev/null +++ b/AUTOTEST/runtests-bench @@ -0,0 +1 @@ +TEST_bench/*.sh diff --git a/AUTOTEST/runtests-fac b/AUTOTEST/runtests-fac new file mode 100644 index 000000000..1aa9c9e88 --- /dev/null +++ b/AUTOTEST/runtests-fac @@ -0,0 +1,2 @@ +TEST_fac/*.sh + diff --git a/AUTOTEST/runtests-ij b/AUTOTEST/runtests-ij new file mode 100644 index 000000000..9205a9a1a --- /dev/null +++ b/AUTOTEST/runtests-ij @@ -0,0 +1,2 @@ +TEST_ij/*.sh + diff --git a/AUTOTEST/runtests-ij-gpu b/AUTOTEST/runtests-ij-gpu new file mode 100644 index 000000000..dba8f3530 --- /dev/null +++ b/AUTOTEST/runtests-ij-gpu @@ -0,0 +1,3 @@ +TEST_ij/agg_interp.sh +TEST_ij/[^a]*.sh + diff --git a/AUTOTEST/runtests-ij-mixed b/AUTOTEST/runtests-ij-mixed new file mode 100644 index 000000000..9b18c392b --- /dev/null +++ b/AUTOTEST/runtests-ij-mixed @@ -0,0 +1,2 @@ +TEST_ij/[^n]*.sh + diff --git a/AUTOTEST/runtests-lobpcg b/AUTOTEST/runtests-lobpcg new file mode 100644 index 000000000..cdcff06a5 --- /dev/null +++ b/AUTOTEST/runtests-lobpcg @@ -0,0 +1,2 @@ +TEST_lobpcg/*.sh + diff --git 
a/AUTOTEST/runtests-lobpcg-mixed b/AUTOTEST/runtests-lobpcg-mixed new file mode 100644 index 000000000..2903fff55 --- /dev/null +++ b/AUTOTEST/runtests-lobpcg-mixed @@ -0,0 +1,2 @@ +TEST_lobpcg/[^n]*.sh + diff --git a/AUTOTEST/runtests-longdouble b/AUTOTEST/runtests-longdouble new file mode 100644 index 000000000..28abe5786 --- /dev/null +++ b/AUTOTEST/runtests-longdouble @@ -0,0 +1,2 @@ +TEST_longdouble/*.sh + diff --git a/AUTOTEST/runtests-single b/AUTOTEST/runtests-single new file mode 100644 index 000000000..eef86b081 --- /dev/null +++ b/AUTOTEST/runtests-single @@ -0,0 +1,2 @@ +TEST_single/*.sh + diff --git a/AUTOTEST/runtests-sstruct b/AUTOTEST/runtests-sstruct new file mode 100644 index 000000000..d2f82f00a --- /dev/null +++ b/AUTOTEST/runtests-sstruct @@ -0,0 +1,2 @@ +TEST_sstruct/*.sh + diff --git a/AUTOTEST/runtests-sstruct-mixed b/AUTOTEST/runtests-sstruct-mixed new file mode 100644 index 000000000..95554d1a1 --- /dev/null +++ b/AUTOTEST/runtests-sstruct-mixed @@ -0,0 +1,4 @@ +TEST_sstruct/[^n]*.sh +TEST_sstruct/neighborpart.sh +TEST_sstruct/neumann.sh + diff --git a/AUTOTEST/runtests-struct b/AUTOTEST/runtests-struct new file mode 100644 index 000000000..0d28db497 --- /dev/null +++ b/AUTOTEST/runtests-struct @@ -0,0 +1,2 @@ +TEST_struct/[^b]*.sh + diff --git a/AUTOTEST/runtests-superlu b/AUTOTEST/runtests-superlu new file mode 100644 index 000000000..94ec74bf9 --- /dev/null +++ b/AUTOTEST/runtests-superlu @@ -0,0 +1,2 @@ +TEST_superlu/*.sh + diff --git a/CHANGELOG b/CHANGELOG index 4633dc373..736e29275 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,59 @@ # This file chronicles user-level changes, beginning with the most recent. # ============================================================================= +Version 2.21.0 released 2021/05/25 + +- Changed GPU defaults and changed default CUDA arch to 70. 
+- Added new interfaces: HYPRE_SetSpGemmUseCusparse(1) to select cuSPARSE SpGeMM + or hypre's MM (if 0); HYPRE_SetUseGpuRand(1) to use cuRand for PMIS or CPU RNG + (if 0); HYPRE_SetMemoryLocation to select default memory location; + HYPRE_SetExecutionPolicy to select default execution policy. +- Changed configure option: --enable-cub --> --enable-device-memory-pool; + --enable-nvtx --> --enable-gpu-profiling +- Added new configure options: --with-cuda-home=DIR; --with-gpu-arch=ARG +- Added GPU build options to CMake. +- Added a new HYPRE_RELEASE_NUMBER macro. +- Improved support for ParCSR matrices with many zero rows. +- Improved BoomerAMG setup efficiency when there are many off-diagonal coefficients. +- Fixed a bug in the strength matrix generation for nodal AMG. +- Removed unified virtual memory (UVM) requirement from GPU implementation. +- Removed --enable-global-partition option. +- Added UMPIRE support for memory pooling on GPUs. +- Added HIP/AMD support. +- New CUDA based triangular smoothers. +- New parallel ILU solver features and GPU support. +- Added CUDA 11 support. +- Various bug fixes. + +#==================================== + +Version 2.20.0 released 2020/09/24 + +- New matrix-based interpolation routines for AMG (CPU and GPU) +- Added GPU support for aggressive coarsening in AMG +- New AMG-DD solver +- Improved distributed sparse matrix-matrix and triple-matrix product + performance on GPUs +- IJMatrix/Vector assembly on GPUs (with pointers to GPU memory) +- Updated caliper usage +- Separated C and C++ headers +- Various bug fixes. + +#==================================== + +Version 2.19.0 released 2020/05/26 + +- Updated to support superlu-dist version to 6.3.1 +- Moved reference manual API to online documentation +- New AMG features to keep specified F-points and/or C-points. +- Added GPU support for AMG setup and several interpolation approaches. +- New parallel ILU solvers and smoothers. +- New MGR features. 
+- Added several interpolation routines based on matrix-matrix interpolations. +- Various bug fixes. + +#==================================== + Version 2.18.2 released 2019/10/28 - Fixed mixedint bugs. diff --git a/README.md b/README.md index 203b30455..54bab09ec 100644 --- a/README.md +++ b/README.md @@ -12,14 +12,15 @@ SPDX-License-Identifier: (Apache-2.0 OR MIT) preconditioners and solvers featuring multigrid methods for the solution of large, sparse linear systems of equations on massively parallel computers. -Documentation can be found [here](https://hypre.readthedocs.io/en/latest/). +For documentation, see our [readthedocs page](https://hypre.readthedocs.io/en/latest/). To install HYPRE, please see either the documentation or the file [INSTALL.md](./INSTALL.md). An overview of the HYPRE release history can be found in the file [CHANGELOG](./CHANGELOG). We appreciate feedback from users. Please submit comments, suggestions, and -issue reports to hypre-support@llnl.gov. +report issues on our [issues page](https://github.com/hypre-space/hypre/issues). +See also [SUPPORT.md](./SUPPORT.md). License diff --git a/SUPPORT.md b/SUPPORT.md new file mode 100644 index 000000000..3d52f59d5 --- /dev/null +++ b/SUPPORT.md @@ -0,0 +1,12 @@ + + +HYPRE Support Information +========================= + +For any questions or issues concerning hypre, submit an issue at +https://github.com/hypre-space/hypre/issues. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c0979577..5687d2c74 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,37 +3,52 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -cmake_minimum_required (VERSION 2.8.8) -if (POLICY CMP0074) - cmake_policy(SET CMP0074 NEW) -endif (POLICY CMP0074) +cmake_minimum_required(VERSION 3.13...3.16) -project (hypre) +if (${CMAKE_VERSION} VERSION_LESS 3.16) + cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}) +else () + cmake_policy(VERSION 3.16) +endif () # The version number. 
-set (HYPRE_VERSION 2.18.2) -set (HYPRE_DATE 2019/10/28) -set (HYPRE_TIME 00:00:00) -set (HYPRE_BUGS hypre-support@llnl.gov) -set (HYPRE_SRCDIR "${PROJECT_SOURCE_DIR}") - -if (${hypre_SOURCE_DIR} STREQUAL ${hypre_BINARY_DIR}) +set(HYPRE_VERSION 2.21.0) +set(HYPRE_NUMBER 22100) +set(HYPRE_DATE 2021/05/25) +set(HYPRE_TIME 00:00:00) +set(HYPRE_BUGS https://github.com/hypre-space/hypre/issues) +set(HYPRE_SRCDIR "${PROJECT_SOURCE_DIR}") + +set(PROJECT_NAME HYPRE) +project(${PROJECT_NAME} + VERSION ${HYPRE_VERSION} + LANGUAGES C) + +if (${HYPRE_SOURCE_DIR} STREQUAL ${HYPRE_BINARY_DIR}) message(FATAL_ERROR "In-place build not allowed! Please use a separate build directory. See the Users Manual or INSTALL file for details.") endif () -# Set cmake module path -set(CMAKE_MODULE_PATH ${hypre_SOURCE_DIR}/config/cmake) -include(hypre_CMakeUtilities) +# Set cmake module path +set(CMAKE_MODULE_PATH "${HYPRE_SOURCE_DIR}/config/cmake" "${CMAKE_MODULE_PATH}") +include(HYPRE_CMakeUtilities) # Set default installation directory, but provide a means for users to change -set (HYPRE_INSTALL_PREFIX "${PROJECT_SOURCE_DIR}/hypre" CACHE PATH - "Installation directory for HYPRE") -set (CMAKE_INSTALL_PREFIX "${HYPRE_INSTALL_PREFIX}" CACHE INTERNAL "" FORCE) +set(HYPRE_INSTALL_PREFIX "${PROJECT_SOURCE_DIR}/hypre" CACHE PATH + "Installation directory for HYPRE") +if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + set(CMAKE_INSTALL_PREFIX "${HYPRE_INSTALL_PREFIX}" CACHE INTERNAL "" FORCE) +endif () # Set default compile optimization flag -set (HYPRE_BUILD_TYPE "Release" CACHE STRING - "Optimization flags: set to Debug, Release, RelWithDebInfo, or MinSizeRel") -set (CMAKE_BUILD_TYPE "${HYPRE_BUILD_TYPE}" CACHE INTERNAL "" FORCE) +set(HYPRE_BUILD_TYPE "Release" CACHE STRING + "Optimization flags: set to Debug, Release, RelWithDebInfo, or MinSizeRel") + +if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + set(CMAKE_BUILD_TYPE "${HYPRE_BUILD_TYPE}" CACHE INTERNAL "" FORCE) + # Set the 
possible values of build type for cmake-gui + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS + "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif () # Configuration options option(HYPRE_ENABLE_SHARED "Build a shared library" OFF) @@ -45,238 +60,352 @@ option(HYPRE_ENABLE_COMPLEX "Use complex values" OFF) option(HYPRE_ENABLE_HYPRE_BLAS "Use internal BLAS library" ON) option(HYPRE_ENABLE_HYPRE_LAPACK "Use internal LAPACK library" ON) option(HYPRE_ENABLE_PERSISTENT_COMM "Use persistent communication" OFF) -option(HYPRE_ENABLE_GLOBAL_PARTITION "Use global partitioning" OFF) option(HYPRE_ENABLE_FEI "Use FEI" OFF) # TODO: Add this cmake feature option(HYPRE_WITH_MPI "Compile with MPI" ON) option(HYPRE_WITH_OPENMP "Use OpenMP" OFF) -option(HYPRE_WITH_HOPSCOTCH "Use hopscotch hashing with OpenMP" OFF) +option(HYPRE_ENABLE_HOPSCOTCH "Use hopscotch hashing with OpenMP" OFF) option(HYPRE_WITH_DSUPERLU "Use TPL SuperLU_Dist" OFF) option(HYPRE_WITH_CALIPER "Use Caliper" OFF) # TODO: Finish this cmake feature option(HYPRE_PRINT_ERRORS "Print HYPRE errors" OFF) option(HYPRE_TIMING "Use HYPRE timing routines" OFF) option(HYPRE_BUILD_EXAMPLES "Build examples" OFF) option(HYPRE_BUILD_TESTS "Build tests" OFF) +option(HYPRE_USING_HOST_MEMORY "Use host memory" ON) +set(HYPRE_WITH_EXTRA_CFLAGS "" CACHE STRING "Define extra C compile flags") +set(HYPRE_WITH_EXTRA_CXXFLAGS "" CACHE STRING "Define extra CXX compile flags") +# CUDA options +option(HYPRE_WITH_CUDA "Use CUDA. 
Require cuda-8.0 or higher" OFF) +option(HYPRE_ENABLE_UNIFIED_MEMORY "Use unified memory for allocating the memory" OFF) +option(HYPRE_ENABLE_CUDA_STREAMS "Use CUDA streams" ON) +option(HYPRE_ENABLE_CUSPARSE "Use cuSPARSE" ON) +option(HYPRE_ENABLE_DEVICE_POOL "Use device memory pool" OFF) +option(HYPRE_ENABLE_CUBLAS "Use cuBLAS" OFF) +option(HYPRE_ENABLE_CURAND "Use cuRAND" ON) +option(HYPRE_ENABLE_GPU_PROFILING "Use NVTX on CUDA" OFF) +set(HYPRE_CUDA_SM "70" CACHE STRING "Target CUDA architecture.") option(TPL_DSUPERLU_LIBRARIES "List of absolute paths to SuperLU_Dist link libraries [].") option(TPL_DSUPERLU_INCLUDE_DIRS "List of absolute paths to SuperLU_Dist include directories [].") +option(TPL_BLAS_LIBRARIES "Optional list of absolute paths to BLAS libraries, otherwise use FindBLAS to locate [].") +option(TPL_LAPACK_LIBRARIES "Optional list of absolute paths to LAPACK libraries, otherwise use FindLAPACK to locate [].") # Set config name values if (HYPRE_ENABLE_SHARED) set(HYPRE_SHARED ON CACHE BOOL "" FORCE) -endif() +endif () if (HYPRE_ENABLE_BIGINT) set(HYPRE_BIGINT ON CACHE BOOL "" FORCE) -endif() +endif () if (HYPRE_ENABLE_MIXEDINT) set(HYPRE_MIXEDINT ON CACHE BOOL "" FORCE) -endif() +endif () if (HYPRE_ENABLE_SINGLE) set(HYPRE_SINGLE ON CACHE BOOL "" FORCE) -endif() +endif () if (HYPRE_ENABLE_LONG_DOUBLE) set(HYPRE_LONG_DOUBLE ON CACHE BOOL "" FORCE) -endif() +endif () if (HYPRE_ENABLE_COMPLEX) set(HYPRE_COMPLEX ON CACHE BOOL "" FORCE) -endif() +endif () + +if (CMAKE_BUILD_TYPE STREQUAL "Debug") + set(HYPRE_DEBUG ON CACHE BOOL "" FORCE) +endif () if (HYPRE_ENABLE_HYPRE_BLAS) set(HYPRE_USING_HYPRE_BLAS ON CACHE BOOL "" FORCE) -endif() +endif () if (HYPRE_ENABLE_HYPRE_LAPACK) set(HYPRE_USING_HYPRE_LAPACK ON CACHE BOOL "" FORCE) -endif() +endif () if (HYPRE_ENABLE_PERSISTENT_COMM) set(HYPRE_USING_PERSISTENT_COMM ON CACHE BOOL "" FORCE) -endif() - -if (HYPRE_ENABLE_GLOBAL_PARTITION) - set(HYPRE_NO_GLOBAL_PARTITION OFF CACHE BOOL "" FORCE) -elseif() - 
set(HYPRE_NO_GLOBAL_PARTITION ON CACHE BOOL "" FORCE) -endif() +endif () if (HYPRE_WITH_MPI) + set(HYPRE_HAVE_MPI ON CACHE BOOL "" FORCE) set(HYPRE_SEQUENTIAL OFF CACHE BOOL "" FORCE) -elseif() +else () set(HYPRE_SEQUENTIAL ON CACHE BOOL "" FORCE) -endif() +endif () if (HYPRE_WITH_OPENMP) - set (HYPRE_USING_OPENMP ON CACHE BOOL "" FORCE) -endif() + set(HYPRE_USING_OPENMP ON CACHE BOOL "" FORCE) +endif () -if (HYPRE_WITH_HOPSCOTCH) - set (HYPRE_HOPSCOTCH ON CACHE BOOL "" FORCE) -endif() +if (HYPRE_ENABLE_HOPSCOTCH) + set(HYPRE_HOPSCOTCH ON CACHE BOOL "" FORCE) +endif () if (HYPRE_WITH_DSUPERLU) - set (HYPRE_USING_DSUPERLU ON CACHE BOOL "" FORCE) - set (HYPRE_USING_HYPRE_BLAS OFF CACHE BOOL "" FORCE) - set (HYPRE_USING_HYPRE_LAPACK OFF CACHE BOOL "" FORCE) -endif() + set(HYPRE_USING_DSUPERLU ON CACHE BOOL "" FORCE) + set(HYPRE_USING_HYPRE_BLAS OFF CACHE BOOL "" FORCE) + set(HYPRE_USING_HYPRE_LAPACK OFF CACHE BOOL "" FORCE) +endif () if (HYPRE_ENABLE_FEI) - set (HYPRE_USING_FEI ON CACHE BOOL "" FORCE) + set(HYPRE_USING_FEI ON CACHE BOOL "" FORCE) message(WARNING "CMake support for FEI is not complete!") -endif() +endif () if (HYPRE_WITH_CALIPER) - set (HYPRE_USING_CALIPER ON CACHE BOOL "" FORCE) -endif() + set(HYPRE_USING_CALIPER ON CACHE BOOL "" FORCE) +endif () if (HYPRE_SHARED OR HYPRE_BIGINT OR HYPRE_SINGLE OR HYPRE_LONG_DOUBLE) # FEI doesn't currently compile with shared - set (HYPRE_USING_FEI OFF CACHE BOOL "" FORCE) + set(HYPRE_USING_FEI OFF CACHE BOOL "" FORCE) endif () if (HYPRE_SEQUENTIAL) - set (HYPRE_NO_GLOBAL_PARTITION OFF CACHE BOOL "" FORCE) - set (HYPRE_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + set(HYPRE_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) endif () +# CUDA +if (HYPRE_WITH_CUDA) + enable_language(CXX) + message(STATUS "Enabled support for CXX.") + + # Enforce C++11 + if (NOT CMAKE_CXX_STANDARD OR CMAKE_CXX_STANDARD LESS 11) + set(CMAKE_CXX_STANDARD 11) + endif () + set(CMAKE_CXX_STANDARD_REQUIRED ON) + + message(STATUS "Using CXX standard: 
c++${CMAKE_CXX_STANDARD}") + + # Use ${CMAKE_CXX_COMPILER} as the cuda host compiler. + if (NOT CMAKE_CUDA_HOST_COMPILER) + set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) + endif () + + # Add any extra CXX compiler flags HYPRE_WITH_EXTRA_CXXFLAGS + if (NOT HYPRE_WITH_EXTRA_CXXFLAGS STREQUAL "") + string(REPLACE " " ";" HYPRE_WITH_EXTRA_CXXFLAGS "${HYPRE_WITH_EXTRA_CXXFLAGS}") + add_compile_options("$<$:${HYPRE_WITH_EXTRA_CXXFLAGS}>") + endif () + + # Check if CUDA is available, then enable it + include(CheckLanguage) + check_language(CUDA) + if (CMAKE_CUDA_COMPILER) + + enable_language(CUDA) + message(STATUS "Enabled support for CUDA.") + + if (NOT CMAKE_CUDA_STANDARD OR CMAKE_CUDA_STANDARD EQUAL 98) + set(CMAKE_CUDA_STANDARD 11) + endif () + + set(CMAKE_CUDA_STANDARD_REQUIRED ON CACHE BOOL "" FORCE) + + set(HYPRE_USING_CUDA ON CACHE BOOL "" FORCE) + set(HYPRE_USING_GPU ON CACHE BOOL "" FORCE) + + if (HYPRE_ENABLE_UNIFIED_MEMORY) + set(HYPRE_USING_UNIFIED_MEMORY ON CACHE BOOL "" FORCE) + else () + set(HYPRE_USING_DEVICE_MEMORY ON CACHE BOOL "" FORCE) + endif () + + # Check if examples are enabled, but not unified memory + if (HYPRE_BUILD_EXAMPLES AND NOT HYPRE_ENABLE_UNIFIED_MEMORY) + message(WARNING "Running the examples on GPUs requires Unified Memory! 
+ Examples will not be built!") + set(HYPRE_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) + endif () + + if (CMAKE_VERSION VERSION_LESS 3.18.0) + add_compile_options("$<$:-arch=sm_${HYPRE_CUDA_SM}>") + else () + set(CMAKE_CUDA_ARCHITECTURES "${HYPRE_CUDA_SM}") + endif () + message(STATUS "Using CUDA architecture: ${HYPRE_CUDA_SM}") + + add_compile_options("$<$:-expt-extended-lambda>") + + set(HYPRE_USING_HOST_MEMORY OFF CACHE BOOL "" FORCE) + + if (HYPRE_ENABLE_CUDA_STREAMS) + set(HYPRE_USING_CUDA_STREAMS ON CACHE BOOL "" FORCE) + endif (HYPRE_ENABLE_CUDA_STREAMS) + + if (HYPRE_ENABLE_DEVICE_POOL) + set(HYPRE_USING_DEVICE_POOL ON CACHE BOOL "" FORCE) + endif (HYPRE_ENABLE_DEVICE_POOL) + + # TODO Eventually should require cmake>=3.17 + # and use cmake's FindCUDAToolkit. Now collect + # CUDA optional libraries. + include(HYPRE_SetupCUDAToolkit) + else () + message(WARNING "No CUDA support!") + set(HYPRE_USING_HOST_MEMORY ON CACHE BOOL "" FORCE) + endif (CMAKE_CUDA_COMPILER) +endif (HYPRE_WITH_CUDA) + +# Add any extra C compiler flags HYPRE_WITH_EXTRA_CFLAGS +if (NOT HYPRE_WITH_EXTRA_CFLAGS STREQUAL "") + string(REPLACE " " ";" HYPRE_WITH_EXTRA_CFLAGS "${HYPRE_WITH_EXTRA_CFLAGS}") + add_compile_options("$<$:${HYPRE_WITH_EXTRA_CFLAGS}>") +endif () + +# Create the HYPRE library object +add_library(${PROJECT_NAME}) + # Headers and sources -set (HYPRE_HEADERS "") -set (HYPRE_SOURCES "") +set(HYPRE_HEADERS "") # Headers and sources: . 
-list (APPEND HYPRE_HEADERS - "${PROJECT_BINARY_DIR}/HYPRE_config.h" +set(HYPRE_MAIN_HEADERS + ${CMAKE_CURRENT_BINARY_DIR}/HYPRE_config.h HYPREf.h HYPRE.h -) - -# This is a list of TPLs that are used by all targets -set(TPL_LIBRARIES "") + ) -# This is a list of linker flags to be used with TPLs for all targets -set(TPL_LINKER_FLAGS "") +set(HYPRE_HEADERS ${HYPRE_HEADERS} ${HYPRE_MAIN_HEADERS}) # Headers and sources: blas if (HYPRE_USING_HYPRE_BLAS) - add_subdirectory(blas) -else() - # Find system blas - find_package(BLAS REQUIRED) - list(APPEND TPL_LIBRARIES ${BLAS_LIBRARIES}) - set(CMAKE_C_FLAGS "-DUSE_VENDOR_BLAS ${CMAKE_C_FLAGS}") -endif() + add_subdirectory(blas) +else () + # Use TPL_BLAS_LIBRARIES if set. + if (TPL_BLAS_LIBRARIES) + message(STATUS "Using TPL_BLAS_LIBRARIES='${TPL_BLAS_LIBRARIES}'") + target_link_libraries(${PROJECT_NAME} PUBLIC "${TPL_BLAS_LIBRARIES}") + else () + # Find system blas + find_package(BLAS REQUIRED) + target_link_libraries(${PROJECT_NAME} PUBLIC "${BLAS_LIBRARIES}") + endif () + target_compile_definitions(${PROJECT_NAME} PUBLIC "USE_VENDOR_BLAS") +endif () # Headers and sources: lapack if (HYPRE_USING_HYPRE_LAPACK) - add_subdirectory(lapack) -else() - # Find system lapack - find_package(LAPACK REQUIRED) - list(APPEND TPL_LIBRARIES ${LAPACK_LIBRARIES}) -endif() - -# Find DSUPERLU, if requested + add_subdirectory(lapack) +else () + # Use TPL_LAPACK_LIBRARIES if set. 
+ if (TPL_LAPACK_LIBRARIES) + message(STATUS "Using TPL_LAPACK_LIBRARIES='${TPL_LAPACK_LIBRARIES}'") + target_link_libraries(${PROJECT_NAME} PUBLIC "${TPL_LAPACK_LIBRARIES}") + else () + # Find system lapack + find_package(LAPACK REQUIRED) + target_link_libraries(${PROJECT_NAME} PUBLIC "${LAPACK_LIBRARIES}") + endif () +endif () + +# Find DSUPERLU, if requested if (HYPRE_USING_DSUPERLU) if (NOT TPL_DSUPERLU_LIBRARIES) message(FATAL_ERROR "TPL_DSUPERLU_LIBRARIES option should be set for SuperLU_Dist support.") - endif() + endif () if (NOT TPL_DSUPERLU_INCLUDE_DIRS) message(FATAL_ERROR "TPL_DSUPERLU_INCLUDE_DIRS option be set for SuperLU_Dist support.") - endif() - foreach(dir ${TPL_DSUPERLU_INCLUDE_DIRS}) + endif () + + foreach (dir ${TPL_DSUPERLU_INCLUDE_DIRS}) if (NOT EXISTS ${dir}) message(FATAL_ERROR "SuperLU_Dist include directory not found: ${dir}") - endif() + endif () set(CMAKE_C_FLAGS "-I${dir} ${CMAKE_C_FLAGS}") - endforeach() - message("-- Enabled support for using DSUPERLU.") + endforeach () + message(STATUS "Enabled support for using DSUPERLU.") set(DSUPERLU_FOUND TRUE) - list(APPEND TPL_LIBRARIES ${TPL_DSUPERLU_LIBRARIES} stdc++) -endif() + target_link_libraries(${PROJECT_NAME} PUBLIC ${TPL_DSUPERLU_LIBRARIES} stdc++) + target_include_directories(${PROJECT_NAME} PUBLIC ${TPL_DSUPERLU_INCLUDE_DIRS}) +endif (HYPRE_USING_DSUPERLU) -if (TPL_DSUPERLU_INCLUDE_DIRS) - include_directories(${TPL_DSUPERLU_INCLUDE_DIRS}) +if (DSUPERLU_FOUND) + set(HYPRE_USING_DSUPERLU TRUE) endif () -if(DSUPERLU_FOUND) - set(HYPRE_USING_DSUPERLU TRUE) -endif() +if (HYPRE_USING_CUDA) + target_link_libraries(${PROJECT_NAME} PUBLIC "${EXPORT_INTERFACE_CUDA_LIBS}") + if (HYPRE_HAVE_MPI) + target_include_directories(${PROJECT_NAME} PUBLIC + ${MPI_CXX_INCLUDE_DIRS}) + endif () +endif () +# Configure a header file to pass CMake settings to the source code +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/config/HYPRE_config.h.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/HYPRE_config.h" + ) # 
Headers and sources: remaining subdirectories set(HYPRE_DIRS utilities multivector krylov seq_mv parcsr_mv parcsr_block_mv distributed_matrix IJ_mv matrix_matrix distributed_ls parcsr_ls struct_mv struct_ls sstruct_mv sstruct_ls) -foreach(DIR IN LISTS HYPRE_DIRS) +foreach (DIR IN LISTS HYPRE_DIRS) add_subdirectory(${DIR}) -endforeach() + target_include_directories(${PROJECT_NAME} PUBLIC + $) +endforeach () -# Configure a header file to pass CMake settings to the source code -configure_file ( - "${PROJECT_SOURCE_DIR}/config/HYPRE_config.h.cmake.in" - "${PROJECT_BINARY_DIR}/HYPRE_config.h" +# BINARY must be first in order to get the correct HYPRE_config.h file +target_include_directories(${PROJECT_NAME} PUBLIC + $ + $ + $ + $ + $ ) -# BINARY must be first in order to get the correct HYPRE_config.h file -include_directories(${hypre_BINARY_DIR}) -include_directories(${hypre_SOURCE_DIR}) -include_directories(blas) -include_directories(lapack) -# Add remaining subdirectories -foreach(DIR IN LISTS HYPRE_DIRS) - include_directories(${DIR}) -endforeach() -include_directories(distributed_ls/Euclid) -include_directories(distributed_ls/ParaSails) +if (HYPRE_USING_CUDA) + set_source_files_properties(${HYPRE_CUDA_SOURCES} PROPERTIES LANGUAGE CUDA) +endif () # Set library build type if (HYPRE_SHARED) - set (BUILD_SHARED_LIBS ON CACHE INTERNAL "" FORCE) + set(BUILD_SHARED_LIBS ON CACHE INTERNAL "" FORCE) else () - set (BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE) + set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE) endif () # Set MPI compile flags if (NOT HYPRE_SEQUENTIAL) - find_package (MPI) - if ((MPI_C_FOUND) AND (NOT CMAKE_C_COMPILER STREQUAL MPI_C_COMPILER)) - include_directories (${MPI_C_INCLUDE_PATH}) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MPI_C_COMPILE_FLAGS}") - list(APPEND TPL_LIBRARIES ${MPI_C_LIBRARIES}) - endif () + find_program(MPIEXEC_EXECUTABLE NAMES mpiexec mpirun) + find_package(MPI REQUIRED) + target_link_libraries(${PROJECT_NAME} PUBLIC MPI::MPI_C) endif 
(NOT HYPRE_SEQUENTIAL) # Set OpenMP compile flags if (HYPRE_USING_OPENMP) - find_package (OpenMP) - if (OPENMP_FOUND) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - endif (OPENMP_FOUND) + find_package(OpenMP REQUIRED) + target_link_libraries(${PROJECT_NAME} PUBLIC OpenMP::OpenMP_C) endif (HYPRE_USING_OPENMP) if (MSVC) - add_definitions(-D_CRT_SECURE_NO_WARNINGS) + target_compile_definitions(${PROJECT_NAME} PRIVATE _CRT_SECURE_NO_WARNINGS) # Use the C++ compiler to compile these files to get around lack of C99 support - set_source_files_properties (utilities/hypre_hopscotch_hash.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (utilities/hypre_merge_sort.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (seq_mv/csr_matop.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_mv/par_csr_matop.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_mv/par_csr_matvec.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/ams.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/aux_interp.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/par_add_cycle.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/par_amg_setup.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/par_coarsen.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/par_cgc_coarsen.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/par_jacobi_interp.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/par_mgr_setup.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/par_rap.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/par_relax.c PROPERTIES COMPILE_FLAGS /TP) - set_source_files_properties (parcsr_ls/par_strength.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(utilities/hypre_hopscotch_hash.c PROPERTIES 
COMPILE_FLAGS /TP) + set_source_files_properties(utilities/hypre_merge_sort.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(seq_mv/csr_matop.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_mv/par_csr_matop.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_mv/par_csr_matvec.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/ams.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/aux_interp.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/par_add_cycle.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/par_amg_setup.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/par_coarsen.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/par_cgc_coarsen.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/par_jacobi_interp.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/par_mgr_setup.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/par_rap.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/par_relax.c PROPERTIES COMPILE_FLAGS /TP) + set_source_files_properties(parcsr_ls/par_strength.c PROPERTIES COMPILE_FLAGS /TP) #Fix issue with visual studio 2013 - set_source_files_properties (struct_ls/pfmg3_setup_rap.c PROPERTIES COMPILE_FLAGS /Od) + set_source_files_properties(struct_ls/pfmg3_setup_rap.c PROPERTIES COMPILE_FLAGS /Od) endif () if (HYPRE_USING_FEI) @@ -286,20 +415,48 @@ endif () # Build the examples directory, if requested if (HYPRE_BUILD_EXAMPLES) add_subdirectory(examples) -endif() +endif () # Build the test directory, if requested if (HYPRE_BUILD_TESTS) add_subdirectory(test) -endif() +endif () -# Cleanup the TPL list -list(REMOVE_DUPLICATES TPL_LIBRARIES) +include(GNUInstallDirs) +install(TARGETS ${PROJECT_NAME} + EXPORT HYPRETargets + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ARCHIVE DESTINATION 
"${CMAKE_INSTALL_LIBDIR}" + INCLUDES DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") +install(FILES ${HYPRE_HEADERS} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + HYPREConfigVersion.cmake + VERSION ${PACKAGE_VERSION} + COMPATIBILITY SameMajorVersion + ) -add_library (HYPRE ${HYPRE_SOURCES} ${HYPRE_HEADERS} ${FEI_LIBS}) +install(EXPORT HYPRETargets + FILE HYPRETargets.cmake + NAMESPACE HYPRE:: + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/HYPRE" + ) -target_link_libraries(HYPRE PUBLIC ${TPL_LIBRARIES}) +configure_package_config_file( + config/HYPREConfig.cmake.in HYPREConfig.cmake + INSTALL_DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/HYPREConfig.cmake" + ) +install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/HYPREConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/HYPREConfigVersion.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/HYPRE" + ) -install (TARGETS HYPRE DESTINATION lib) -install (FILES ${HYPRE_HEADERS} DESTINATION include) +export(EXPORT HYPRETargets + FILE "${CMAKE_CURRENT_BINARY_DIR}/HYPRETargets.cmake" + NAMESPACE HYPRE:: + ) +export(PACKAGE ${PROJECT_NAME}) diff --git a/src/FEI_mv/fei-hypre/FEI_HYPRE_Impl.cxx b/src/FEI_mv/fei-hypre/FEI_HYPRE_Impl.cxx index b8684a15f..0984e0e44 100644 --- a/src/FEI_mv/fei-hypre/FEI_HYPRE_Impl.cxx +++ b/src/FEI_mv/fei-hypre/FEI_HYPRE_Impl.cxx @@ -13,11 +13,10 @@ #include #include #include -#include #include /*------------------------------------------------------------------------- - MPI definitions + MPI definitions -------------------------------------------------------------------------*/ #include "FEI_HYPRE_include.h" @@ -48,13 +47,13 @@ extern "C" /************************************************************************** ************************************************************************** - Each element block contains a number of elements of the same type (e.g. + Each element block contains a number of elements of the same type (e.g. 
hex or tet element). For this implementation, all element block should - have the same number of degree of freedom per node. + have the same number of degree of freedom per node. **************************************************************************/ /************************************************************************** - Constructor + Constructor -------------------------------------------------------------------------*/ FEI_HYPRE_Elem_Block::FEI_HYPRE_Elem_Block( int blockID ) { @@ -75,7 +74,7 @@ FEI_HYPRE_Elem_Block::FEI_HYPRE_Elem_Block( int blockID ) } /************************************************************************** - destructor + destructor -------------------------------------------------------------------------*/ FEI_HYPRE_Elem_Block::~FEI_HYPRE_Elem_Block() { @@ -84,29 +83,29 @@ FEI_HYPRE_Elem_Block::~FEI_HYPRE_Elem_Block() if ( elemIDs_ != NULL ) delete [] elemIDs_; if ( elemNodeLists_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - if ( elemNodeLists_[iE] != NULL ) + for ( iE = 0; iE < numElems_; iE++ ) + if ( elemNodeLists_[iE] != NULL ) delete [] elemNodeLists_[iE]; delete [] elemNodeLists_; } if ( elemMatrices_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - if ( elemMatrices_[iE] != NULL ) + for ( iE = 0; iE < numElems_; iE++ ) + if ( elemMatrices_[iE] != NULL ) delete [] elemMatrices_[iE]; delete [] elemMatrices_; } if ( rhsVectors_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - if ( rhsVectors_[iE] != NULL ) + for ( iE = 0; iE < numElems_; iE++ ) + if ( rhsVectors_[iE] != NULL ) delete [] rhsVectors_[iE]; delete [] rhsVectors_; } if ( solnVectors_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - if ( solnVectors_[iE] != NULL ) + for ( iE = 0; iE < numElems_; iE++ ) + if ( solnVectors_[iE] != NULL ) delete [] solnVectors_[iE]; delete [] solnVectors_; } @@ -117,7 +116,7 @@ FEI_HYPRE_Elem_Block::~FEI_HYPRE_Elem_Block() } /************************************************************************** - initialization 
+ initialization -------------------------------------------------------------------------*/ int FEI_HYPRE_Elem_Block::initialize(int numElements, int numNodesPerElement, int dofPerNode) @@ -127,29 +126,29 @@ int FEI_HYPRE_Elem_Block::initialize(int numElements, int numNodesPerElement, if ( elemIDs_ != NULL ) delete [] elemIDs_; if ( elemNodeLists_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - if ( elemNodeLists_[iE] != NULL ) + for ( iE = 0; iE < numElems_; iE++ ) + if ( elemNodeLists_[iE] != NULL ) delete [] elemNodeLists_[iE]; delete [] elemNodeLists_; } if ( elemMatrices_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - if ( elemMatrices_[iE] != NULL ) + for ( iE = 0; iE < numElems_; iE++ ) + if ( elemMatrices_[iE] != NULL ) delete [] elemMatrices_[iE]; delete [] elemMatrices_; } if ( rhsVectors_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - if ( rhsVectors_[iE] != NULL ) + for ( iE = 0; iE < numElems_; iE++ ) + if ( rhsVectors_[iE] != NULL ) delete [] rhsVectors_[iE]; delete [] rhsVectors_; } if ( solnVectors_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - if ( solnVectors_[iE] != NULL ) + for ( iE = 0; iE < numElems_; iE++ ) + if ( solnVectors_[iE] != NULL ) delete [] solnVectors_[iE]; delete [] solnVectors_; } @@ -170,33 +169,33 @@ int FEI_HYPRE_Elem_Block::initialize(int numElements, int numNodesPerElement, } /************************************************************************** - reset the system for reloading (no reinitialization needed) + reset the system for reloading (no reinitialization needed) -------------------------------------------------------------------------*/ int FEI_HYPRE_Elem_Block::reset() { if ( elemNodeLists_ != NULL ) { - for ( int iE = 0; iE < numElems_; iE++ ) + for ( int iE = 0; iE < numElems_; iE++ ) { - if ( elemNodeLists_[iE] != NULL ) + if ( elemNodeLists_[iE] != NULL ) delete [] elemNodeLists_[iE]; elemNodeLists_[iE] = NULL; } } if ( elemMatrices_ != NULL ) { - for ( int iE = 0; iE < numElems_; iE++ 
) + for ( int iE = 0; iE < numElems_; iE++ ) { - if ( elemMatrices_[iE] != NULL ) + if ( elemMatrices_[iE] != NULL ) delete [] elemMatrices_[iE]; elemMatrices_[iE] = NULL; } } if ( rhsVectors_ != NULL ) { - for ( int iE = 0; iE < numElems_; iE++ ) + for ( int iE = 0; iE < numElems_; iE++ ) { - if ( rhsVectors_[iE] != NULL ) + if ( rhsVectors_[iE] != NULL ) delete [] rhsVectors_[iE]; rhsVectors_[iE] = NULL; } @@ -214,8 +213,8 @@ int FEI_HYPRE_Elem_Block::resetRHSVectors(double s) if ( rhsVectors_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - for ( iD = 0; iD < matDim; iD++ ) rhsVectors_[iE][iD] = s; + for ( iE = 0; iE < numElems_; iE++ ) + for ( iD = 0; iD < matDim; iD++ ) rhsVectors_[iE][iD] = s; } currElem_ = 0; return 0; @@ -230,17 +229,17 @@ int FEI_HYPRE_Elem_Block::resetSolnVectors(double s) if ( solnVectors_ != NULL ) { - for ( iE = 0; iE < numElems_; iE++ ) - for ( iD = 0; iD < matDim; iD++ ) solnVectors_[iE][iD] = s; + for ( iE = 0; iE < numElems_; iE++ ) + for ( iD = 0; iD < matDim; iD++ ) solnVectors_[iE][iD] = s; } currElem_ = 0; return 0; } /************************************************************************** - load individual element information + load individual element information -------------------------------------------------------------------------*/ -int FEI_HYPRE_Elem_Block::loadElemInfo(int elemID, int *elemConn, +int FEI_HYPRE_Elem_Block::loadElemInfo(int elemID, int *elemConn, double **elemStiff, double *elemLoad) { if ( currElem_ >= numElems_ ) @@ -249,7 +248,7 @@ int FEI_HYPRE_Elem_Block::loadElemInfo(int elemID, int *elemConn, exit(1); } #if 0 - printf("Loading element %d : ", elemID); + printf("Loading element %d : ", elemID); for ( int iN2 = 0; iN2 < nodesPerElem_; iN2++ ) printf("%d ", elemConn[iN2]); printf("\n"); @@ -276,9 +275,9 @@ int FEI_HYPRE_Elem_Block::loadElemInfo(int elemID, int *elemConn, } /************************************************************************** - load individual element matrix only + load 
individual element matrix only -------------------------------------------------------------------------*/ -int FEI_HYPRE_Elem_Block::loadElemMatrix(int elemID, int *elemConn, +int FEI_HYPRE_Elem_Block::loadElemMatrix(int elemID, int *elemConn, double **elemStiff) { if ( currElem_ >= numElems_ ) @@ -287,7 +286,7 @@ int FEI_HYPRE_Elem_Block::loadElemMatrix(int elemID, int *elemConn, exit(1); } #if 0 - printf("Loading element %d : ", elemID); + printf("Loading element %d : ", elemID); for ( int iN = 0; iN < nodesPerElem_; iN++ ) printf("%d ", elemConn[iN]); printf("\n"); @@ -311,7 +310,7 @@ int FEI_HYPRE_Elem_Block::loadElemMatrix(int elemID, int *elemConn, } /************************************************************************** - load individual load information + load individual load information -------------------------------------------------------------------------*/ int FEI_HYPRE_Elem_Block::loadElemRHS(int elemID, double *elemLoad) { @@ -330,7 +329,7 @@ int FEI_HYPRE_Elem_Block::loadElemRHS(int elemID, double *elemLoad) } currElem_ = HYPRE_LSI_Search(sortedIDs_, elemID, numElems_); } - if ( rhsVectors_ == NULL ) + if ( rhsVectors_ == NULL ) { rhsVectors_ = new double*[numElems_]; for ( iE = 0; iE < numElems_; iE++ ) rhsVectors_[iE] = NULL; @@ -360,12 +359,12 @@ int FEI_HYPRE_Elem_Block::checkLoadComplete() } /************************************************************************** - FEI_HYPRE_Impl is the core linear system interface. Each + FEI_HYPRE_Impl is the core linear system interface. Each instantiation supports multiple elememt blocks. 
**************************************************************************/ /************************************************************************** - Constructor + Constructor -------------------------------------------------------------------------*/ FEI_HYPRE_Impl::FEI_HYPRE_Impl( MPI_Comm comm ) { @@ -454,7 +453,7 @@ FEI_HYPRE_Impl::FEI_HYPRE_Impl( MPI_Comm comm ) } /************************************************************************** - destructor + destructor -------------------------------------------------------------------------*/ FEI_HYPRE_Impl::~FEI_HYPRE_Impl() { @@ -466,14 +465,14 @@ FEI_HYPRE_Impl::~FEI_HYPRE_Impl() if ( globalNodeOffsets_ != NULL ) delete [] globalNodeOffsets_; if ( recvLengs_ != NULL ) delete [] recvLengs_; if ( recvProcs_ != NULL ) delete [] recvProcs_; - if ( recvProcIndices_ != NULL ) + if ( recvProcIndices_ != NULL ) { for (int iP = 0; iP < nRecvs_; iP++) delete [] recvProcIndices_[iP]; delete [] recvProcIndices_; } if ( sendLengs_ != NULL ) delete [] sendLengs_; if ( sendProcs_ != NULL ) delete [] sendProcs_; - if ( sendProcIndices_ != NULL ) + if ( sendProcIndices_ != NULL ) { for (int iP = 0; iP < nSends_; iP++) delete [] sendProcIndices_[iP]; delete [] sendProcIndices_; @@ -488,17 +487,17 @@ FEI_HYPRE_Impl::~FEI_HYPRE_Impl() if ( solnVector_ != NULL ) delete [] solnVector_; if ( rhsVector_ != NULL ) delete [] rhsVector_; if ( BCNodeIDs_ != NULL ) delete [] BCNodeIDs_; - if ( BCNodeAlpha_ != NULL ) + if ( BCNodeAlpha_ != NULL ) { for ( int iD = 0; iD < numBCNodes_; iD++ ) delete [] BCNodeAlpha_[iD]; delete [] BCNodeAlpha_; } - if ( BCNodeBeta_ != NULL ) + if ( BCNodeBeta_ != NULL ) { for ( int iD = 0; iD < numBCNodes_; iD++ ) delete [] BCNodeBeta_[iD]; delete [] BCNodeBeta_; } - if ( BCNodeGamma_ != NULL ) + if ( BCNodeGamma_ != NULL ) { for ( int iD = 0; iD < numBCNodes_; iD++ ) delete [] BCNodeGamma_[iD]; delete [] BCNodeGamma_; @@ -562,7 +561,7 @@ int FEI_HYPRE_Impl::parameters(int numParams, char **paramString) 
else if ( ! strcmp(param, "cgs") ) solverID_ = 2; else if ( ! strcmp(param, "bicgstab")) solverID_ = 3; #ifdef HAVE_SUPERLU - else if ( ! strcmp(param, "superlu") ) + else if ( ! strcmp(param, "superlu") ) { MPI_Comm_size( mpiComm_, &nprocs ); if ( nprocs == 1 ) solverID_ = 4; @@ -587,7 +586,7 @@ int FEI_HYPRE_Impl::parameters(int numParams, char **paramString) } /************************************************************************** - initialize nodal degree of freedom + initialize nodal degree of freedom -------------------------------------------------------------------------*/ int FEI_HYPRE_Impl::initFields(int numFields, int *fieldSizes, int *fieldIDs) { @@ -596,7 +595,7 @@ int FEI_HYPRE_Impl::initFields(int numFields, int *fieldSizes, int *fieldIDs) { printf("%4d : FEI_HYPRE_Impl::initFields WARNING - numFields != 1.", mypid_); - printf(" Take field 0.\n"); + printf(" Take field 0.\n"); nodeDOF_ = fieldSizes[0]; return -1; } @@ -607,17 +606,17 @@ int FEI_HYPRE_Impl::initFields(int numFields, int *fieldSizes, int *fieldIDs) /************************************************************************** set element and node information -------------------------------------------------------------------------*/ -int FEI_HYPRE_Impl::initElemBlock(int elemBlockID, int numElements, - int numNodesPerElement, int *numFieldsPerNode, - int **nodalFieldIDs, int numElemDOFFieldsPerElement, +int FEI_HYPRE_Impl::initElemBlock(int elemBlockID, int numElements, + int numNodesPerElement, int *numFieldsPerNode, + int **nodalFieldIDs, int numElemDOFFieldsPerElement, int *elemDOFFieldIDs, int interleaveStrategy) { (void) numFieldsPerNode; (void) nodalFieldIDs; - (void) numElemDOFFieldsPerElement; + (void) numElemDOFFieldsPerElement; (void) elemDOFFieldIDs; (void) interleaveStrategy; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) { printf("%4d : FEI_HYPRE_Impl::initElemBlock begins... 
\n", mypid_); printf(" elemBlockID = %d \n", elemBlockID); @@ -631,7 +630,7 @@ int FEI_HYPRE_Impl::initElemBlock(int elemBlockID, int numElements, printf("\n"); } for ( int iE = 0; iE < numElemDOFFieldsPerElement; iE++ ) - printf(" Element field IDs %d = %d\n", iE, + printf(" Element field IDs %d = %d\n", iE, elemDOFFieldIDs[iE]); } if ( numBlocks_ == 0 ) @@ -650,7 +649,7 @@ int FEI_HYPRE_Impl::initElemBlock(int elemBlockID, int numElements, printf("repeated blockID\n"); exit(1); } - } + } FEI_HYPRE_Elem_Block **tempBlocks = elemBlocks_; numBlocks_++; elemBlocks_ = new FEI_HYPRE_Elem_Block*[numBlocks_]; @@ -659,9 +658,9 @@ int FEI_HYPRE_Impl::initElemBlock(int elemBlockID, int numElements, elemBlocks_[numBlocks_-1] = new FEI_HYPRE_Elem_Block(elemBlockID); } elemBlocks_[numBlocks_-1]->initialize(numElements, numNodesPerElement, - nodeDOF_); + nodeDOF_); FLAG_LoadComplete_= 0; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::initElemBlock ends.\n", mypid_); return 0; } @@ -675,7 +674,7 @@ int FEI_HYPRE_Impl::initSharedNodes(int nShared, int *sharedIDs, int iN, iP, newNumShared, *oldSharedIDs, *oldSharedNProcs; int **oldSharedProcs; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::initSharedNodes begins... 
\n", mypid_); TimerLoadStart_ = MPI_Wtime(); if ( numSharedNodes_ > 0 ) @@ -683,24 +682,24 @@ int FEI_HYPRE_Impl::initSharedNodes(int nShared, int *sharedIDs, newNumShared = numSharedNodes_ + nShared; oldSharedIDs = sharedNodeIDs_; sharedNodeIDs_ = new int[newNumShared]; - for ( iN = 0; iN < numSharedNodes_; iN++ ) + for ( iN = 0; iN < numSharedNodes_; iN++ ) sharedNodeIDs_[iN] = oldSharedIDs[iN]; - for ( iN = 0; iN < nShared; iN++ ) + for ( iN = 0; iN < nShared; iN++ ) sharedNodeIDs_[iN+numSharedNodes_] = sharedIDs[iN]; oldSharedNProcs = sharedNodeNProcs_; sharedNodeNProcs_ = new int[newNumShared]; - for ( iN = 0; iN < numSharedNodes_; iN++ ) + for ( iN = 0; iN < numSharedNodes_; iN++ ) sharedNodeNProcs_[iN] = oldSharedNProcs[iN]; - for ( iN = 0; iN < nShared; iN++ ) + for ( iN = 0; iN < nShared; iN++ ) sharedNodeNProcs_[iN+numSharedNodes_] = sharedNProcs[iN]; oldSharedProcs = sharedNodeProcs_; sharedNodeProcs_ = new int*[newNumShared]; - for ( iN = 0; iN < numSharedNodes_; iN++ ) + for ( iN = 0; iN < numSharedNodes_; iN++ ) sharedNodeProcs_[iN] = oldSharedProcs[iN]; - for ( iN = 0; iN < nShared; iN++ ) + for ( iN = 0; iN < nShared; iN++ ) { sharedNodeProcs_[iN+numSharedNodes_] = new int[sharedNProcs[iN]]; - for ( iP = 0; iP < sharedNProcs[iN]; iP++ ) + for ( iP = 0; iP < sharedNProcs[iN]; iP++ ) sharedNodeProcs_[iN+numSharedNodes_][iP] = sharedProcs[iN][iP]; } numSharedNodes_ = newNumShared; @@ -712,21 +711,21 @@ int FEI_HYPRE_Impl::initSharedNodes(int nShared, int *sharedIDs, { numSharedNodes_ = nShared; sharedNodeIDs_ = new int[nShared]; - for ( iN = 0; iN < nShared; iN++ ) + for ( iN = 0; iN < nShared; iN++ ) sharedNodeIDs_[iN] = sharedIDs[iN]; sharedNodeNProcs_ = new int[nShared]; - for ( iN = 0; iN < nShared; iN++ ) + for ( iN = 0; iN < nShared; iN++ ) sharedNodeNProcs_[iN] = sharedNProcs[iN]; sharedNodeProcs_ = new int*[nShared]; - for ( iN = 0; iN < nShared; iN++ ) + for ( iN = 0; iN < nShared; iN++ ) { sharedNodeProcs_[iN] = new int[sharedNProcs[iN]]; - 
for ( iP = 0; iP < sharedNProcs[iN]; iP++ ) + for ( iP = 0; iP < sharedNProcs[iN]; iP++ ) sharedNodeProcs_[iN][iP] = sharedProcs[iN][iP]; } } TimerLoad_ += MPI_Wtime() - TimerLoadStart_; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::initSharedNodes ends. \n", mypid_); return 0; } @@ -747,14 +746,14 @@ int FEI_HYPRE_Impl::resetSystem(double s) if ( globalNodeOffsets_ != NULL ) delete [] globalNodeOffsets_; if ( recvLengs_ != NULL ) delete [] recvLengs_; if ( recvProcs_ != NULL ) delete [] recvProcs_; - if ( recvProcIndices_ != NULL ) + if ( recvProcIndices_ != NULL ) { for (int iP = 0; iP < nRecvs_; iP++) delete [] recvProcIndices_[iP]; delete [] recvProcIndices_; } if ( sendLengs_ != NULL ) delete [] sendLengs_; if ( sendProcs_ != NULL ) delete [] sendProcs_; - if ( sendProcIndices_ != NULL ) + if ( sendProcIndices_ != NULL ) { for (int iP = 0; iP < nSends_; iP++) delete [] sendProcIndices_[iP]; delete [] sendProcIndices_; @@ -766,23 +765,23 @@ int FEI_HYPRE_Impl::resetSystem(double s) if ( offdJA_ != NULL ) delete [] offdJA_; if ( offdAA_ != NULL ) delete [] offdAA_; if ( diagonal_ != NULL ) delete [] diagonal_; - if ( BCNodeAlpha_ != NULL ) + if ( BCNodeAlpha_ != NULL ) { for ( int iD = 0; iD < numBCNodes_; iD++ ) delete [] BCNodeAlpha_[iD]; delete [] BCNodeAlpha_; } - if ( BCNodeBeta_ != NULL ) + if ( BCNodeBeta_ != NULL ) { for ( int iD = 0; iD < numBCNodes_; iD++ ) delete [] BCNodeBeta_[iD]; delete [] BCNodeBeta_; } - if ( BCNodeGamma_ != NULL ) + if ( BCNodeGamma_ != NULL ) { for ( int iD = 0; iD < numBCNodes_; iD++ ) delete [] BCNodeGamma_[iD]; delete [] BCNodeGamma_; } - if ( BCNodeIDs_ != NULL ) delete [] BCNodeIDs_; - if ( rhsVector_ != NULL ) delete [] rhsVector_; + if ( BCNodeIDs_ != NULL ) delete [] BCNodeIDs_; + if ( rhsVector_ != NULL ) delete [] rhsVector_; nSends_ = 0; nRecvs_ = 0; nodeGlobalIDs_ = NULL; @@ -833,14 +832,14 @@ int FEI_HYPRE_Impl::resetMatrix(double s) if ( globalNodeOffsets_ != NULL ) delete [] 
globalNodeOffsets_; if ( recvLengs_ != NULL ) delete [] recvLengs_; if ( recvProcs_ != NULL ) delete [] recvProcs_; - if ( recvProcIndices_ != NULL ) + if ( recvProcIndices_ != NULL ) { for (int iP = 0; iP < nRecvs_; iP++) delete [] recvProcIndices_[iP]; delete [] recvProcIndices_; } if ( sendLengs_ != NULL ) delete [] sendLengs_; if ( sendProcs_ != NULL ) delete [] sendProcs_; - if ( sendProcIndices_ != NULL ) + if ( sendProcIndices_ != NULL ) { for (int iP = 0; iP < nSends_; iP++) delete [] sendProcIndices_[iP]; delete [] sendProcIndices_; @@ -852,22 +851,22 @@ int FEI_HYPRE_Impl::resetMatrix(double s) if ( offdJA_ != NULL ) delete [] offdJA_; if ( offdAA_ != NULL ) delete [] offdAA_; if ( diagonal_ != NULL ) delete [] diagonal_; - if ( BCNodeAlpha_ != NULL ) + if ( BCNodeAlpha_ != NULL ) { for ( int iD = 0; iD < numBCNodes_; iD++ ) delete [] BCNodeAlpha_[iD]; delete [] BCNodeAlpha_; } - if ( BCNodeBeta_ != NULL ) + if ( BCNodeBeta_ != NULL ) { for ( int iD = 0; iD < numBCNodes_; iD++ ) delete [] BCNodeBeta_[iD]; delete [] BCNodeBeta_; } - if ( BCNodeGamma_ != NULL ) + if ( BCNodeGamma_ != NULL ) { for ( int iD = 0; iD < numBCNodes_; iD++ ) delete [] BCNodeGamma_[iD]; delete [] BCNodeGamma_; } - if ( BCNodeIDs_ != NULL ) delete [] BCNodeIDs_; + if ( BCNodeIDs_ != NULL ) delete [] BCNodeIDs_; nSends_ = 0; nRecvs_ = 0; nodeGlobalIDs_ = NULL; @@ -909,7 +908,7 @@ int FEI_HYPRE_Impl::resetRHSVector(double s) (void) s; if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::resetRHSVector begins...\n", mypid_); - for ( int iB = 0; iB < numBlocks_; iB++ ) + for ( int iB = 0; iB < numBlocks_; iB++ ) elemBlocks_[iB]->resetRHSVectors(s); if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::resetRHSVector ends.\n", mypid_); @@ -924,7 +923,7 @@ int FEI_HYPRE_Impl::resetInitialGuess(double s) (void) s; if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::resetInitialGuess begins...\n", mypid_); - for ( int iB = 0; iB < numBlocks_; iB++ ) + for ( int iB = 0; iB < 
numBlocks_; iB++ ) elemBlocks_[iB]->resetSolnVectors(s); if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::resetInitialGuess ends (%e).\n", mypid_, s); @@ -941,7 +940,7 @@ int FEI_HYPRE_Impl::loadNodeBCs(int numNodes, int *nodeIDs, int fieldID, double **oldBCAlpha, **oldBCBeta, **oldBCGamma; (void) fieldID; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::loadNodeBCs begins...(%d)\n",mypid_,numNodes); TimerLoadStart_ = MPI_Wtime(); if ( numNodes > 0 ) @@ -953,13 +952,13 @@ int FEI_HYPRE_Impl::loadNodeBCs(int numNodes, int *nodeIDs, int fieldID, BCNodeAlpha_ = new double*[numBCNodes_]; BCNodeBeta_ = new double*[numBCNodes_]; BCNodeGamma_ = new double*[numBCNodes_]; - for ( iN = 0; iN < numNodes; iN++ ) + for ( iN = 0; iN < numNodes; iN++ ) { BCNodeIDs_[iN] = nodeIDs[iN]; BCNodeAlpha_[iN] = new double[nodeDOF_]; BCNodeBeta_[iN] = new double[nodeDOF_]; BCNodeGamma_[iN] = new double[nodeDOF_]; - for ( iD = 0; iD < nodeDOF_; iD++ ) + for ( iD = 0; iD < nodeDOF_; iD++ ) { BCNodeAlpha_[iN][iD] = alpha[iN][iD]; BCNodeBeta_[iN][iD] = beta[iN][iD]; @@ -979,24 +978,24 @@ int FEI_HYPRE_Impl::loadNodeBCs(int numNodes, int *nodeIDs, int fieldID, BCNodeAlpha_ = new double*[numBCNodes_]; BCNodeBeta_ = new double*[numBCNodes_]; BCNodeGamma_ = new double*[numBCNodes_]; - for ( iN = 0; iN < oldNumBCNodes; iN++ ) + for ( iN = 0; iN < oldNumBCNodes; iN++ ) { BCNodeIDs_[iN] = oldBCNodeIDs[iN]; BCNodeAlpha_[iN] = oldBCAlpha[iN]; BCNodeBeta_[iN] = oldBCBeta[iN]; BCNodeGamma_[iN] = oldBCGamma[iN]; - } + } delete [] oldBCNodeIDs; delete [] oldBCAlpha; delete [] oldBCBeta; delete [] oldBCGamma; - for ( iN = 0; iN < numNodes; iN++ ) + for ( iN = 0; iN < numNodes; iN++ ) { BCNodeIDs_[oldNumBCNodes+iN] = nodeIDs[iN]; BCNodeAlpha_[oldNumBCNodes+iN] = new double[nodeDOF_]; BCNodeBeta_[oldNumBCNodes+iN] = new double[nodeDOF_]; BCNodeGamma_[oldNumBCNodes+iN] = new double[nodeDOF_]; - for ( iD = 0; iD < nodeDOF_; iD++ ) + for ( iD = 0; iD < nodeDOF_; iD++ ) { 
BCNodeAlpha_[oldNumBCNodes+iN][iD] = alpha[iN][iD]; BCNodeBeta_[oldNumBCNodes+iN][iD] = beta[iN][iD]; @@ -1006,16 +1005,16 @@ int FEI_HYPRE_Impl::loadNodeBCs(int numNodes, int *nodeIDs, int fieldID, } } TimerLoad_ += MPI_Wtime() - TimerLoadStart_; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::loadNodeBCs ends.\n", mypid_); return 0; } /************************************************************************** - load element connectivities, stiffness matrices, and element load + load element connectivities, stiffness matrices, and element load -------------------------------------------------------------------------*/ int FEI_HYPRE_Impl::sumInElem(int elemBlockID, int elemID, int *elemConn, - double **elemStiff, double *elemLoad, + double **elemStiff, double *elemLoad, int elemFormat) { int iB=0; @@ -1035,17 +1034,17 @@ int FEI_HYPRE_Impl::sumInElem(int elemBlockID, int elemID, int *elemConn, } #endif #ifdef HAVE_DEBUG - if ( outputLevel_ > 0 && elemBlocks_[iB]->getCurrentElem()==0 ) - printf("%4d : FEI_HYPRE_Impl::sumInElem begins... \n", mypid_); + if ( outputLevel_ > 0 && elemBlocks_[iB]->getCurrentElem()==0 ) + printf("%4d : FEI_HYPRE_Impl::sumInElem begins... \n", mypid_); #endif if ( elemBlocks_[iB]->getCurrentElem()==0 ) TimerLoadStart_ = MPI_Wtime(); elemBlocks_[iB]->loadElemInfo(elemID, elemConn, elemStiff, elemLoad); - if ( elemBlocks_[iB]->getCurrentElem()==elemBlocks_[iB]->getNumElems() ) + if ( elemBlocks_[iB]->getCurrentElem()==elemBlocks_[iB]->getNumElems() ) TimerLoad_ += MPI_Wtime() - TimerLoadStart_; #ifdef HAVE_DEBUG - if ( outputLevel_ > 0 && - elemBlocks_[iB]->getCurrentElem()==elemBlocks_[iB]->getNumElems() ) - printf("%4d : FEI_HYPRE_Impl::sumInElem ends. \n", mypid_); + if ( outputLevel_ > 0 && + elemBlocks_[iB]->getCurrentElem()==elemBlocks_[iB]->getNumElems() ) + printf("%4d : FEI_HYPRE_Impl::sumInElem ends. 
\n", mypid_); #endif return 0; } @@ -1073,17 +1072,17 @@ int FEI_HYPRE_Impl::sumInElemMatrix(int elemBlockID, int elemID, int *elemConn, } #endif #ifdef HAVE_DEBUG - if ( outputLevel_ > 0 && elemBlocks_[iB]->getCurrentElem()==0 ) - printf("%4d : FEI_HYPRE_Impl::sumInElemMatrix begins... \n", mypid_); + if ( outputLevel_ > 0 && elemBlocks_[iB]->getCurrentElem()==0 ) + printf("%4d : FEI_HYPRE_Impl::sumInElemMatrix begins... \n", mypid_); #endif if ( elemBlocks_[iB]->getCurrentElem()==0 ) TimerLoadStart_ = MPI_Wtime(); elemBlocks_[iB]->loadElemMatrix(elemID, elemConn, elemStiff); - if ( elemBlocks_[iB]->getCurrentElem()==elemBlocks_[iB]->getNumElems() ) + if ( elemBlocks_[iB]->getCurrentElem()==elemBlocks_[iB]->getNumElems() ) TimerLoad_ += MPI_Wtime() - TimerLoadStart_; #ifdef HAVE_DEBUG - if ( outputLevel_ > 0 && - elemBlocks_[iB]->getCurrentElem()==elemBlocks_[iB]->getNumElems() ) - printf("%4d : FEI_HYPRE_Impl::sumInElemMatrix ends. \n", mypid_); + if ( outputLevel_ > 0 && + elemBlocks_[iB]->getCurrentElem()==elemBlocks_[iB]->getNumElems() ) + printf("%4d : FEI_HYPRE_Impl::sumInElemMatrix ends. \n", mypid_); #endif return 0; } @@ -1115,7 +1114,7 @@ int FEI_HYPRE_Impl::sumInElemRHS(int elemBlockID, int elemID, int *elemConn, } /************************************************************************** - assemble matrix information + assemble matrix information -------------------------------------------------------------------------*/ int FEI_HYPRE_Impl::loadComplete() { @@ -1133,7 +1132,7 @@ int FEI_HYPRE_Impl::loadComplete() * get machine information * ----------------------------------------------------------------*/ - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::loadComplete begins.... 
\n", mypid_); TimerLoadStart_ = MPI_Wtime(); MPI_Comm_size( mpiComm_, &nprocs ); @@ -1146,7 +1145,7 @@ int FEI_HYPRE_Impl::loadComplete() for ( iB = 0; iB < numBlocks_; iB++ ) { ierr = elemBlocks_[iB]->checkLoadComplete(); - assert( !ierr ); + hypre_assert( !ierr ); } /* ----------------------------------------------------------------- @@ -1160,32 +1159,32 @@ int FEI_HYPRE_Impl::loadComplete() sharedNodeProcAux = new int*[numSharedNodes_]; for ( iN = 0; iN < numSharedNodes_; iN++ ) nodeIDs[iN] = iN; IntSort2(sharedNodeIDs_, nodeIDs, 0, numSharedNodes_-1); - for ( iN = 0; iN < numSharedNodes_; iN++ ) + for ( iN = 0; iN < numSharedNodes_; iN++ ) { - sharedNodeProcAux[iN] = sharedNodeProcs_[iN]; + sharedNodeProcAux[iN] = sharedNodeProcs_[iN]; nodeIDAux[iN] = sharedNodeNProcs_[iN]; } - for ( iN = 0; iN < numSharedNodes_; iN++ ) + for ( iN = 0; iN < numSharedNodes_; iN++ ) { index = nodeIDs[iN]; - sharedNodeProcs_[iN] = sharedNodeProcAux[index]; + sharedNodeProcs_[iN] = sharedNodeProcAux[index]; sharedNodeNProcs_[iN] = nodeIDAux[index]; } delete [] sharedNodeProcAux; delete [] nodeIDAux; delete [] nodeIDs; index = 0; - for ( iN = 1; iN < numSharedNodes_; iN++ ) + for ( iN = 1; iN < numSharedNodes_; iN++ ) { if ( sharedNodeIDs_[iN] == sharedNodeIDs_[index] ) { nodeIDAux = sharedNodeProcs_[index]; - sharedNodeProcs_[index] = + sharedNodeProcs_[index] = new int[sharedNodeNProcs_[index]+sharedNodeNProcs_[iN]]; - for ( iP = 0; iP < sharedNodeNProcs_[index]; iP++ ) - sharedNodeProcs_[index][iP] = nodeIDAux[iP]; - for ( iP = 0; iP < sharedNodeNProcs_[iN]; iP++ ) - sharedNodeProcs_[index][sharedNodeNProcs_[index]+iP] = + for ( iP = 0; iP < sharedNodeNProcs_[index]; iP++ ) + sharedNodeProcs_[index][iP] = nodeIDAux[iP]; + for ( iP = 0; iP < sharedNodeNProcs_[iN]; iP++ ) + sharedNodeProcs_[index][sharedNodeNProcs_[index]+iP] = sharedNodeProcs_[iN][iP]; sharedNodeNProcs_[index] += sharedNodeNProcs_[iN]; @@ -1201,11 +1200,11 @@ int FEI_HYPRE_Impl::loadComplete() } } if ( 
numSharedNodes_ > 0 ) numSharedNodes_ = index + 1; - for ( iN = 0; iN < numSharedNodes_; iN++ ) + for ( iN = 0; iN < numSharedNodes_; iN++ ) { IntSort(sharedNodeProcs_[iN], 0, sharedNodeNProcs_[iN]-1); index = 0; - for ( iP = 1; iP < sharedNodeNProcs_[iN]; iP++ ) + for ( iP = 1; iP < sharedNodeNProcs_[iN]; iP++ ) if (sharedNodeProcs_[iN][iP] != sharedNodeProcs_[iN][index]) sharedNodeProcs_[iN][++index] = sharedNodeProcs_[iN][iP]; sharedNodeNProcs_[iN] = index + 1; @@ -1261,13 +1260,13 @@ int FEI_HYPRE_Impl::loadComplete() { for ( iN2 = index-1; iN2 >= 0; iN2-- ) { - if ( nodeIDs[iN2] == nodeIDs[index] ) + if ( nodeIDs[iN2] == nodeIDs[index] ) nodeIDAux[iN2] = - nodeIDAux[iN2] - 1; else break; } for ( iN2 = index+1; iN2 < totalNNodes; iN2++ ) { - if ( nodeIDs[iN2] == nodeIDs[index] ) + if ( nodeIDs[iN2] == nodeIDs[index] ) nodeIDAux[iN2] = - nodeIDAux[iN2] - 1; else break; } @@ -1280,10 +1279,10 @@ int FEI_HYPRE_Impl::loadComplete() localNNodes = numLocalNodes_ = 0; for ( iN = 1; iN < totalNNodes; iN++ ) { - if ( nodeIDs[iN] != nodeIDs[iN-1] ) + if ( nodeIDs[iN] != nodeIDs[iN-1] ) { localNNodes++; - if ( nodeIDAux[iN] >= 0 ) numLocalNodes_++; + if ( nodeIDAux[iN] >= 0 ) numLocalNodes_++; } } if ( totalNNodes > 0 ) localNNodes++; @@ -1327,7 +1326,7 @@ int FEI_HYPRE_Impl::loadComplete() } /* ----------------------------------------------------------------- - * rewrite the element connectivities with local node numbers + * rewrite the element connectivities with local node numbers * ----------------------------------------------------------------*/ if ( totalNNodes > 0 ) nodeIDAux2 = new int[totalNNodes]; @@ -1348,7 +1347,7 @@ int FEI_HYPRE_Impl::loadComplete() elemNodeList[iE][iN] = nodeIDAux2[totalNNodes++]; } } - if ( totalNNodes > 0 ) + if ( totalNNodes > 0 ) { delete [] nodeIDAux; delete [] nodeIDAux2; @@ -1362,15 +1361,15 @@ int FEI_HYPRE_Impl::loadComplete() globalNodeOffsets_ = new int[nprocs+1]; MPI_Allgather(&numLocalNodes_, 1, MPI_INT, globalNodeOffsets_, 1, 
MPI_INT, mpiComm_); - for ( iP = nprocs; iP > 0; iP-- ) + for ( iP = nprocs; iP > 0; iP-- ) globalNodeOffsets_[iP] = globalNodeOffsets_[iP-1]; globalNodeOffsets_[0] = 0; - for ( iP = 1; iP <= nprocs; iP++ ) + for ( iP = 1; iP <= nprocs; iP++ ) globalNodeOffsets_[iP] += globalNodeOffsets_[iP-1]; nodeOffset = globalNodeOffsets_[mypid_]; /* ----------------------------------------------------------------- - * next construct communication pattern + * next construct communication pattern * ----------------------------------------------------------------*/ /* -- create an aux array for holding mapped external node IDs -- */ @@ -1396,7 +1395,7 @@ int FEI_HYPRE_Impl::loadComplete() sndrcvReg[iN] = 1; // recv pnum = mypid_; for ( iP = 0; iP < sharedNodeNProcs_[iN]; iP++ ) - if (sharedNodeProcs_[iN][iP] < pnum) + if (sharedNodeProcs_[iN][iP] < pnum) pnum = sharedNodeProcs_[iN][iP]; ownerProcs[index] = pnum; pArrayAux[index] = pnum; @@ -1424,7 +1423,7 @@ int FEI_HYPRE_Impl::loadComplete() for ( iP = 0; iP < nRecv; iP++ ) recvLengs[iP] = 0; for ( iN = 0; iN < numSharedNodes_; iN++ ) { - if ( sndrcvReg[iN] == 1 ) + if ( sndrcvReg[iN] == 1 ) { index = HYPRE_LSI_Search(&(nodeGlobalIDs_[numLocalNodes_]), sharedNodeIDs_[iN], numExtNodes_); @@ -1483,7 +1482,7 @@ int FEI_HYPRE_Impl::loadComplete() } } if ( nSend > 0 ) sendBuf = new int*[nSend]; - for ( iP = 0; iP < nSend; iP++ ) + for ( iP = 0; iP < nSend; iP++ ) { sendBuf[iP] = new int[sendLengs[iP]]; sendLengs[iP] = 0; @@ -1522,7 +1521,7 @@ int FEI_HYPRE_Impl::loadComplete() /* -- fix the send index array -- */ - for (iP = 0; iP < nSend; iP++) + for (iP = 0; iP < nSend; iP++) for ( iN = 0; iN < sendLengs[iP]; iN++ ) sendBuf[iP][iN] -= nodeOffset; @@ -1543,7 +1542,7 @@ int FEI_HYPRE_Impl::loadComplete() /* -- construct the receive communication pattern -- */ nRecvs_ = nRecv; - if ( nRecv > 0 ) + if ( nRecv > 0 ) { recvProcs_ = recvProcs; recvLengs_ = recvLengs; @@ -1551,15 +1550,15 @@ int FEI_HYPRE_Impl::loadComplete() else 
recvProcs_ = recvLengs_ = NULL; if ( nRecv > 0 ) recvProcIndices_ = recvBuf; else recvProcIndices_ = NULL; - + /* -- construct the send communication pattern -- */ nSends_ = nSend; - if ( nSend > 0 ) + if ( nSend > 0 ) { sendLengs_ = sendLengs; sendProcs_ = sendProcs; - } + } else sendLengs_ = sendProcs_ = NULL; if ( nSend > 0 ) sendProcIndices_ = sendBuf; else sendProcIndices_ = NULL; @@ -1572,7 +1571,7 @@ int FEI_HYPRE_Impl::loadComplete() TimerLoad_ += MPI_Wtime() - TimerLoadStart_; if ( FLAG_PrintMatrix_ > 0 ) printLinearSystem(); FLAG_LoadComplete_ = 1; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) printf("%4d : FEI_HYPRE_Impl::loadComplete ends. \n", mypid_); return 0; } @@ -1675,15 +1674,15 @@ int FEI_HYPRE_Impl::residualNorm(int whichNorm, int numFields, int* fieldIDs, extNRows = numExtNodes_ * nodeDOF_; totalNRows = localNRows + extNRows; rVec = new double[totalNRows]; - matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) + matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) rVec[irow] = rhsVector_[irow] - rVec[irow]; - switch(whichNorm) + switch(whichNorm) { case 0: rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { dtemp = fabs( rVec[irow] ); if ( dtemp > rnorm ) rnorm = dtemp; @@ -1693,14 +1692,14 @@ int FEI_HYPRE_Impl::residualNorm(int whichNorm, int numFields, int* fieldIDs, break; case 1: rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += fabs( rVec[irow] ); MPI_Allreduce(&rnorm, &dtemp, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); (*norms) = dtemp; break; case 2: rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; MPI_Allreduce(&rnorm, &dtemp, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); (*norms) = sqrt(dtemp); @@ -1716,16 +1715,16 @@ int FEI_HYPRE_Impl::residualNorm(int whichNorm, int numFields, int* fieldIDs, 
int FEI_HYPRE_Impl::getNumBlockActNodes(int blockID, int *numNodes) { int localNNodes, iB, iE, iN, totalNNodes, nElems; - int elemNNodes, **elemNodeLists, *nodeIDs; + int elemNNodes, **elemNodeLists, *nodeIDs; - if ( numBlocks_ == 1 ) + if ( numBlocks_ == 1 ) { (*numNodes) = numLocalNodes_ + numExtNodes_; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) { - printf("%4d : FEI_HYPRE_Impl::getNumBlockActNodes blockID = %d.\n", + printf("%4d : FEI_HYPRE_Impl::getNumBlockActNodes blockID = %d.\n", mypid_, blockID); - printf("%4d : FEI_HYPRE_Impl::getNumBlockActNodes numNodes = %d\n", + printf("%4d : FEI_HYPRE_Impl::getNumBlockActNodes numNodes = %d\n", mypid_, (*numNodes)); } return 0; @@ -1739,8 +1738,8 @@ int FEI_HYPRE_Impl::getNumBlockActNodes(int blockID, int *numNodes) printf("%4d : FEI_HYPRE_Impl::getNumBlockActNodes ERROR -",mypid_); printf(" invalid blockID\n"); exit(1); - } - totalNNodes = numLocalNodes_ + numExtNodes_; + } + totalNNodes = numLocalNodes_ + numExtNodes_; nodeIDs = new int[totalNNodes]; for ( iN = 0; iN < totalNNodes; iN++ ) nodeIDs[iN] = 0; nElems = elemBlocks_[iB]->getNumElems(); @@ -1750,16 +1749,16 @@ int FEI_HYPRE_Impl::getNumBlockActNodes(int blockID, int *numNodes) for ( iN = 0; iN < elemNNodes; iN++ ) nodeIDs[elemNodeLists[iE][iN]] = 1; localNNodes = 0; - for ( iN = 0; iN < totalNNodes; iN++ ) + for ( iN = 0; iN < totalNNodes; iN++ ) if ( nodeIDs[iN] == 1 ) localNNodes++; delete [] nodeIDs; (*numNodes) = localNNodes; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) { - printf("%4d : FEI_HYPRE_Impl::getNumBlockActNodes blockID = %d.\n", + printf("%4d : FEI_HYPRE_Impl::getNumBlockActNodes blockID = %d.\n", mypid_, blockID); - printf("%4d : FEI_HYPRE_Impl::getNumBlockActNodes numNodes = %d\n", + printf("%4d : FEI_HYPRE_Impl::getNumBlockActNodes numNodes = %d\n", mypid_, (*numNodes)); } } @@ -1775,11 +1774,11 @@ int FEI_HYPRE_Impl::getNumBlockActEqns(int blockID, int *numEqns) getNumBlockActNodes(blockID, &numNodes); (*numEqns) = 
numNodes * nodeDOF_; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) { - printf("%4d : FEI_HYPRE_Impl::getNumBlockActEqns blockID = %d\n", + printf("%4d : FEI_HYPRE_Impl::getNumBlockActEqns blockID = %d\n", mypid_, blockID); - printf("%4d : FEI_HYPRE_Impl::getNumBlockActEqns numEqns = %d\n", + printf("%4d : FEI_HYPRE_Impl::getNumBlockActEqns numEqns = %d\n", mypid_, (*numEqns)); } return 0; @@ -1791,16 +1790,16 @@ int FEI_HYPRE_Impl::getNumBlockActEqns(int blockID, int *numEqns) int FEI_HYPRE_Impl::getBlockNodeIDList(int blockID,int numNodes,int *nodeList) { int localNNodes, iB, iE, iN, totalNNodes, nElems; - int elemNNodes, **elemNodeLists, *nodeIDs; + int elemNNodes, **elemNodeLists, *nodeIDs; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) { - printf("%4d : FEI_HYPRE_Impl::getBlockNodeIDList blockID = %d\n", + printf("%4d : FEI_HYPRE_Impl::getBlockNodeIDList blockID = %d\n", mypid_, blockID); - printf("%4d : FEI_HYPRE_Impl::getBlockNodeIDList numNodes = %d\n", + printf("%4d : FEI_HYPRE_Impl::getBlockNodeIDList numNodes = %d\n", mypid_, numNodes); } - if ( numBlocks_ == 1 ) + if ( numBlocks_ == 1 ) { localNNodes = numLocalNodes_ + numExtNodes_; if ( localNNodes != numNodes ) @@ -1810,7 +1809,7 @@ int FEI_HYPRE_Impl::getBlockNodeIDList(int blockID,int numNodes,int *nodeList) exit(1); } for ( iN = 0; iN < localNNodes; iN++ ) - nodeList[iN] = nodeGlobalIDs_[iN]; + nodeList[iN] = nodeGlobalIDs_[iN]; return 0; } else @@ -1822,8 +1821,8 @@ int FEI_HYPRE_Impl::getBlockNodeIDList(int blockID,int numNodes,int *nodeList) printf("%4d : FEI_HYPRE_Impl::getBlockNodeIDList ERROR -",mypid_); printf(" invalid blockID.\n"); exit(1); - } - totalNNodes = numLocalNodes_ + numExtNodes_; + } + totalNNodes = numLocalNodes_ + numExtNodes_; nodeIDs = new int[totalNNodes]; for ( iN = 0; iN < totalNNodes; iN++ ) nodeIDs[iN] = 0; nElems = elemBlocks_[iB]->getNumElems(); @@ -1833,7 +1832,7 @@ int FEI_HYPRE_Impl::getBlockNodeIDList(int blockID,int numNodes,int *nodeList) for ( iN 
= 0; iN < elemNNodes; iN++ ) nodeIDs[elemNodeLists[iE][iN]] = 1; localNNodes = 0; - for ( iN = 0; iN < totalNNodes; iN++ ) + for ( iN = 0; iN < totalNNodes; iN++ ) if ( nodeIDs[iN] == 1 ) nodeList[localNNodes++] = nodeGlobalIDs_[iN]; if ( localNNodes != numNodes ) { @@ -1847,7 +1846,7 @@ int FEI_HYPRE_Impl::getBlockNodeIDList(int blockID,int numNodes,int *nodeList) } /************************************************************************** - get solution + get solution -------------------------------------------------------------------------*/ int FEI_HYPRE_Impl::getBlockNodeSolution(int blockID,int numNodes, int *nodeList, int *nodeOffsets, double *solnValues) @@ -1857,14 +1856,14 @@ int FEI_HYPRE_Impl::getBlockNodeSolution(int blockID,int numNodes, double *dataBuf, **solnVecs; (void) nodeList; - if ( outputLevel_ >= 2 ) + if ( outputLevel_ >= 2 ) { - printf("%4d : FEI_HYPRE_Impl::getBlockNodeSolution blockID = %d\n", + printf("%4d : FEI_HYPRE_Impl::getBlockNodeSolution blockID = %d\n", mypid_, blockID); - printf("%4d : FEI_HYPRE_Impl::getBlockNodeSolution numNodes = %d\n", + printf("%4d : FEI_HYPRE_Impl::getBlockNodeSolution numNodes = %d\n", mypid_, numNodes); } - if ( numBlocks_ == 1 ) + if ( numBlocks_ == 1 ) { for ( iN = 0; iN < numNodes; iN++ ) { @@ -1882,8 +1881,8 @@ int FEI_HYPRE_Impl::getBlockNodeSolution(int blockID,int numNodes, printf("%4d : FEI_HYPRE_Impl::getBlockNodeSolution ERROR -",mypid_); printf(" invalid blockID.\n"); exit(1); - } - totalNNodes = numLocalNodes_ + numExtNodes_; + } + totalNNodes = numLocalNodes_ + numExtNodes_; nodeIDs = new int[totalNNodes]; dataBuf = new double[totalNNodes*nodeDOF_]; for ( iN = 0; iN < totalNNodes; iN++ ) nodeIDs[iN] = 0; @@ -1902,10 +1901,10 @@ int FEI_HYPRE_Impl::getBlockNodeSolution(int blockID,int numNodes, } } localNNodes = 0; - for ( iN = 0; iN < totalNNodes; iN++ ) + for ( iN = 0; iN < totalNNodes; iN++ ) { nodeID = nodeIDs[iN]; - if ( nodeID == 1 ) + if ( nodeID == 1 ) { nodeOffsets[localNNodes] = 
localNNodes * nodeDOF_; for ( iD = 0; iD < nodeDOF_; iD++ ) @@ -1925,11 +1924,11 @@ int FEI_HYPRE_Impl::getBlockNodeSolution(int blockID,int numNodes, void FEI_HYPRE_Impl::buildGlobalMatrixVector() { int matDim, *diagCounts=NULL, nElems, elemNNodes, **elemNodeLists=NULL; - int iB, iD, iE, iN, offset, iD2, iD3, iN2, *elemNodeList=NULL, diagNNZ; + int iB, iD, iE, iN, offset, iD2, iD3, iN2, *elemNodeList=NULL, diagNNZ; int offdNNZ, *offdCounts=NULL, rowIndBase, rowInd, colIndBase, colInd; int bound, iCount, index, iBegin, *TdiagIA=NULL, *TdiagJA=NULL; int *ToffdIA=NULL, *ToffdJA=NULL, elemNExt, elemNLocal, nodeID; - int diagOffset, offdOffset; + int diagOffset, offdOffset; double **elemMats=NULL, *elemMat=NULL, *TdiagAA=NULL, *ToffdAA=NULL; double alpha, beta, gamma1; @@ -1980,25 +1979,25 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() } } } - } + } /* ----------------------------------------------------------------- - * allocate the CSR matrix storage space + * allocate the CSR matrix storage space * -----------------------------------------------------------------*/ diagNNZ = offdNNZ = 0; - for ( iD = 0; iD < matDim; iD++ ) + for ( iD = 0; iD < matDim; iD++ ) { diagNNZ += diagCounts[iD]; offdNNZ += offdCounts[iD]; } - if ( diagNNZ > 0 ) + if ( diagNNZ > 0 ) { TdiagIA = new int[matDim+1]; TdiagJA = new int[diagNNZ]; TdiagAA = new double[diagNNZ]; } - if ( offdNNZ > 0 ) + if ( offdNNZ > 0 ) { ToffdIA = new int[matDim+1]; ToffdJA = new int[offdNNZ]; @@ -2006,7 +2005,7 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() } /* ----------------------------------------------------------------- - * get ready for loading up the CSR matrix + * get ready for loading up the CSR matrix * -----------------------------------------------------------------*/ offset = 0; @@ -2016,7 +2015,7 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() offset += diagCounts[iD]; } offset = 0; - if ( offdNNZ > 0 ) + if ( offdNNZ > 0 ) { for ( iD = 0; iD < matDim; iD++ ) { @@ -2026,7 +2025,7 @@ void 
FEI_HYPRE_Impl::buildGlobalMatrixVector() } /* ----------------------------------------------------------------- - * load the CSR matrix + * load the CSR matrix * -----------------------------------------------------------------*/ bound = numLocalNodes_ * nodeDOF_; @@ -2046,12 +2045,12 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() for ( iN = 0; iN < elemNNodes; iN++ ) { colInd = elemNodeList[iN]; - if ( colInd >= bound ) + if ( colInd >= bound ) { for ( iN2 = 0; iN2 < elemNNodes; iN2++ ) { rowInd = elemNodeList[iN2]; - if ( *elemMat != 0.0 ) + if ( *elemMat != 0.0 ) { index = ToffdIA[rowInd]++; ToffdJA[index] = colInd; @@ -2065,7 +2064,7 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() for ( iN2 = 0; iN2 < elemNNodes; iN2++ ) { rowInd = elemNodeList[iN2]; - if ( *elemMat != 0.0 ) + if ( *elemMat != 0.0 ) { index = TdiagIA[rowInd]++; TdiagJA[index] = colInd; @@ -2098,9 +2097,9 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() for ( iD2 = 0; iD2 < nodeDOF_; iD2++ ) { rowInd = rowIndBase + iD2; - if ( elemMat[offset] != 0.0 ) + if ( elemMat[offset] != 0.0 ) { - if ( colInd >= bound ) + if ( colInd >= bound ) { index = ToffdIA[rowInd]++; ToffdJA[index] = colInd; @@ -2151,7 +2150,7 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() } for ( iD = 0; iD < matDim; iD++ ) { - if ( diagCounts[iD] > 0 ) + if ( diagCounts[iD] > 0 ) { iBegin = TdiagIA[iD]; iCount = diagCounts[iD]; @@ -2160,7 +2159,7 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() IntSort2a(&(TdiagJA[iBegin]),&(TdiagAA[iBegin]),0,iCount-1); for ( iD2 = iBegin+1; iD2 < iBegin+iCount; iD2++ ) { - if ( TdiagJA[iD2] == TdiagJA[index] ) + if ( TdiagJA[iD2] == TdiagJA[index] ) TdiagAA[index] += TdiagAA[iD2]; else { @@ -2172,7 +2171,7 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() if ( iCount > 0 && TdiagAA[index] != 0.0 ) index++; diagCounts[iD] = index - iBegin; } - if ( offdCounts[iD] > 0 ) + if ( offdCounts[iD] > 0 ) { iBegin = ToffdIA[iD]; iCount = offdCounts[iD]; @@ -2181,7 +2180,7 @@ void 
FEI_HYPRE_Impl::buildGlobalMatrixVector() IntSort2a(&(ToffdJA[iBegin]),&(ToffdAA[iBegin]),0,iCount-1); for ( iD2 = iBegin+1; iD2 < iBegin+iCount; iD2++ ) { - if ( ToffdJA[iD2] == ToffdJA[index] ) + if ( ToffdJA[iD2] == ToffdJA[index] ) ToffdAA[index] += ToffdAA[iD2]; else { @@ -2208,9 +2207,9 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() { for ( iD = index*nodeDOF_; iD < (index+1)*nodeDOF_; iD++ ) { - alpha = BCNodeAlpha_[iN][iD%nodeDOF_]; - beta = BCNodeBeta_[iN][iD%nodeDOF_]; - gamma1= BCNodeGamma_[iN][iD%nodeDOF_]; + alpha = BCNodeAlpha_[iN][iD%nodeDOF_]; + beta = BCNodeBeta_[iN][iD%nodeDOF_]; + gamma1= BCNodeGamma_[iN][iD%nodeDOF_]; if ( beta == 0.0 && alpha != 0.0 ) { for (iD2=TdiagIA[iD]; iD2 0 ) @@ -2356,23 +2355,23 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() /* ----------------------------------------------------------------- * recompute the sparsity structure of the compressed matrix - * allocate and load the final CSR matrix + * allocate and load the final CSR matrix * -----------------------------------------------------------------*/ diagNNZ = 0; - for ( iD = 0; iD < matDim; iD++ ) + for ( iD = 0; iD < matDim; iD++ ) { - for ( iD2 = TdiagIA[iD]; iD2 < TdiagIA[iD]+diagCounts[iD]; iD2++ ) + for ( iD2 = TdiagIA[iD]; iD2 < TdiagIA[iD]+diagCounts[iD]; iD2++ ) if ( TdiagAA[iD2] != 0.0 ) diagNNZ++; } if ( offdNNZ > 0 ) { offdNNZ = 0; - for ( iD = 0; iD < matDim; iD++ ) - for ( iD2 = ToffdIA[iD]; iD2 < ToffdIA[iD]+offdCounts[iD]; iD2++ ) + for ( iD = 0; iD < matDim; iD++ ) + for ( iD2 = ToffdIA[iD]; iD2 < ToffdIA[iD]+offdCounts[iD]; iD2++ ) if ( ToffdAA[iD2] != 0.0 ) offdNNZ++; } - if ( diagNNZ > 0 ) + if ( diagNNZ > 0 ) { diagIA_ = new int[matDim+1]; diagJA_ = new int[diagNNZ]; @@ -2380,7 +2379,7 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() diagonal_ = new double[matDim]; diagIA_[0] = 0; } - if ( offdNNZ > 0 ) + if ( offdNNZ > 0 ) { offdIA_ = new int[matDim+1]; offdJA_ = new int[offdNNZ]; @@ -2388,41 +2387,41 @@ void 
FEI_HYPRE_Impl::buildGlobalMatrixVector() offdIA_[0] = 0; } diagOffset = offdOffset = 0; - for ( iD = 0; iD < matDim; iD++ ) + for ( iD = 0; iD < matDim; iD++ ) { iCount = diagCounts[iD]; index = TdiagIA[iD]; diagonal_[iD] = 0.0; - for ( iD2 = 0; iD2 < iCount; iD2++ ) + for ( iD2 = 0; iD2 < iCount; iD2++ ) { - if ( TdiagJA[index] == iD ) + if ( TdiagJA[index] == iD ) { if ( TdiagAA[index] != 0.0 ) diagonal_[iD] = TdiagAA[index]; } - if ( TdiagJA[index] >= 0 && TdiagAA[index] != 0.0 ) + if ( TdiagJA[index] >= 0 && TdiagAA[index] != 0.0 ) { diagJA_[diagOffset] = TdiagJA[index]; diagAA_[diagOffset++] = TdiagAA[index]; } - index++; + index++; } diagIA_[iD+1] = diagOffset; - if ( offdNNZ > 0 ) + if ( offdNNZ > 0 ) { iCount = offdCounts[iD]; index = ToffdIA[iD]; - for ( iD2 = 0; iD2 < iCount; iD2++ ) + for ( iD2 = 0; iD2 < iCount; iD2++ ) { - if ( ToffdJA[index] == iD ) + if ( ToffdJA[index] == iD ) { if ( ToffdAA[index] != 0.0 ) diagonal_[iD] = ToffdAA[index]; } - if ( ToffdJA[index] >= 0 && ToffdAA[index] != 0.0 ) + if ( ToffdJA[index] >= 0 && ToffdAA[index] != 0.0 ) { offdJA_[offdOffset] = ToffdJA[index]; offdAA_[offdOffset++] = ToffdAA[index]; } - index++; + index++; } offdIA_[iD+1] = offdOffset; } @@ -2433,7 +2432,7 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() * -----------------------------------------------------------------*/ PVectorReverseChange( diagonal_ ); - for ( iD = 0; iD < numLocalNodes_*nodeDOF_; iD++ ) + for ( iD = 0; iD < numLocalNodes_*nodeDOF_; iD++ ) { if ( diagonal_[iD] == 0.0 ) diagonal_[iD] = 1.0; else diagonal_[iD] = 1.0 / diagonal_[iD]; @@ -2442,19 +2441,19 @@ void FEI_HYPRE_Impl::buildGlobalMatrixVector() /* ----------------------------------------------------------------- * clean up * -----------------------------------------------------------------*/ - + if ( matDim > 0 ) { delete [] diagCounts; delete [] offdCounts; } - if ( diagNNZ > 0 ) + if ( diagNNZ > 0 ) { delete [] TdiagIA; delete [] TdiagJA; delete [] TdiagAA; } - if ( offdNNZ > 
0 ) + if ( offdNNZ > 0 ) { delete [] ToffdIA; delete [] ToffdJA; @@ -2482,7 +2481,7 @@ int FEI_HYPRE_Impl::solveUsingCG() extNRows = numExtNodes_ * nodeDOF_; totalNRows = localNRows + extNRows; rVec = new double[totalNRows]; - + /* ----------------------------------------------------------------- * assemble the initial guess vector * -----------------------------------------------------------------*/ @@ -2492,12 +2491,12 @@ int FEI_HYPRE_Impl::solveUsingCG() /* ----------------------------------------------------------------- * compute initial residual vector and norm * -----------------------------------------------------------------*/ - - matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) + + matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm0 = rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { rnorm0 += (rVec[irow] * rVec[irow]); rnorm += (rhsVector_[irow] * rhsVector_[irow]); @@ -2509,7 +2508,7 @@ int FEI_HYPRE_Impl::solveUsingCG() rnorm = sqrt(dArray2[0]); if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tFEI_HYPRE_Impl initial rnorm = %e (%e)\n",rnorm,rnorm0); - if ( rnorm0 == 0.0 ) + if ( rnorm0 == 0.0 ) { delete [] rVec; return 0; @@ -2533,17 +2532,17 @@ int FEI_HYPRE_Impl::solveUsingCG() * loop until convergence is achieved * -----------------------------------------------------------------*/ - while ( converged == 0 && numTrials < 2 ) + while ( converged == 0 && numTrials < 2 ) { innerIteration = 0; - while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) + while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) { iter++; innerIteration++; if ( innerIteration == 1 ) { if ( diagonal_ != NULL ) - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) zVec[irow] = rVec[irow] * diagonal_[irow]; else for (irow = 0; irow < localNRows; irow++) @@ -2551,7 +2550,7 @@ int 
FEI_HYPRE_Impl::solveUsingCG() rhom1 = rho; rho = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rho += rVec[irow] * zVec[irow]; dArray[0] = rho; MPI_Allreduce(dArray, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); @@ -2559,56 +2558,56 @@ int FEI_HYPRE_Impl::solveUsingCG() beta = 0.0; } else beta = rho / rhom1; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) pVec[irow] = zVec[irow] + beta * pVec[irow]; - matvec( pVec, apVec ); + matvec( pVec, apVec ); sigma = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) sigma += pVec[irow] * apVec[irow]; dArray[0] = sigma; MPI_Allreduce(dArray, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); sigma = dArray2[0]; - alpha = rho / sigma; - for ( irow = 0; irow < localNRows; irow++ ) + alpha = rho / sigma; + for ( irow = 0; irow < localNRows; irow++ ) { solnVector_[irow] += alpha * pVec[irow]; rVec[irow] -= alpha * apVec[irow]; } rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; dArray[0] = rnorm; if ( diagonal_ != NULL ) - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) zVec[irow] = rVec[irow] * diagonal_[irow]; else for (irow = 0; irow < localNRows; irow++) zVec[irow] = rVec[irow]; rhom1 = rho; rho = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rho += rVec[irow] * zVec[irow]; dArray[1] = rho; MPI_Allreduce(dArray, dArray2, 2, MPI_DOUBLE, MPI_SUM, mpiComm_); - rho = dArray2[1]; + rho = dArray2[1]; rnorm = sqrt( dArray2[0] ); if ( outputLevel_ >= 2 && iter % 1 == 0 && mypid_ == 0 ) printf("\tFEI_HYPRE_Impl : iteration %d - rnorm = %e (%e)\n", iter, rnorm, eps1); } - matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) - rVec[irow] = rhsVector_[irow] - rVec[irow]; + matvec( solnVector_, rVec ); + for ( 
irow = 0; irow < localNRows; irow++ ) + rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; dArray[0] = rnorm; MPI_Allreduce(dArray, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt( dArray2[0] ); if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tFEI_HYPRE_Impl actual rnorm = %e \n",rnorm); - if ( (rnorm < eps1 || rnorm < 1.0e-16) || + if ( (rnorm < eps1 || rnorm < 1.0e-16) || iter >= krylovMaxIterations_ ) converged = 1; numTrials++; } @@ -2651,7 +2650,7 @@ int FEI_HYPRE_Impl::solveUsingGMRES() for (iV = 0; iV <= gmresDim_+1; iV++) kVectors[iV] = new double[totalNRows]; dArray = new double[gmresDim_+1]; dArray2 = new double[gmresDim_+1]; - + /* ----------------------------------------------------------------- * assemble the initial guess vector * -----------------------------------------------------------------*/ @@ -2661,13 +2660,13 @@ int FEI_HYPRE_Impl::solveUsingGMRES() /* ----------------------------------------------------------------- * compute initial residual vector and norm * -----------------------------------------------------------------*/ - + tVector = kVectors[1]; - matvec( solnVector_, tVector ); - for ( irow = 0; irow < localNRows; irow++ ) + matvec( solnVector_, tVector ); + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] = rhsVector_[irow] - tVector[irow]; rnorm0 = rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { rnorm0 += (tVector[irow] * tVector[irow]); rnorm += (rhsVector_[irow] * rhsVector_[irow]); @@ -2680,7 +2679,7 @@ int FEI_HYPRE_Impl::solveUsingGMRES() if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tFEI_HYPRE_Impl initial rnorm = %e (%e)\n", rnorm, rnorm0); - if ( rnorm0 < 1.0e-20 ) + if ( rnorm0 < 1.0e-20 ) { for (iV = 0; iV <= gmresDim_+1; iV++) delete [] kVectors[iV]; delete [] kVectors; @@ -2707,7 +2706,7 @@ int 
FEI_HYPRE_Impl::solveUsingGMRES() iter = 0; - while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) + while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) { dtemp = 1.0 / rnorm; tVector = kVectors[1]; @@ -2715,8 +2714,8 @@ int FEI_HYPRE_Impl::solveUsingGMRES() RS[1] = rnorm; innerIterations = 0; - while ( innerIterations < gmresDim_ && rnorm >= eps1 && - iter < krylovMaxIterations_ ) + while ( innerIterations < gmresDim_ && rnorm >= eps1 && + iter < krylovMaxIterations_ ) { innerIterations++; iter++; @@ -2725,63 +2724,63 @@ int FEI_HYPRE_Impl::solveUsingGMRES() v1 = kVectors[kStep]; v2 = kVectors[0]; if ( diagonal_ != NULL ) - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) v2[irow] = v1[irow] * diagonal_[irow]; else for (irow = 0; irow < localNRows; irow++) v2[irow] = v1[irow]; - matvec( kVectors[0], kVectors[kp1] ); + matvec( kVectors[0], kVectors[kp1] ); #if 0 tVector = kVectors[kp1]; - for ( iV = 1; iV <= kStep; iV++ ) + for ( iV = 1; iV <= kStep; iV++ ) { dtemp = 0.0; tVector2 = kVectors[iV]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += tVector2[irow] * tVector[irow]; dArray[iV-1] = dtemp; } - MPI_Allreduce(dArray, dArray2, kStep, MPI_DOUBLE, MPI_SUM, + MPI_Allreduce(dArray, dArray2, kStep, MPI_DOUBLE, MPI_SUM, mpiComm_); tVector = kVectors[kp1]; - for ( iV = 1; iV <= kStep; iV++ ) + for ( iV = 1; iV <= kStep; iV++ ) { dtemp = dArray2[iV-1]; - HH[iV][kStep] = dtemp; + HH[iV][kStep] = dtemp; tVector2 = kVectors[iV]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] -= dtemp * tVector2[irow]; } #else tVector = kVectors[kp1]; - for ( iV = 1; iV <= kStep; iV++ ) + for ( iV = 1; iV <= kStep; iV++ ) { dtemp = 0.0; tVector2 = kVectors[iV]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += tVector2[irow] * tVector[irow]; dArray[0] = dtemp; MPI_Allreduce(dArray, 
dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); dtemp = dArray2[0]; - HH[iV][kStep] = dtemp; - for ( irow = 0; irow < localNRows; irow++ ) + HH[iV][kStep] = dtemp; + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] -= dtemp * tVector2[irow]; } #endif dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += tVector[irow] * tVector[irow]; MPI_Allreduce(&dtemp, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); dtemp = sqrt(dArray2[0]); HH[kp1][kStep] = dtemp; - if ( dtemp != 0.0 ) + if ( dtemp != 0.0 ) { dtemp = 1.0 / dtemp; for (irow = 0; irow < localNRows; irow++) tVector[irow] *= dtemp; } - for ( iV = 2; iV <= kStep; iV++ ) + for ( iV = 2; iV <= kStep; iV++ ) { dtemp = HH[iV-1][kStep]; HH[iV-1][kStep] = C[iV-1] * dtemp + S[iV-1] * HH[iV][kStep]; @@ -2794,7 +2793,7 @@ int FEI_HYPRE_Impl::solveUsingGMRES() S[kStep] = HH[kp1][kStep] / gam; RS[kp1] = -S[kStep] * RS[kStep]; RS[kStep] = C[kStep] * RS[kStep]; - HH[kStep][kStep] = C[kStep] * HH[kStep][kStep] + + HH[kStep][kStep] = C[kStep] * HH[kStep][kStep] + S[kStep] * HH[kp1][kStep]; rnorm = habs(RS[kp1]); if ( outputLevel_ >= 2 && mypid_ == 0 ) @@ -2802,42 +2801,42 @@ int FEI_HYPRE_Impl::solveUsingGMRES() iter, rnorm); } RS[kStep] = RS[kStep] / HH[kStep][kStep]; - for ( iV = 2; iV <= kStep; iV++ ) + for ( iV = 2; iV <= kStep; iV++ ) { iV2 = kStep - iV + 1; dtemp = RS[iV2]; - for ( jV = iV2+1; jV <= kStep; jV++ ) + for ( jV = iV2+1; jV <= kStep; jV++ ) dtemp = dtemp - HH[iV2][jV] * RS[jV]; RS[iV2] = dtemp / HH[iV2][iV2]; } tVector = kVectors[1]; dtemp = RS[1]; for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] *= dtemp; - for ( iV = 2; iV <= kStep; iV++ ) + for ( iV = 2; iV <= kStep; iV++ ) { dtemp = RS[iV]; tVector2 = kVectors[iV]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] += dtemp * tVector2[irow]; } tVector = kVectors[1]; if ( diagonal_ != NULL ) { - for (irow = 0; irow < localNRows; irow++) 
+ for (irow = 0; irow < localNRows; irow++) tVector[irow] *= diagonal_[irow]; } - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) solnVector_[irow] += tVector[irow]; - matvec( solnVector_, tVector ); - for ( irow = 0; irow < localNRows; irow++ ) + matvec( solnVector_, tVector ); + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] = rhsVector_[irow] - tVector[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += (tVector[irow] * tVector[irow]); MPI_Allreduce(&rnorm, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt(dArray2[0]); } - if ( rnorm < eps1 ) converged = 1; + if ( rnorm < eps1 ) converged = 1; if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tFEI_HYPRE_Impl : final rnorm = %e\n", rnorm); @@ -2863,7 +2862,7 @@ int FEI_HYPRE_Impl::solveUsingGMRES() } /************************************************************************** - solve linear system using CGS + solve linear system using CGS -------------------------------------------------------------------------*/ int FEI_HYPRE_Impl::solveUsingCGS() { @@ -2881,7 +2880,7 @@ int FEI_HYPRE_Impl::solveUsingCGS() extNRows = numExtNodes_ * nodeDOF_; totalNRows = localNRows + extNRows; rVec = new double[totalNRows]; - + /* ----------------------------------------------------------------- * assemble the initial guess vector * -----------------------------------------------------------------*/ @@ -2891,12 +2890,12 @@ int FEI_HYPRE_Impl::solveUsingCGS() /* ----------------------------------------------------------------- * compute initial residual vector and norm * -----------------------------------------------------------------*/ - - matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) + + matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm0 = rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for 
( irow = 0; irow < localNRows; irow++ ) { rnorm0 += (rVec[irow] * rVec[irow]); rnorm += (rhsVector_[irow] * rhsVector_[irow]); @@ -2908,7 +2907,7 @@ int FEI_HYPRE_Impl::solveUsingCGS() rnorm = sqrt(dArray2[0]); if ( outputLevel_ >= 1 && mypid_ == 0 ) printf("\tFEI_HYPRE_Impl initial rnorm = %e (%e)\n",rnorm,rnorm0); - if ( rnorm0 == 0.0 ) + if ( rnorm0 == 0.0 ) { delete [] rVec; return 0; @@ -2938,16 +2937,16 @@ int FEI_HYPRE_Impl::solveUsingCGS() * loop until convergence is achieved * -----------------------------------------------------------------*/ - while ( converged == 0 && numTrials < 2 ) + while ( converged == 0 && numTrials < 2 ) { innerIteration = 0; - while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) + while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) { iter++; innerIteration++; rho1 = rho2; beta2 = beta * beta; - for (irow = 0; irow < totalNRows; irow++) + for (irow = 0; irow < totalNRows; irow++) { tVec[irow] = beta * qVec[irow]; uVec[irow] = rVec[irow] + tVec[irow]; @@ -2955,28 +2954,28 @@ int FEI_HYPRE_Impl::solveUsingCGS() } if ( diagonal_ != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) tVec[irow] = pVec[irow] * diagonal_[irow]; } else for (irow = 0; irow < localNRows; irow++) tVec[irow] = pVec[irow]; - matvec( tVec, vVec ); + matvec( tVec, vVec ); sigma = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) sigma += (rhVec[irow] * vVec[irow]); MPI_Allreduce(&sigma, dArray, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); sigma = dArray[0]; alpha = rho1 / sigma; - for (irow = 0; irow < totalNRows; irow++) + for (irow = 0; irow < totalNRows; irow++) { qVec[irow] = uVec[irow] - alpha * vVec[irow]; uVec[irow] += qVec[irow]; } if ( diagonal_ != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) { tVec[irow] = uVec[irow] * diagonal_[irow]; solnVector_[irow] += alpha * uVec[irow] * diagonal_[irow]; @@ -2984,19 +2983,19 
@@ int FEI_HYPRE_Impl::solveUsingCGS() } else { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) { tVec[irow] = uVec[irow]; solnVector_[irow] += alpha * uVec[irow]; } } - matvec( tVec, vVec ); + matvec( tVec, vVec ); - for (irow = 0; irow < totalNRows; irow++) + for (irow = 0; irow < totalNRows; irow++) rVec[irow] -= alpha * vVec[irow]; dtemp = dtemp2 = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { dtemp += (rVec[irow] * rhVec[irow]); dtemp2 += (rVec[irow] * rVec[irow]); @@ -3011,11 +3010,11 @@ int FEI_HYPRE_Impl::solveUsingCGS() printf("\tFEI_HYPRE_Impl : iteration %d - rnorm = %e (%e)\n", iter, rnorm, eps1); } - matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) - rVec[irow] = rhsVector_[irow] - rVec[irow]; + matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) + rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; MPI_Allreduce(&rnorm, dArray, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt( dArray[0] ); @@ -3045,7 +3044,7 @@ int FEI_HYPRE_Impl::solveUsingCGS() } /************************************************************************** - solve linear system using Bicgstab + solve linear system using Bicgstab -------------------------------------------------------------------------*/ int FEI_HYPRE_Impl::solveUsingBicgstab() { @@ -3065,7 +3064,7 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() totalNRows = localNRows + extNRows; rVec = new double[totalNRows]; vecByteSize = localNRows * sizeof(double); - + /* ----------------------------------------------------------------- * assemble the initial guess vector * -----------------------------------------------------------------*/ @@ -3075,12 +3074,12 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() /* 
----------------------------------------------------------------- * compute initial residual vector and norm * -----------------------------------------------------------------*/ - - matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) + + matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm0 = rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { rnorm0 += (rVec[irow] * rVec[irow]); rnorm += (rhsVector_[irow] * rhsVector_[irow]); @@ -3092,7 +3091,7 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() rnorm = sqrt(dArray2[0]); if ( outputLevel_ >= 1 && mypid_ == 0 ) printf("\tFEI_HYPRE_Impl initial rnorm = %e (%e)\n",rnorm,rnorm0); - if ( rnorm0 == 0.0 ) + if ( rnorm0 == 0.0 ) { delete [] rVec; return 0; @@ -3112,7 +3111,7 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() gammapp = new double[blen+1]; mat = new double*[blen+1]; tau = new double*[blen+1]; - for ( iM = 1; iM <= blen; iM++ ) + for ( iM = 1; iM <= blen; iM++ ) { mat[iM] = new double[blen+1]; tau[iM] = new double[blen+1]; @@ -3122,7 +3121,7 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() tVec = new double[totalNRows]; utVec = new double*[blen+2]; rtVec = new double*[blen+2]; - for ( iM = 0; iM < blen+2; iM++ ) + for ( iM = 0; iM < blen+2; iM++ ) { utVec[iM] = new double[totalNRows]; rtVec[iM] = new double[totalNRows]; @@ -3134,7 +3133,7 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() * loop until convergence is achieved * -----------------------------------------------------------------*/ - while ( converged == 0 && numTrials < 2 ) + while ( converged == 0 && numTrials < 2 ) { innerIteration = 0; memcpy( rhVec, rVec, vecByteSize ); @@ -3143,7 +3142,7 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() memset( utVec[0], 0, vecByteSize ); omega = rho = 1.0; alpha = 0.0; - while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) + while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) { iter += 
blen; innerIteration += blen; @@ -3153,71 +3152,71 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() for ( iM = 0; iM < blen; iM++ ) { dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rhVec[irow] * rtVec[iM+1][irow]); MPI_Allreduce(&dtemp, &rho1, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); beta = alpha * rho1 / rho; rho = rho1; dtemp = -beta; - for ( jM = 0; jM <= iM; jM++ ) - for ( irow = 0; irow < localNRows; irow++ ) - utVec[jM+1][irow] = dtemp * utVec[jM+1][irow] + - rtVec[jM+1][irow]; + for ( jM = 0; jM <= iM; jM++ ) + for ( irow = 0; irow < localNRows; irow++ ) + utVec[jM+1][irow] = dtemp * utVec[jM+1][irow] + + rtVec[jM+1][irow]; if ( diagonal_ != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) tVec[irow] = utVec[iM+1][irow] * diagonal_[irow]; } else { memcpy( tVec, utVec[iM+1], vecByteSize ); } - matvec( tVec, utVec[iM+2] ); + matvec( tVec, utVec[iM+2] ); dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rhVec[irow] * utVec[iM+2][irow]); MPI_Allreduce(&dtemp, &gamma1, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); - alpha = rho / gamma1; - for ( jM = 0; jM <= iM; jM++ ) - for ( irow = 0; irow < localNRows; irow++ ) - rtVec[jM+1][irow] -= alpha * utVec[jM+2][irow]; + alpha = rho / gamma1; + for ( jM = 0; jM <= iM; jM++ ) + for ( irow = 0; irow < localNRows; irow++ ) + rtVec[jM+1][irow] -= alpha * utVec[jM+2][irow]; if ( diagonal_ != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) tVec[irow] = rtVec[iM+1][irow] * diagonal_[irow]; } else { memcpy( tVec, rtVec[iM+1], vecByteSize ); } - matvec( tVec, rtVec[iM+2] ); - for (irow = 0; irow < localNRows; irow++) + matvec( tVec, rtVec[iM+2] ); + for (irow = 0; irow < localNRows; irow++) xhVec[irow] += alpha * utVec[1][irow]; } for ( iM = 1; iM <= blen; iM++ ) for ( jM = 1; jM <= blen; jM++ ) mat[iM][jM] = 0.0; for ( iM 
= 1; iM <= blen; iM++ ) { - for ( jM = 1; jM <= iM-1; jM++ ) + for ( jM = 1; jM <= iM-1; jM++ ) { dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rtVec[jM+1][irow] * rtVec[iM+1][irow]); MPI_Allreduce(&dtemp, &dtemp2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); tau[jM][iM] = dtemp2 / sigma[jM]; mat[jM][iM] = tau[jM][iM] * sigma[jM]; dtemp = -tau[jM][iM]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) rtVec[iM+1][irow] += dtemp * rtVec[jM+1][irow]; } dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rtVec[iM+1][irow] * rtVec[iM+1][irow]); dArray[0] = dtemp; dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rtVec[1][irow] * rtVec[iM+1][irow]); dArray[1] = dtemp; MPI_Allreduce(dArray, dArray2, 2, MPI_DOUBLE, MPI_SUM, mpiComm_); @@ -3227,44 +3226,44 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() } gammanp[blen] = gammap[blen]; omega = gammanp[blen]; - for ( iM = blen-1; iM >= 1; iM-- ) + for ( iM = blen-1; iM >= 1; iM-- ) { gammanp[iM] = gammap[iM]; for (jM=iM+1; jM<=blen; jM++) gammanp[iM] = gammanp[iM] - tau[iM][jM] * gammanp[jM]; } - for (iM=1; iM<=blen-1; iM++) + for (iM=1; iM<=blen-1; iM++) { gammapp[iM] = gammanp[iM+1]; for (jM=iM+1; jM<=blen-1; jM++) gammapp[iM] = gammapp[iM] + tau[iM][jM] * gammanp[jM+1]; } dtemp = gammanp[1]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) xhVec[irow] += dtemp * rtVec[1][irow]; dtemp = - gammap[blen]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) rtVec[1][irow] += dtemp * rtVec[blen+1][irow]; dtemp = - gammanp[blen]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) utVec[1][irow] += dtemp * utVec[blen+1][irow]; - for (iM=1; iM<=blen-1; iM++) + for (iM=1; iM<=blen-1; iM++) { dtemp = - 
gammanp[iM]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) utVec[1][irow] += dtemp * utVec[iM+1][irow]; dtemp = gammapp[iM]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) xhVec[irow] += dtemp * rtVec[iM+1][irow]; dtemp = - gammap[iM]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) rtVec[1][irow] += dtemp * rtVec[iM+1][irow]; } memcpy( utVec[0], utVec[1], vecByteSize ); memcpy( rtVec[0], rtVec[1], vecByteSize ); memcpy( solnVector_, xhVec, vecByteSize ); dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rtVec[1][irow] * rtVec[1][irow]); MPI_Allreduce(&dtemp, &rnorm, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt( rnorm ); @@ -3275,14 +3274,14 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() if ( diagonal_ != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) solnVector_[irow] *= diagonal_[irow]; } - matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) - rVec[irow] = rhsVector_[irow] - rVec[irow]; + matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) + rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; MPI_Allreduce(&rnorm, dArray, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt( dArray[0] ); @@ -3306,7 +3305,7 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() delete [] gammap; delete [] gammanp; delete [] gammapp; - for ( iM = 1; iM <= blen; iM++ ) + for ( iM = 1; iM <= blen; iM++ ) { delete [] mat[iM]; delete [] tau[iM]; @@ -3317,7 +3316,7 @@ int FEI_HYPRE_Impl::solveUsingBicgstab() delete [] rhVec; delete [] xhVec; delete [] tVec; - for ( iM = 0; iM < blen+2; iM++ ) + for ( iM = 0; iM < blen+2; iM++ ) { delete [] utVec[iM]; delete [] rtVec[iM]; @@ -3329,7 +3328,7 @@ 
int FEI_HYPRE_Impl::solveUsingBicgstab() } /************************************************************************** - solve linear system using SuperLU + solve linear system using SuperLU -------------------------------------------------------------------------*/ int FEI_HYPRE_Impl::solveUsingSuperLU() { @@ -3351,7 +3350,7 @@ int FEI_HYPRE_Impl::solveUsingSuperLU() /* --------------------------------------------------------------- * conversion from CSR to CSC * -------------------------------------------------------------*/ - + localNRows = numLocalNodes_ * nodeDOF_; countArray = new int[localNRows]; for ( irow = 0; irow < localNRows; irow++ ) countArray[irow] = 0; @@ -3392,8 +3391,8 @@ int FEI_HYPRE_Impl::solveUsingSuperLU() * make SuperMatrix * -------------------------------------------------------------*/ - dCreate_CompCol_Matrix(&superLU_Amat, localNRows, localNRows, - cscJA[localNRows], cscAA, cscIA, cscJA, SLU_NC, + dCreate_CompCol_Matrix(&superLU_Amat, localNRows, localNRows, + cscJA[localNRows], cscAA, cscIA, cscJA, SLU_NC, SLU_D, SLU_GE); etree = new int[localNRows]; permC = new int[localNRows]; @@ -3428,9 +3427,9 @@ int FEI_HYPRE_Impl::solveUsingSuperLU() * -----------------------------------------------------------*/ solnVector_ = new double[localNRows]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) solnVector_[irow] = rhsVector_[irow]; - dCreate_Dense_Matrix(&B, localNRows, 1, solnVector_, localNRows, + dCreate_Dense_Matrix(&B, localNRows, 1, solnVector_, localNRows, SLU_DN, SLU_D, SLU_GE); /* ------------------------------------------------------------- @@ -3438,14 +3437,14 @@ int FEI_HYPRE_Impl::solveUsingSuperLU() * -----------------------------------------------------------*/ trans = NOTRANS; - dgstrs (trans, &superLU_Lmat, &superLU_Umat, permC, permR, &B, + dgstrs (trans, &superLU_Lmat, &superLU_Umat, permC, permR, &B, &slu_stat, &info); rVec = new double[localNRows]; - matvec( solnVector_, rVec ); 
- for ( irow = 0; irow < localNRows; irow++ ) - rVec[irow] = rhsVector_[irow] - rVec[irow]; + matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) + rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; rnorm = sqrt( rnorm ); if ( outputLevel_ >= 2 && mypid_ == 0 ) @@ -3474,7 +3473,7 @@ int FEI_HYPRE_Impl::solveUsingSuperLU() return (1); #endif } - + /************************************************************************** matrix vector multiply -------------------------------------------------------------------------*/ @@ -3494,10 +3493,10 @@ void FEI_HYPRE_Impl::matvec(double *xvec, double *yvec) { int matDim = ( numLocalNodes_ + numExtNodes_ ) * nodeDOF_; double ddata; - for ( int iD = 0; iD < matDim; iD++ ) + for ( int iD = 0; iD < matDim; iD++ ) { ddata = 0.0; - for ( int iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) + for ( int iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) ddata += diagAA_[iD2] * xvec[diagJA_[iD2]]; yvec[iD] = ddata; } @@ -3511,10 +3510,10 @@ void FEI_HYPRE_Impl::matvec(double *xvec, double *yvec) { int matDim = ( numLocalNodes_ + numExtNodes_ ) * nodeDOF_; double ddata; - for ( int iD = 0; iD < matDim; iD++ ) + for ( int iD = 0; iD < matDim; iD++ ) { ddata = 0.0; - for ( int iD2 = offdIA_[iD]; iD2 < offdIA_[iD+1]; iD2++ ) + for ( int iD2 = offdIA_[iD]; iD2 < offdIA_[iD+1]; iD2++ ) ddata += offdAA_[iD2] * xvec[offdJA_[iD2]]; yvec[iD] += ddata; } @@ -3528,7 +3527,7 @@ void FEI_HYPRE_Impl::matvec(double *xvec, double *yvec) } /************************************************************************** - form right hand side vector from element load vectors + form right hand side vector from element load vectors -------------------------------------------------------------------------*/ void FEI_HYPRE_Impl::assembleRHSVector() { @@ -3563,7 +3562,7 @@ void FEI_HYPRE_Impl::assembleRHSVector() } 
/************************************************************************** - form solution vector + form solution vector -------------------------------------------------------------------------*/ void FEI_HYPRE_Impl::assembleSolnVector() { @@ -3617,7 +3616,7 @@ void FEI_HYPRE_Impl::disassembleSolnVector() eqnIndex1 = elemNodeLists[iE][iN] * nodeDOF_; eqnIndex2 = iN * nodeDOF_; for ( iD = 0; iD < nodeDOF_; iD++ ) - solnVectors[iE][eqnIndex2+iD] = solnVector_[eqnIndex1+iD]; + solnVectors[iE][eqnIndex2+iD] = solnVector_[eqnIndex1+iD]; } } } @@ -3750,17 +3749,17 @@ void FEI_HYPRE_Impl::PVectorInterChange( double *dvec ) MPI_Request *requests; MPI_Status status; - if ( nRecvs_ > 0 ) + if ( nRecvs_ > 0 ) { dRecvBufs = new double*[nRecvs_]; requests = new MPI_Request[nRecvs_]; - for ( iP = 0; iP < nRecvs_; iP++ ) + for ( iP = 0; iP < nRecvs_; iP++ ) dRecvBufs[iP] = new double[recvLengs_[iP]*nodeDOF_]; } - if ( nSends_ > 0 ) + if ( nSends_ > 0 ) { dSendBufs = new double*[nSends_]; - for ( iP = 0; iP < nSends_; iP++ ) + for ( iP = 0; iP < nSends_; iP++ ) { dSendBufs[iP] = new double[sendLengs_[iP]*nodeDOF_]; for ( iD = 0; iD < sendLengs_[iP]; iD++ ) @@ -3768,7 +3767,7 @@ void FEI_HYPRE_Impl::PVectorInterChange( double *dvec ) ind1 = sendProcIndices_[iP][iD] * nodeDOF_; ind2 = iD * nodeDOF_; for ( iD2 = 0; iD2 < nodeDOF_; iD2++ ) - dSendBufs[iP][ind2+iD2] = dvec[ind1+iD2]; + dSendBufs[iP][ind2+iD2] = dvec[ind1+iD2]; } } } @@ -3788,12 +3787,12 @@ void FEI_HYPRE_Impl::PVectorInterChange( double *dvec ) ind1 = recvProcIndices_[iP][iD] * nodeDOF_; ind2 = iD * nodeDOF_; for ( iD2 = 0; iD2 < nodeDOF_; iD2++ ) - dvec[ind1+iD2] = dRecvBufs[iP][ind2+iD2]; + dvec[ind1+iD2] = dRecvBufs[iP][ind2+iD2]; } delete [] dRecvBufs[iP]; } if ( nRecvs_ > 0 ) delete [] dRecvBufs; - if ( nSends_ > 0 ) + if ( nSends_ > 0 ) { for ( iP = 0; iP < nSends_; iP++ ) delete [] dSendBufs[iP]; delete [] dSendBufs; @@ -3810,17 +3809,17 @@ void FEI_HYPRE_Impl::PVectorReverseChange( double *dvec ) MPI_Request 
*requests; MPI_Status status; - if ( nSends_ > 0 ) + if ( nSends_ > 0 ) { dRecvBufs = new double*[nSends_]; requests = new MPI_Request[nSends_]; - for ( iP = 0; iP < nSends_; iP++ ) + for ( iP = 0; iP < nSends_; iP++ ) dRecvBufs[iP] = new double[sendLengs_[iP]*nodeDOF_]; } - if ( nRecvs_ > 0 ) + if ( nRecvs_ > 0 ) { dSendBufs = new double*[nRecvs_]; - for ( iP = 0; iP < nRecvs_; iP++ ) + for ( iP = 0; iP < nRecvs_; iP++ ) { dSendBufs[iP] = new double[recvLengs_[iP]*nodeDOF_]; for ( iD = 0; iD < recvLengs_[iP]; iD++ ) @@ -3828,7 +3827,7 @@ void FEI_HYPRE_Impl::PVectorReverseChange( double *dvec ) ind1 = recvProcIndices_[iP][iD] * nodeDOF_; ind2 = iD * nodeDOF_; for ( iD2 = 0; iD2 < nodeDOF_; iD2++ ) - dSendBufs[iP][ind2+iD2] = dvec[ind1+iD2]; + dSendBufs[iP][ind2+iD2] = dvec[ind1+iD2]; } } } @@ -3848,12 +3847,12 @@ void FEI_HYPRE_Impl::PVectorReverseChange( double *dvec ) ind1 = sendProcIndices_[iP][iD] * nodeDOF_; ind2 = iD * nodeDOF_; for ( iD2 = 0; iD2 < nodeDOF_; iD2++ ) - dvec[ind1+iD2] += dRecvBufs[iP][ind2+iD2]; + dvec[ind1+iD2] += dRecvBufs[iP][ind2+iD2]; } delete [] dRecvBufs[iP]; } if ( nSends_ > 0 ) delete [] dRecvBufs; - if ( nRecvs_ > 0 ) + if ( nRecvs_ > 0 ) { for ( iP = 0; iP < nRecvs_; iP++ ) delete [] dSendBufs[iP]; delete [] dSendBufs; @@ -3880,16 +3879,16 @@ void FEI_HYPRE_Impl::printLinearSystem() { for ( iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) if ( diagJA_[iD2] == iD ) - fprintf(fp,"%6d %6d %25.16e \n", iD+1+offset, + fprintf(fp,"%6d %6d %25.16e \n", iD+1+offset, diagJA_[iD2]+1+offset, diagAA_[iD2]); for ( iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) if ( diagJA_[iD2] != iD ) - fprintf(fp,"%6d %6d %25.16e \n", iD+1+offset, + fprintf(fp,"%6d %6d %25.16e \n", iD+1+offset, diagJA_[iD2]+1+offset, diagAA_[iD2]); if ( offdIA_ != NULL ) { for ( iD2 = offdIA_[iD]; iD2 < offdIA_[iD+1]; iD2++ ) - fprintf(fp,"%6d %6d %25.16e \n", iD+1+offset, + fprintf(fp,"%6d %6d %25.16e \n", iD+1+offset, nodeExtNewGlobalIDs_[offdJA_[iD2]-iEnd]+1,offdAA_[iD2]); 
} } @@ -3920,7 +3919,7 @@ void FEI_HYPRE_Impl::printLinearSystem() fprintf(fp, "%6d \n", iEnd); for ( iD = 0; iD < iEnd; iD++ ) { - fprintf(fp,"%6d %25.16e \n", iD+1+offset, rhsVector_[iD]); + fprintf(fp,"%6d %25.16e \n", iD+1+offset, rhsVector_[iD]); } iBegin = numLocalNodes_ * nodeDOF_; iEnd = (numLocalNodes_ + numExtNodes_ ) * nodeDOF_; diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSC_aux.cxx b/src/FEI_mv/fei-hypre/HYPRE_LSC_aux.cxx index f06135f7b..1e5a3f8e8 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSC_aux.cxx +++ b/src/FEI_mv/fei-hypre/HYPRE_LSC_aux.cxx @@ -20,7 +20,6 @@ #include #include #include -#include #include #if 0 /* RDF: Not sure this is really needed */ @@ -29,7 +28,7 @@ #endif #endif -//#define HAVE_SYSPDE +//#define HAVE_SYSPDE //--------------------------------------------------------------------------- // HYPRE include files @@ -82,7 +81,7 @@ extern "C" { /*-------------------------------------------------------------------------* - * ML functions + * ML functions *-------------------------------------------------------------------------*/ #ifdef HAVE_ML @@ -105,7 +104,7 @@ extern "C" { #endif /*-------------------------------------------------------------------------* - * MLMaxwell functions + * MLMaxwell functions *-------------------------------------------------------------------------*/ #ifdef HAVE_MLMAXWELL @@ -121,7 +120,7 @@ extern "C" { #endif /*-------------------------------------------------------------------------* - * other functions + * other functions *-------------------------------------------------------------------------*/ void hypre_qsort1(int *, double *, int, int); @@ -134,7 +133,7 @@ extern "C" { int*,double*); /*-------------------------------------------------------------------------* - * Y12 functions (obsolete) + * Y12 functions (obsolete) *-------------------------------------------------------------------------*/ #ifdef Y12M @@ -187,7 +186,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) if ( 
!strcmp(param1, "preconditioner") && (!precon_override) ) { sscanf(params[i],"%s %s %s", param, param2, param3); - if ( strcmp(param2, "reuse") ) + if ( strcmp(param2, "reuse") ) { precon_index = i; if (!strcmp(param3, "override")) precon_override = 1; @@ -227,9 +226,9 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) for ( i = 0; i < numParams; i++ ) { sscanf(params[i],"%s", param1); - + //---------------------------------------------------------------- - // help menu + // help menu //---------------------------------------------------------------- recognized = 1; @@ -332,7 +331,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // turn on memory optimizer + // turn on memory optimizer //---------------------------------------------------------------- else if ( !strcmp(param1, "optimizeMemory") ) @@ -405,35 +404,35 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) else if ( !strcmp(param1, "setDebug") ) { sscanf(params[i],"%s %s", param, param2); - if (!strcmp(param2, "slideReduction1")) + if (!strcmp(param2, "slideReduction1")) HYOutputLevel_ |= HYFEI_SLIDEREDUCE1; - else if (!strcmp(param2, "slideReduction2")) + else if (!strcmp(param2, "slideReduction2")) HYOutputLevel_ |= HYFEI_SLIDEREDUCE2; - else if (!strcmp(param2, "slideReduction3")) + else if (!strcmp(param2, "slideReduction3")) HYOutputLevel_ |= HYFEI_SLIDEREDUCE3; - else if (!strcmp(param2, "schurReduction1")) + else if (!strcmp(param2, "schurReduction1")) HYOutputLevel_ |= HYFEI_SCHURREDUCE1; - else if (!strcmp(param2, "schurReduction2")) + else if (!strcmp(param2, "schurReduction2")) HYOutputLevel_ |= HYFEI_SCHURREDUCE2; - else if (!strcmp(param2, "schurReduction3")) + else if (!strcmp(param2, "schurReduction3")) HYOutputLevel_ |= HYFEI_SCHURREDUCE3; - else if (!strcmp(param2, "amgDebug")) + else if (!strcmp(param2, "amgDebug")) HYOutputLevel_ |= HYFEI_AMGDEBUG; - else if 
(!strcmp(param2, "printMat")) + else if (!strcmp(param2, "printMat")) HYOutputLevel_ |= HYFEI_PRINTMAT; - else if (!strcmp(param2, "printSol")) + else if (!strcmp(param2, "printSol")) HYOutputLevel_ |= HYFEI_PRINTSOL; - else if (!strcmp(param2, "printReducedMat")) + else if (!strcmp(param2, "printReducedMat")) HYOutputLevel_ |= HYFEI_PRINTREDMAT; - else if (!strcmp(param2, "printParCSRMat")) + else if (!strcmp(param2, "printParCSRMat")) HYOutputLevel_ |= HYFEI_PRINTPARCSRMAT; - else if (!strcmp(param2, "printFEInfo")) + else if (!strcmp(param2, "printFEInfo")) HYOutputLevel_ |= HYFEI_PRINTFEINFO; - else if (!strcmp(param2, "ddilut")) + else if (!strcmp(param2, "ddilut")) HYOutputLevel_ |= HYFEI_DDILUT; - else if (!strcmp(param2, "stopAfterPrint")) + else if (!strcmp(param2, "stopAfterPrint")) HYOutputLevel_ |= HYFEI_STOPAFTERPRINT; - else if (!strcmp(param2, "off")) + else if (!strcmp(param2, "off")) HYOutputLevel_ = 0; if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 && mypid_ == 0 ) printf(" HYPRE_LSC::parameters setDebug %s.\n", param2); @@ -480,7 +479,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // perform slide reduction + // perform slide reduction //---------------------------------------------------------------- else if ( !strcmp(param1, "slideReduction") ) @@ -521,21 +520,21 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // perform A-conjugate projection + // perform A-conjugate projection //---------------------------------------------------------------- else if ( !strcmp(param1, "AConjugateProjection") ) { - if ( HYpbs_ != NULL ) + if ( HYpbs_ != NULL ) { - for ( k = 0; k <= projectSize_; k++ ) + for ( k = 0; k <= projectSize_; k++ ) if ( HYpbs_[k] != NULL ) HYPRE_IJVectorDestroy(HYpbs_[k]); delete [] HYpbs_; HYpbs_ = NULL; } - if ( HYpxs_ != NULL ) + if ( HYpxs_ != NULL ) { 
- for ( k = 0; k <= projectSize_; k++ ) + for ( k = 0; k <= projectSize_; k++ ) if ( HYpxs_[k] != NULL ) HYPRE_IJVectorDestroy(HYpxs_[k]); delete [] HYpxs_; HYpxs_ = NULL; @@ -549,21 +548,21 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // perform minimal residual projection + // perform minimal residual projection //---------------------------------------------------------------- else if ( !strcmp(param1, "minResProjection") ) { - if ( HYpbs_ != NULL ) + if ( HYpbs_ != NULL ) { - for ( k = 0; k <= projectSize_; k++ ) + for ( k = 0; k <= projectSize_; k++ ) if ( HYpbs_[k] != NULL ) HYPRE_IJVectorDestroy(HYpbs_[k]); delete [] HYpbs_; HYpbs_ = NULL; } - if ( HYpxs_ != NULL ) + if ( HYpxs_ != NULL ) { - for ( k = 0; k <= projectSize_; k++ ) + for ( k = 0; k <= projectSize_; k++ ) if ( HYpxs_[k] != NULL ) HYPRE_IJVectorDestroy(HYpxs_[k]); delete [] HYpxs_; HYpxs_ = NULL; @@ -611,7 +610,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // for GMRES, the convergence criterion + // for GMRES, the convergence criterion //---------------------------------------------------------------- else if ( !strcmp(param1, "gmresStopCrit") ) @@ -619,7 +618,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) sscanf(params[i],"%s %s", param, param2); if ( !strcmp(param2, "absolute" ) ) normAbsRel_ = 1; else if ( !strcmp(param2, "relative" ) ) normAbsRel_ = 0; - else normAbsRel_ = 0; + else normAbsRel_ = 0; if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 && mypid_ == 0 ) printf(" HYPRE_LSC::parameters gmresStopCrit = %s\n", param2); @@ -630,15 +629,15 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) sscanf(params[i],"%s %s", param, param2); if ( !strcmp(param2, "absolute") ) normAbsRel_ = 1; else if ( !strcmp(param2, "relative") ) normAbsRel_ = 0; - else normAbsRel_ = 0; - + else 
normAbsRel_ = 0; + if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 && mypid_ == 0 ) printf(" HYPRE_LSC::parameters stopCrit = %s\n", param2); } //---------------------------------------------------------------- - // for PCG only + // for PCG only //---------------------------------------------------------------- else if ( !strcmp(param1, "pcgRecomputeResidual") ) @@ -806,7 +805,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // Schwarz preconditioner : Fillin + // Schwarz preconditioner : Fillin //---------------------------------------------------------------- else if ( !strcmp(param1, "schwarzFillin") ) @@ -819,7 +818,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // Schwarz preconditioner : block size + // Schwarz preconditioner : block size //---------------------------------------------------------------- else if ( !strcmp(param1, "schwarzNBlocks") ) @@ -832,7 +831,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // Schwarz preconditioner : block size + // Schwarz preconditioner : block size //---------------------------------------------------------------- else if ( !strcmp(param1, "schwarzBlockSize") ) @@ -890,7 +889,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) else recognized = 0; //---------------------------------------------------------------- - // amg preconditoner : coarsening type + // amg preconditoner : coarsening type //---------------------------------------------------------------- if (!recognized) @@ -906,7 +905,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // amg preconditoner : coarsening type + // amg preconditoner : coarsening type 
//---------------------------------------------------------------- else if ( !strcmp(param1, "amgCoarsenType") ) @@ -925,7 +924,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // amg preconditoner : measure + // amg preconditoner : measure //---------------------------------------------------------------- else if ( !strcmp(param1, "amgMeasureType") ) @@ -961,16 +960,16 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) { sscanf(params[i],"%s %s", param, param2); if ( !strcmp(param2, "jacobi" ) ) rtype = 0; - else if ( !strcmp(param2, "CFjacobi" ) ) + else if ( !strcmp(param2, "CFjacobi" ) ) {rtype = 0; amgGridRlxType_ = 1;} else if ( !strcmp(param2, "gsSlow") ) rtype = 1; else if ( !strcmp(param2, "gsFast") ) rtype = 4; else if ( !strcmp(param2, "hybrid" ) ) rtype = 3; - else if ( !strcmp(param2, "CFhybrid" ) ) + else if ( !strcmp(param2, "CFhybrid" ) ) {rtype = 3; amgGridRlxType_ = 1;} else if ( !strcmp(param2, "hybridsym" ) ) rtype = 6; else if ( !strcmp(param2, "l1gs" ) ) rtype = 8; - else if ( !strcmp(param2, "CFhybridsym" ) ) + else if ( !strcmp(param2, "CFhybridsym" ) ) {rtype = 6; amgGridRlxType_ = 1;} else rtype = 4; for ( k = 0; k < 3; k++ ) amgRelaxType_[k] = rtype; @@ -1042,7 +1041,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //--------------------------------------------------------------- - // amg preconditoner : choose max iterations + // amg preconditoner : choose max iterations //--------------------------------------------------------------- else if ( !strcmp(param1, "amgMaxIterations") ) @@ -1261,7 +1260,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //--------------------------------------------------------------- - // parasails preconditoner : symmetry flag (1 - symm, 0 - nonsym) + // parasails preconditoner : symmetry flag (1 - symm, 0 - nonsym) 
//--------------------------------------------------------------- else if ( !strcmp(param1, "parasailsSymmetric") ) @@ -1293,7 +1292,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //--------------------------------------------------------------- - // Euclid preconditoner : fill-in + // Euclid preconditoner : fill-in //--------------------------------------------------------------- if (!recognized) @@ -1310,7 +1309,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //--------------------------------------------------------------- - // Euclid preconditoner : threshold + // Euclid preconditoner : threshold //--------------------------------------------------------------- else if ( !strcmp(param1, "euclidThreshold") ) @@ -1324,13 +1323,13 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //--------------------------------------------------------------- - // block preconditoner (hold this until this end) + // block preconditoner (hold this until this end) //--------------------------------------------------------------- else if ( !strcmp(param1, "blockP") ) { if ( HYPreconID_ == HYBLOCK ) - HYPRE_LSI_BlockPrecondSetParams(HYPrecon_, params[i]); + HYPRE_LSI_BlockPrecondSetParams(HYPrecon_, params[i]); } //--------------------------------------------------------------- @@ -1367,7 +1366,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) { #ifdef HAVE_MLI if ( HYPreconID_ == HYMLI ) - HYPRE_LSI_MLISetParams(HYPrecon_, params[i]); + HYPRE_LSI_MLISetParams(HYPrecon_, params[i]); #else // if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 2 && mypid_ == 0 ) // printf(" HYPRE_LSC::MLI SetParams - MLI unavailable.\n"); @@ -1375,13 +1374,13 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //---------------------------------------------------------------- - // for Uzawa, the various parameters + // for Uzawa, the various parameters 
//---------------------------------------------------------------- else if ( !strcmp(param1, "Uzawa") ) { if ( HYPreconID_ == HYUZAWA ) - HYPRE_LSI_UzawaSetParams(HYPrecon_, params[i]); + HYPRE_LSI_UzawaSetParams(HYPrecon_, params[i]); } else recognized = 0; @@ -1682,7 +1681,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //--------------------------------------------------------------- - // ams preconditoner : print level + // ams preconditoner : print level //--------------------------------------------------------------- else if (!strcmp(param1, "amsPrintLevel")) @@ -1697,7 +1696,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) //---------------------------------------------------------------- // amg preconditoner : alpha coarsening type //---------------------------------------------------------------- - + else if ( !strcmp(param1, "amsAlphaCoarsenType") ) { sscanf(params[i],"%s %s", param, param2); @@ -1712,7 +1711,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) printf(" HYPRE_LSC::parameters amsAlphaCoarsenType = %s\n", param2); } - + //---------------------------------------------------------------- // amg preconditoner : coarsening type //---------------------------------------------------------------- @@ -1888,7 +1887,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) } //--------------------------------------------------------------- - // error + // error //--------------------------------------------------------------- if (!recognized) @@ -1903,7 +1902,7 @@ int HYPRE_LinSysCore::parameters(int numParams, char **params) // if reuse is requested, set preconditioner reuse flag //------------------------------------------------------------------- - if ( reuse == 1 ) HYPreconReuse_ = 1; + if ( reuse == 1 ) HYPreconReuse_ = 1; if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) printf("%4d : HYPRE_LSC::leaving parameters function.\n",mypid_); return(0); @@ -2726,14 +2725,14 @@ void 
HYPRE_LinSysCore::setupFGMRESPrecon() case HYBLOCK : if ( HYPreconReuse_ == 1 && HYPreconSetup_ == 1 ) - HYPRE_ParCSRFGMRESSetPrecond(HYSolver_, - HYPRE_LSI_BlockPrecondSolve, HYPRE_DummyFunction, + HYPRE_ParCSRFGMRESSetPrecond(HYSolver_, + HYPRE_LSI_BlockPrecondSolve, HYPRE_DummyFunction, HYPrecon_); else { setupPreconBlock(); - HYPRE_ParCSRFGMRESSetPrecond(HYSolver_, - HYPRE_LSI_BlockPrecondSolve, + HYPRE_ParCSRFGMRESSetPrecond(HYSolver_, + HYPRE_LSI_BlockPrecondSolve, HYPRE_LSI_BlockPrecondSetup, HYPrecon_); HYPreconSetup_ = 1; } @@ -2955,7 +2954,7 @@ void HYPRE_LinSysCore::setupBiCGSTABPrecon() case HYPARASAILS : if ( HYPreconReuse_ == 1 && HYPreconSetup_ == 1 ) - HYPRE_ParCSRBiCGSTABSetPrecond(HYSolver_, + HYPRE_ParCSRBiCGSTABSetPrecond(HYSolver_, HYPRE_ParCSRParaSailsSolve, HYPRE_DummyFunction, HYPrecon_); else @@ -2963,7 +2962,7 @@ void HYPRE_LinSysCore::setupBiCGSTABPrecon() setupPreconParaSails(); HYPRE_ParCSRBiCGSTABSetPrecond(HYSolver_, HYPRE_ParCSRParaSailsSolve, - HYPRE_ParCSRParaSailsSetup, + HYPRE_ParCSRParaSailsSetup, HYPrecon_); HYPreconSetup_ = 1; } @@ -3207,15 +3206,15 @@ void HYPRE_LinSysCore::setupBiCGSTABLPrecon() case HYPARASAILS : if ( HYPreconReuse_ == 1 && HYPreconSetup_ == 1 ) - HYPRE_ParCSRBiCGSTABLSetPrecond(HYSolver_, + HYPRE_ParCSRBiCGSTABLSetPrecond(HYSolver_, HYPRE_ParCSRParaSailsSolve, HYPRE_DummyFunction, HYPrecon_); else { setupPreconParaSails(); - HYPRE_ParCSRBiCGSTABLSetPrecond(HYSolver_, + HYPRE_ParCSRBiCGSTABLSetPrecond(HYSolver_, HYPRE_ParCSRParaSailsSolve, - HYPRE_ParCSRParaSailsSetup, + HYPRE_ParCSRParaSailsSetup, HYPrecon_); HYPreconSetup_ = 1; } @@ -3242,7 +3241,7 @@ void HYPRE_LinSysCore::setupBiCGSTABLPrecon() { setupPreconEuclid(); HYPRE_ParCSRBiCGSTABLSetPrecond(HYSolver_,HYPRE_EuclidSolve, - HYPRE_EuclidSetup, + HYPRE_EuclidSetup, HYPrecon_); HYPreconSetup_ = 1; } @@ -3967,14 +3966,14 @@ void HYPRE_LinSysCore::setupSymQMRPrecon() case HYBLOCK : if ( HYPreconReuse_ == 1 && HYPreconSetup_ == 1 ) - 
HYPRE_ParCSRSymQMRSetPrecond(HYSolver_, - HYPRE_LSI_BlockPrecondSolve, HYPRE_DummyFunction, + HYPRE_ParCSRSymQMRSetPrecond(HYSolver_, + HYPRE_LSI_BlockPrecondSolve, HYPRE_DummyFunction, HYPrecon_); else { setupPreconBlock(); - HYPRE_ParCSRSymQMRSetPrecond(HYSolver_, - HYPRE_LSI_BlockPrecondSolve, + HYPRE_ParCSRSymQMRSetPrecond(HYSolver_, + HYPRE_LSI_BlockPrecondSolve, HYPRE_LSI_BlockPrecondSetup, HYPrecon_); HYPreconSetup_ = 1; } @@ -4123,16 +4122,16 @@ void HYPRE_LinSysCore::setupPreconBoomerAMG() for ( i = 0; i < amgMaxLevels_; i++ ) relax_omega[i] = amgRelaxOmega_[i]; HYPRE_BoomerAMGSetOmega(HYPrecon_, relax_omega); - if (amgGridRlxType_) + if (amgGridRlxType_) { relax_points = hypre_CTAlloc(int*,4,HYPRE_MEMORY_HOST); relax_points[0] = hypre_CTAlloc(int,num_sweeps[0],HYPRE_MEMORY_HOST); for ( j = 0; j < num_sweeps[0]; j++ ) relax_points[0][j] = 0; relax_points[1] = hypre_CTAlloc(int,2*num_sweeps[1],HYPRE_MEMORY_HOST); - for ( j = 0; j < num_sweeps[1]; j+=2 ) + for ( j = 0; j < num_sweeps[1]; j+=2 ) {relax_points[1][j] = -1;relax_points[1][j+1] = 1;} relax_points[2] = hypre_CTAlloc(int,2*num_sweeps[2],HYPRE_MEMORY_HOST); - for ( j = 0; j < num_sweeps[2]; j+=2 ) + for ( j = 0; j < num_sweeps[2]; j+=2 ) {relax_points[2][j] = -1;relax_points[2][j+1] = 1;} relax_points[3] = hypre_CTAlloc(int,num_sweeps[3],HYPRE_MEMORY_HOST); for ( j = 0; j < num_sweeps[3]; j++ ) relax_points[3][j] = 0; @@ -4140,7 +4139,7 @@ void HYPRE_LinSysCore::setupPreconBoomerAMG() else { relax_points = hypre_CTAlloc(int*,4,HYPRE_MEMORY_HOST); - for ( i = 0; i < 4; i++ ) + for ( i = 0; i < 4; i++ ) { relax_points[i] = hypre_CTAlloc(int,num_sweeps[i],HYPRE_MEMORY_HOST); for ( j = 0; j < num_sweeps[i]; j++ ) relax_points[i][j] = 0; @@ -4222,7 +4221,7 @@ void HYPRE_LinSysCore::setupPreconMLMaxwell() HYPRE_IJMatrixGetObject(currA_, (void **) &A_csr); hypre_BoomerAMGBuildCoarseOperator((hypre_ParCSRMatrix *) maxwellGEN_, (hypre_ParCSRMatrix *) A_csr, - (hypre_ParCSRMatrix *) maxwellGEN_, + 
(hypre_ParCSRMatrix *) maxwellGEN_, (hypre_ParCSRMatrix **) &maxwellANN_); } HYPRE_LSI_MLMaxwellSetANNMatrix(HYPrecon_,maxwellANN_); @@ -4253,7 +4252,7 @@ void HYPRE_LinSysCore::setupPreconAMS() if (amsBetaPoisson_ != NULL) HYPRE_AMSSetBetaPoissonMatrix(HYPrecon_, amsBetaPoisson_); - HYPRE_AMSSetAlphaAMGOptions(HYPrecon_, amsAlphaCoarsenType_, + HYPRE_AMSSetAlphaAMGOptions(HYPrecon_, amsAlphaCoarsenType_, amsAlphaAggLevels_, amsAlphaRelaxType_, amsAlphaStrengthThresh_, amsAlphaInterpType_, amsAlphaPmax_); HYPRE_AMSSetBetaAMGOptions(HYPrecon_, amsBetaCoarsenType_, @@ -4418,7 +4417,7 @@ void HYPRE_LinSysCore::setupPreconParaSails() if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 1 ) HYPRE_ParCSRParaSailsSetLogging(HYPrecon_, 1); HYPRE_ParCSRParaSailsSetSym(HYPrecon_,parasailsSym_); - HYPRE_ParCSRParaSailsSetParams(HYPrecon_, parasailsThreshold_, + HYPRE_ParCSRParaSailsSetParams(HYPrecon_, parasailsThreshold_, parasailsNlevels_); HYPRE_ParCSRParaSailsSetFilter(HYPrecon_, parasailsFilter_); HYPRE_ParCSRParaSailsSetLoadbal(HYPrecon_, parasailsLoadbal_); @@ -4434,7 +4433,7 @@ void HYPRE_LinSysCore::setupPreconEuclid() if ((HYOutputLevel_ & HYFEI_SPECIALMASK) >= 1 && mypid_ == 0) { for ( int i = 0; i < euclidargc_; i++ ) - printf("Euclid parameter : %s %s\n", euclidargv_[2*i], + printf("Euclid parameter : %s %s\n", euclidargv_[2*i], euclidargv_[2*i+1]); } HYPRE_EuclidSetParams(HYPrecon_,euclidargc_*2,euclidargv_); @@ -4541,7 +4540,7 @@ void HYPRE_LinSysCore::solveUsingBoomeramg(int& status) HYPRE_BoomerAMGSetOmega(HYPrecon_, relax_omega); relax_points = hypre_CTAlloc(int*,4,HYPRE_MEMORY_HOST); - for ( i = 0; i < 4; i++ ) + for ( i = 0; i < 4; i++ ) { relax_points[i] = hypre_CTAlloc(int,num_sweeps[i],HYPRE_MEMORY_HOST); for ( j = 0; j < num_sweeps[i]; j++ ) relax_points[i][j] = 0; @@ -4643,7 +4642,7 @@ double HYPRE_LinSysCore::solveUsingSuperLU(int& status) // need to construct a CSR matrix, and the column indices should // have been stored in colIndices and rowLengths 
//------------------------------------------------------------------- - + if ( localStartRow_ != 1 ) { printf("solveUsingSuperLU ERROR - row does not start at 1\n"); @@ -4688,13 +4687,13 @@ double HYPRE_LinSysCore::solveUsingSuperLU(int& status) ierr = HYPRE_IJVectorGetValues(currB_, nrows, ind_array, rhs); - assert(!ierr); + hypre_assert(!ierr); dCreate_Dense_Matrix(&B, nrows, 1, rhs, nrows, SLU_DN, SLU_D, SLU_GE); //------------------------------------------------------------------- // set up the rest and solve (permc_spec=0 : natural ordering) //------------------------------------------------------------------- - + perm_r = new int[nrows]; perm_c = new int[nrows]; permc_spec = superluOrdering_; @@ -4711,7 +4710,7 @@ double HYPRE_LinSysCore::solveUsingSuperLU(int& status) // postprocessing of the return status information //------------------------------------------------------------------- - if ( info == 0 ) + if ( info == 0 ) { status = 1; Lstore = (SCformat *) L.Store; @@ -4721,9 +4720,9 @@ double HYPRE_LinSysCore::solveUsingSuperLU(int& status) printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("SuperLU : NNZ in L+U = %d\n",Lstore->nnz+Ustore->nnz-nrows); - } + } } - else + else { status = 0; printf("HYPRE_LinSysCore::solveUsingSuperLU - dgssv error = %d\n",info); @@ -4738,33 +4737,33 @@ double HYPRE_LinSysCore::solveUsingSuperLU(int& status) soln = (double *) ((DNformat *) B.Store)->nzval; ierr = HYPRE_IJVectorSetValues(currX_, nrows, (const int *) ind_array, (const double *) soln); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(currX_, (void **) &x_csr); HYPRE_IJVectorGetObject(currB_, (void **) &b_csr); HYPRE_IJVectorGetObject(currR_, (void **) &r_csr); ierr = HYPRE_ParVectorCopy( b_csr, r_csr ); - assert(!ierr); + hypre_assert(!ierr); HYPRE_ParCSRMatrixMatvec( -1.0, A_csr, x_csr, 1.0, r_csr ); ierr = HYPRE_ParVectorInnerProd( r_csr, r_csr, &rnorm); - assert(!ierr); + 
hypre_assert(!ierr); rnorm = sqrt( rnorm ); if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 2 ) printf("HYPRE_LSC::solveUsingSuperLU - FINAL NORM = %e.\n",rnorm); } //------------------------------------------------------------------- - // clean up + // clean up //------------------------------------------------------------------- - delete [] ind_array; - delete [] rhs; - delete [] perm_c; - delete [] perm_r; - delete [] new_ia; - delete [] new_ja; - delete [] new_a; + delete [] ind_array; + delete [] rhs; + delete [] perm_c; + delete [] perm_r; + delete [] new_ia; + delete [] new_ja; + delete [] new_a; Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); SUPERLU_FREE( A2.Store ); @@ -4830,7 +4829,7 @@ double HYPRE_LinSysCore::solveUsingSuperLUX(int& status) // need to construct a CSR matrix, and the column indices should // have been stored in colIndices and rowLengths //------------------------------------------------------------------- - + if ( localStartRow_ != 1 ) { printf("solveUsingSuperLUX ERROR - row not start at 1\n"); @@ -4855,7 +4854,7 @@ double HYPRE_LinSysCore::solveUsingSuperLUX(int& status) HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); nnz += rowSize; HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); - } + } new_ia = new int[nrows+1]; new_ja = new int[nnz]; @@ -4874,7 +4873,7 @@ double HYPRE_LinSysCore::solveUsingSuperLUX(int& status) rhs = new double[nrows]; ierr = HYPRE_IJVectorGetValues(currB_, nrows, ind_array, rhs); - assert(!ierr); + hypre_assert(!ierr); dCreate_Dense_Matrix(&B, nrows, 1, rhs, nrows, SLU_DN, SLU_D, SLU_GE); soln = new double[nrows]; @@ -4884,7 +4883,7 @@ double HYPRE_LinSysCore::solveUsingSuperLUX(int& status) //------------------------------------------------------------------- // set up the other parameters (permc_spec=0 : natural ordering) //------------------------------------------------------------------- - + perm_r = new int[nrows]; for ( i = 0; i < nrows; i++ ) perm_r[i] = 0; perm_c = 
new int[nrows]; @@ -4914,17 +4913,17 @@ double HYPRE_LinSysCore::solveUsingSuperLUX(int& status) //------------------------------------------------------------------- // dgssvx(&slu_options, &A2, perm_c, perm_r, etree, -// equed, R, C, &L, &U, work, lwork, &B, &X, +// equed, R, C, &L, &U, work, lwork, &B, &X, // &rpg, &rcond, ferr, berr, &mem_usage, &slu_stat, &info); dgssvx(&slu_options, &A2, perm_c, perm_r, etree, - equed, R, C, &L, &U, work, lwork, &B, &X, + equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &Glu, &mem_usage, &slu_stat, &info); //------------------------------------------------------------------- // print SuperLU internal information at the first step //------------------------------------------------------------------- - - if ( info == 0 || info == nrows+1 ) + + if ( info == 0 || info == nrows+1 ) { status = 1; Lstore = (SCformat *) L.Store; @@ -4942,8 +4941,8 @@ double HYPRE_LinSysCore::solveUsingSuperLUX(int& status) printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("SuperLUX : NNZ in L+U = %d\n", Lstore->nnz+Ustore->nnz-nrows); } - } - else + } + else { printf("solveUsingSuperLUX - dgssvx error code = %d\n",info); status = 0; @@ -4959,31 +4958,31 @@ double HYPRE_LinSysCore::solveUsingSuperLUX(int& status) ierr = HYPRE_IJVectorSetValues(currX_, nrows, (const int *) ind_array, (const double *) sol2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(currX_, (void **) &x_csr); HYPRE_IJVectorGetObject(currR_, (void **) &r_csr); HYPRE_IJVectorGetObject(currB_, (void **) &b_csr); ierr = HYPRE_ParVectorCopy( b_csr, r_csr ); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_ParCSRMatrixMatvec( -1.0, A_csr, x_csr, 1.0, r_csr ); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_ParVectorInnerProd( r_csr, r_csr, &rnorm); - assert(!ierr); + hypre_assert(!ierr); rnorm = sqrt( rnorm ); if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 2 ) printf("HYPRE_LSC::solveUsingSuperLUX - FINAL NORM = %e.\n",rnorm); } 
//------------------------------------------------------------------- - // clean up + // clean up //------------------------------------------------------------------- - delete [] ind_array; - delete [] perm_c; - delete [] perm_r; - delete [] etree; - delete [] rhs; + delete [] ind_array; + delete [] perm_c; + delete [] perm_r; + delete [] etree; + delete [] rhs; delete [] soln; delete [] new_ia; delete [] new_ja; @@ -5025,17 +5024,17 @@ double HYPRE_LinSysCore::solveUsingDSuperLU(int& status) HYPRE_IJVectorGetObject(currB_, (void **) &b_csr); HYPRE_IJVectorGetObject(currR_, (void **) &r_csr); - HYPRE_LSI_DSuperLUCreate(comm_, &HYSolver_); + HYPRE_LSI_DSuperLUCreate(comm_, &HYSolver_); HYPRE_LSI_DSuperLUSetOutputLevel(HYSolver_, HYOutputLevel_); HYPRE_LSI_DSuperLUSetup(HYSolver_, A_csr, b_csr, x_csr); HYPRE_LSI_DSuperLUSolve(HYSolver_, A_csr, b_csr, x_csr); HYPRE_LSI_DSuperLUDestroy(HYSolver_); ierr = HYPRE_ParVectorCopy( b_csr, r_csr ); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_ParCSRMatrixMatvec( -1.0, A_csr, x_csr, 1.0, r_csr ); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_ParVectorInnerProd( r_csr, r_csr, &rnorm); - assert(!ierr); + hypre_assert(!ierr); rnorm = sqrt( rnorm ); //if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 1 ) // printf("HYPRE_LSC::solveUsingDSuperLU - FINAL NORM = %e.\n",rnorm); @@ -5078,24 +5077,24 @@ void HYPRE_LinSysCore::solveUsingY12M(int& status) // need to construct a CSR matrix, and the column indices should // have been stored in colIndices and rowLengths //------------------------------------------------------------------- - + if ( localStartRow_ != 1 ) { printf("solveUsingY12M ERROR - row does not start at 1.\n"); status = -1; return; } - if (slideReduction_ == 1) + if (slideReduction_ == 1) nrows = localEndRow_ - 2 * nConstraints_; - else if (slideReduction_ == 2 || slideReduction_ == 3) + else if (slideReduction_ == 2 || slideReduction_ == 3) nrows = localEndRow_ - nConstraints_; - else if (schurReduction_ == 1) 
+ else if (schurReduction_ == 1) nrows = localEndRow_ - localStartRow_ + 1 - A21NRows_; else nrows = localEndRow_; colLengths = new int[nrows]; for ( i = 0; i < nrows; i++ ) colLengths[i] = 0; - + maxRowSize = 0; HYPRE_IJMatrixGetObject(currA_, (void**) &A_csr); @@ -5103,10 +5102,10 @@ void HYPRE_LinSysCore::solveUsingY12M(int& status) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); maxRowSize = ( rowSize > maxRowSize ) ? rowSize : maxRowSize; - for ( j = 0; j < rowSize; j++ ) + for ( j = 0; j < rowSize; j++ ) if ( colVal[j] != 0.0 ) colLengths[colInd[j]]++; HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); - } + } nnz = 0; for ( i = 0; i < nrows; i++ ) nnz += colLengths[i]; @@ -5134,7 +5133,7 @@ void HYPRE_LinSysCore::solveUsingY12M(int& status) } } HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); - } + } nnz = nz_ptr; @@ -5156,7 +5155,7 @@ void HYPRE_LinSysCore::solveUsingY12M(int& status) rhs = new double[nrows]; ierr = HYPRE_IJVectorGetValues(currB_, nrows, ind_array, rhs); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------- // call Y12M to solve the linear system @@ -5177,33 +5176,33 @@ void HYPRE_LinSysCore::solveUsingY12M(int& status) { ierr = HYPRE_IJVectorSetValues(currX_, nrows, (const int *) &ind_array, (const double *) rhs); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(currX_, (void**) &x_csr); HYPRE_IJVectorGetObject(currR_, (void**) &r_csr); HYPRE_IJVectorGetObject(currB_, (void**) &b_csr); ierr = HYPRE_ParVectorCopy( b_csr, r_csr ); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_ParCSRMatrixMatvec( -1.0, A_csr, x_csr, 1.0, r_csr ); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_ParVectorInnerProd( r_csr, r_csr, &rnorm); - assert(!ierr); + hypre_assert(!ierr); rnorm = sqrt( rnorm ); if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 1 ) printf("HYPRE_LSC::solveUsingY12M - final norm = %e.\n", rnorm); } 
//------------------------------------------------------------------- - // clean up + // clean up //------------------------------------------------------------------- - delete [] ind_array; - delete [] rhs; - delete [] val; - delete [] snr; - delete [] rnr; - delete [] ha; - delete [] pivot; + delete [] ind_array; + delete [] rhs; + delete [] val; + delete [] snr; + delete [] rnr; + delete [] ha; + delete [] pivot; #else status = -1; printf("HYPRE_LSC::solveUsingY12M - not available.\n"); @@ -5239,18 +5238,18 @@ void HYPRE_LinSysCore::solveUsingAMGe(int &iterations) // need to construct a CSR matrix, and the column indices should // have been stored in colIndices and rowLengths //------------------------------------------------------------------- - + if ( localStartRow_ != 1 ) { printf("solveUsingAMGe ERROR - row does not start at 1.\n"); status = -1; return; } - if (slideReduction_ == 1) + if (slideReduction_ == 1) nrows = localEndRow_ - 2 * nConstraints_; - else if (slideReduction_ == 2 || slideReduction_ == 3) + else if (slideReduction_ == 2 || slideReduction_ == 3) nrows = localEndRow_ - nConstraints_; - else if (schurReduction_ == 1) + else if (schurReduction_ == 1) nrows = localEndRow_ - localStartRow_ + 1 - A21NRows_; else nrows = localEndRow_; @@ -5263,44 +5262,44 @@ void HYPRE_LinSysCore::solveUsingAMGe(int &iterations) rhs = new double[nrows]; ierr = HYPRE_IJVectorGetValues(currB_, nrows, ind_array, rhs); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------- // call Y12M to solve the linear system //------------------------------------------------------------------- sol = new double[nrows]; - status = HYPRE_LSI_AMGeSolve( rhs, sol ); - + status = HYPRE_LSI_AMGeSolve( rhs, sol ); + //------------------------------------------------------------------- // postprocessing //------------------------------------------------------------------- ierr = HYPRE_IJVectorSetValues(currX_, nrows, (const int *) 
&ind_array, (const double *) sol); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(currX_, (void**) &x_csr); HYPRE_IJVectorGetObject(currR_, (void**) &r_csr); HYPRE_IJVectorGetObject(currB_, (void**) &b_csr); ierr = HYPRE_ParVectorCopy( b_csr, r_csr ); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_ParCSRMatrixMatvec( -1.0, A_csr, x_csr, 1.0, r_csr ); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_ParVectorInnerProd( r_csr, r_csr, &rnorm); - assert(!ierr); + hypre_assert(!ierr); rnorm = sqrt( rnorm ); if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 1 ) printf("HYPRE_LSC::solveUsingAMGe - final norm = %e.\n", rnorm); //------------------------------------------------------------------- - // clean up + // clean up //------------------------------------------------------------------- - delete [] ind_array; - delete [] rhs; - delete [] sol; + delete [] ind_array; + delete [] rhs; + delete [] sol; #else iterations = 0; printf("HYPRE_LSC::solveUsingAMGe - not available.\n"); @@ -5315,7 +5314,7 @@ void HYPRE_LinSysCore::solveUsingAMGe(int &iterations) void HYPRE_LinSysCore::loadConstraintNumbers(int nConstr, int *constrList) { if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) - printf("%4d : HYPRE_LSC::loadConstraintNumbers - size = %d\n", + printf("%4d : HYPRE_LSC::loadConstraintNumbers - size = %d\n", mypid_, nConstr); nConstraints_ = nConstr; if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) @@ -5422,7 +5421,7 @@ void HYPRE_LinSysCore::putIntoMappedMatrix(int row, int numValues, newLeng = rowLengths_[localRow] + numValues; tempInd = new int[newLeng]; tempVal = new double[newLeng]; - for ( i = 0; i < rowLengths_[localRow]; i++ ) + for ( i = 0; i < rowLengths_[localRow]; i++ ) { tempVal[i] = colValues_[localRow][i]; tempInd[i] = colIndices_[localRow][i]; @@ -5434,7 +5433,7 @@ void HYPRE_LinSysCore::putIntoMappedMatrix(int row, int numValues, index = rowLengths_[localRow]; - for ( i = 0; i < numValues; i++ ) + for ( i = 0; i < numValues; i++ ) { 
colIndex = scatterIndices[i]; @@ -5443,7 +5442,7 @@ void HYPRE_LinSysCore::putIntoMappedMatrix(int row, int numValues, else mappedCol = colIndex; ind2 = HYPRE_LSI_Search(colIndices_[localRow],mappedCol+1,index); - if ( ind2 >= 0 ) + if ( ind2 >= 0 ) { newLeng--; colValues_[localRow][ind2] = values[i]; @@ -5591,7 +5590,7 @@ void HYPRE_LinSysCore::addToMinResProjectionSpace(HYPRE_IJVector xvec, ierr = HYPRE_IJVectorSetObjectType(HYpbs_[i], HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(HYpbs_[i]); ierr = HYPRE_IJVectorAssemble(HYpbs_[i]); - assert( !ierr ); + hypre_assert( !ierr ); } for ( i = 0; i <= projectSize_; i++ ) { @@ -5599,10 +5598,10 @@ void HYPRE_LinSysCore::addToMinResProjectionSpace(HYPRE_IJVector xvec, ierr = HYPRE_IJVectorSetObjectType(HYpxs_[i], HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(HYpxs_[i]); ierr = HYPRE_IJVectorAssemble(HYpxs_[i]); - assert(!ierr); + hypre_assert(!ierr); } } - + //----------------------------------------------------------------------- // if buffer has been filled, move things up (but for now, restart) //----------------------------------------------------------------------- @@ -5636,7 +5635,7 @@ void HYPRE_LinSysCore::addToMinResProjectionSpace(HYPRE_IJVector xvec, HYPRE_ParVectorCopy( x_csr, xn_csr ); //----------------------------------------------------------------------- - // compute bn = A * x + // compute bn = A * x //----------------------------------------------------------------------- HYPRE_ParCSRMatrixMatvec( 1.0, A_csr, x_csr, 0.0, bn_csr ); @@ -5671,7 +5670,7 @@ void HYPRE_LinSysCore::addToMinResProjectionSpace(HYPRE_IJVector xvec, } //----------------------------------------------------------------------- - // update final solution + // update final solution //----------------------------------------------------------------------- if ( alpha != 0.0 ) @@ -5787,7 +5786,7 @@ void HYPRE_LinSysCore::computeAConjProjection(HYPRE_ParCSRMatrix A_csr, // (1) compute alpha_i = (x, psi_i) for all previous stored 
vectors // (2) phi_n = x - sum(alpha_i * phi_i) // (3) phi_n = phi_n / norm(phi_n)_A -// (4) psi_n = A * phi_n +// (4) psi_n = A * phi_n //--------------------------------------------------------------------------- void HYPRE_LinSysCore::addToAConjProjectionSpace(HYPRE_IJVector xvec, @@ -5834,7 +5833,7 @@ void HYPRE_LinSysCore::addToAConjProjectionSpace(HYPRE_IJVector xvec, ierr = HYPRE_IJVectorSetObjectType(HYpbs_[i], HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(HYpbs_[i]); ierr = HYPRE_IJVectorAssemble(HYpbs_[i]); - assert( !ierr ); + hypre_assert( !ierr ); } for ( i = 0; i <= projectSize_; i++ ) { @@ -5842,7 +5841,7 @@ void HYPRE_LinSysCore::addToAConjProjectionSpace(HYPRE_IJVector xvec, ierr = HYPRE_IJVectorSetObjectType(HYpxs_[i], HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(HYpxs_[i]); ierr = HYPRE_IJVectorAssemble(HYpxs_[i]); - assert(!ierr); + hypre_assert(!ierr); } } @@ -5894,7 +5893,7 @@ void HYPRE_LinSysCore::addToAConjProjectionSpace(HYPRE_IJVector xvec, } //----------------------------------------------------------------------- - // update final solution + // update final solution //----------------------------------------------------------------------- if ( alpha != 0.0 ) @@ -5922,10 +5921,10 @@ void HYPRE_LinSysCore::addToAConjProjectionSpace(HYPRE_IJVector xvec, //*************************************************************************** //*************************************************************************** -// initialize field information +// initialize field information //--------------------------------------------------------------------------- -void HYPRE_LinSysCore::FE_initFields(int nFields, int *fieldSizes, +void HYPRE_LinSysCore::FE_initFields(int nFields, int *fieldSizes, int *fieldIDs) { #ifdef HAVE_MLI @@ -5943,14 +5942,14 @@ void HYPRE_LinSysCore::FE_initFields(int nFields, int *fieldSizes, // initialize element block //--------------------------------------------------------------------------- -void 
HYPRE_LinSysCore::FE_initElemBlock(int nElems, int nNodesPerElem, +void HYPRE_LinSysCore::FE_initElemBlock(int nElems, int nNodesPerElem, int numNodeFields, int *nodeFieldIDs) { #ifdef HAVE_MLI int status; if ( haveFEData_ == 1 && feData_ != NULL ) { - status = HYPRE_LSI_MLIFEDataInitElemBlock(feData_, nElems, + status = HYPRE_LSI_MLIFEDataInitElemBlock(feData_, nElems, nNodesPerElem, numNodeFields, nodeFieldIDs); if ( status ) { @@ -5973,7 +5972,7 @@ void HYPRE_LinSysCore::FE_initElemBlock(int nElems, int nNodesPerElem, // initialize element node list //--------------------------------------------------------------------------- -void HYPRE_LinSysCore::FE_initElemNodeList(int elemID, int nNodesPerElem, +void HYPRE_LinSysCore::FE_initElemNodeList(int elemID, int nNodesPerElem, int *nodeIDs) { #ifdef HAVE_MLI @@ -5989,10 +5988,10 @@ void HYPRE_LinSysCore::FE_initElemNodeList(int elemID, int nNodesPerElem, } //*************************************************************************** -// initialize shared nodes +// initialize shared nodes //--------------------------------------------------------------------------- -void HYPRE_LinSysCore::FE_initSharedNodes(int nShared, int *sharedIDs, +void HYPRE_LinSysCore::FE_initSharedNodes(int nShared, int *sharedIDs, int *sharedPLengs, int **sharedProcs) { #ifdef HAVE_MLI @@ -6009,7 +6008,7 @@ void HYPRE_LinSysCore::FE_initSharedNodes(int nShared, int *sharedIDs, } //*************************************************************************** -// initialize complete +// initialize complete //--------------------------------------------------------------------------- void HYPRE_LinSysCore::FE_initComplete() @@ -6025,7 +6024,7 @@ void HYPRE_LinSysCore::FE_initComplete() // load element matrix //--------------------------------------------------------------------------- -void HYPRE_LinSysCore::FE_loadElemMatrix(int elemID, int nNodes, +void HYPRE_LinSysCore::FE_loadElemMatrix(int elemID, int nNodes, int *elemNodeList, int matDim, 
double **elemMat) { #ifdef HAVE_MLI @@ -6062,7 +6061,7 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() /* -------------------------------------------------------- */ /* construct procNRows array */ /* -------------------------------------------------------- */ - + localNRows = localEndRow_ - localStartRow_ + 1; procNRows = new int[numProcs_+1]; iTempArray = new int[numProcs_]; @@ -6093,7 +6092,7 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() /* -------------------------------------------------------- */ /* construct send information */ /* -------------------------------------------------------- */ - + procList = new int[numProcs_]; for (iP = 0; iP < numProcs_; iP++) procList[iP] = 0; for (iN = 0; iN < numProcs_; iN++) @@ -6116,11 +6115,11 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() sendProcs[nSends++] = iP; } } - + /* -------------------------------------------------------- */ /* construct recv information */ /* -------------------------------------------------------- */ - + for (iP = 0; iP < numProcs_; iP++) procList[iP] = 0; for (iP = 0; iP < nSends; iP++) procList[sendProcs[iP]]++; MPI_Allreduce(procList,iTempArray,numProcs_,MPI_INT,MPI_SUM,comm_); @@ -6143,11 +6142,11 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() MPI_Wait(&(mpiRequests[iP]), &mpiStatus); recvProcs[iP] = mpiStatus.MPI_SOURCE; } - + /* -------------------------------------------------------- */ /* communicate equation numbers information */ /* -------------------------------------------------------- */ - + for (iP = 0; iP < nRecvs; iP++) { iRecvBufs[iP] = new int[recvLengs[iP]]; @@ -6175,11 +6174,11 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() 29422, comm_); } for (iP = 0; iP < nRecvs; iP++) MPI_Wait(&(mpiRequests[iP]),&mpiStatus); - + /* -------------------------------------------------------- */ /* communicate coordinate information */ /* -------------------------------------------------------- */ - + for (iP = 0; iP < 
nRecvs; iP++) { dRecvBufs[iP] = new double[recvLengs[iP]*MLI_FieldSize_]; @@ -6210,11 +6209,11 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() sendProcs[iP], 29425, comm_); } for (iP = 0; iP < nRecvs; iP++) MPI_Wait(&(mpiRequests[iP]),&mpiStatus); - + /* -------------------------------------------------------- */ /* check any duplicate coordinate information */ /* -------------------------------------------------------- */ - + arrayLeng = MLI_NumNodes_; for (iP = 0; iP < nRecvs; iP++) arrayLeng += recvLengs[iP]; flags = new int[arrayLeng]; @@ -6252,14 +6251,14 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() else numNodes++; } delete [] flags; - + /* -------------------------------------------------------- */ /* set up nodal coordinate information in correct order */ /* -------------------------------------------------------- */ - + coordLength = MLI_NumNodes_ * MLI_FieldSize_; nCoords = new double[coordLength]; - + arrayLeng = MLI_NumNodes_ * MLI_FieldSize_; for (iN = 0; iN < MLI_NumNodes_; iN++) { @@ -6268,7 +6267,7 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() eqnInd = (MLI_EqnNumbers_[iN] - procNRows[mypid_]) / MLI_FieldSize_; if (eqnInd >= 0 && eqnInd < arrayLeng) for (iD = 0; iD < MLI_FieldSize_; iD++) - nCoords[eqnInd*MLI_FieldSize_+iD] = + nCoords[eqnInd*MLI_FieldSize_+iD] = MLI_NodalCoord_[iN*MLI_FieldSize_+iD]; } } @@ -6283,7 +6282,7 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() dRecvBufs[iP][iR*MLI_FieldSize_+iD]; } } - + /* -------------------------------------------------------- */ /* create AMS vectors */ /* -------------------------------------------------------- */ @@ -6294,7 +6293,7 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() ierr += HYPRE_IJVectorSetObjectType(amsX_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(amsX_); ierr += HYPRE_IJVectorAssemble(amsX_); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(amsX_, (void **) &parVec); vecData = (double *) 
hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) parVec)); for (iN = 0; iN < localNRows/MLI_FieldSize_; iN++) @@ -6304,7 +6303,7 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() ierr += HYPRE_IJVectorSetObjectType(amsY_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(amsY_); ierr += HYPRE_IJVectorAssemble(amsY_); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(amsY_, (void **) &parVec); vecData = (double *) hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) parVec)); for (iN = 0; iN < localNRows/MLI_FieldSize_; iN++) @@ -6314,7 +6313,7 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() ierr += HYPRE_IJVectorSetObjectType(amsZ_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(amsZ_); ierr += HYPRE_IJVectorAssemble(amsZ_); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(amsZ_, (void **) &parVec); vecData = (double *) hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) parVec)); for (iN = 0; iN < localNRows/MLI_FieldSize_; iN++) @@ -6323,7 +6322,7 @@ void HYPRE_LinSysCore::HYPRE_LSI_BuildNodalCoordinates() /* -------------------------------------------------------- */ /* clean up */ /* -------------------------------------------------------- */ - + delete [] procList; delete [] iTempArray; delete [] nodeProcMap; diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_Dsuperlu.c b/src/FEI_mv/fei-hypre/HYPRE_LSI_Dsuperlu.c index d5dd3d047..0cd5a7ba8 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_Dsuperlu.c +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_Dsuperlu.c @@ -51,14 +51,14 @@ HYPRE_LSI_DSuperLU; int HYPRE_LSI_DSuperLUGenMatrix(HYPRE_Solver solver); /*************************************************************************** - * HYPRE_LSI_DSuperLUCreate - Return a DSuperLU object "solver". + * HYPRE_LSI_DSuperLUCreate - Return a DSuperLU object "solver". 
*--------------------------------------------------------------------------*/ int HYPRE_LSI_DSuperLUCreate( MPI_Comm comm, HYPRE_Solver *solver ) { HYPRE_LSI_DSuperLU *sluPtr; sluPtr = hypre_TAlloc(HYPRE_LSI_DSuperLU, 1, HYPRE_MEMORY_HOST); - assert ( sluPtr != NULL ); + hypre_assert ( sluPtr != NULL ); sluPtr->comm_ = comm; sluPtr->Amat_ = NULL; sluPtr->localNRows_ = 0; @@ -91,13 +91,13 @@ int HYPRE_LSI_DSuperLUDestroy( HYPRE_Solver solver ) dSolveFinalize(&(sluPtr->options_), &(sluPtr->SOLVEstruct_)); superlu_gridexit(&(sluPtr->sluGrid_)); } - free(sluPtr->berr_); - free(sluPtr); + hypre_TFree(sluPtr->berr_, HYPRE_MEMORY_HOST); + hypre_TFree(sluPtr, HYPRE_MEMORY_HOST); return 0; } /*************************************************************************** - * HYPRE_LSI_DSuperLUSetOutputLevel - Set debug level + * HYPRE_LSI_DSuperLUSetOutputLevel - Set debug level *--------------------------------------------------------------------------*/ int HYPRE_LSI_DSuperLUSetOutputLevel(HYPRE_Solver solver, int level) @@ -155,7 +155,7 @@ int HYPRE_LSI_DSuperLUSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, options->Equil = YES (NO, ROW, COL, BOTH) (YES not robust) options->ParSymbFact = NO; - options->ColPerm = MMD_AT_PLUS_A (NATURAL, MMD_ATA, + options->ColPerm = MMD_AT_PLUS_A (NATURAL, MMD_ATA, METIS_AT_PLUS_A, PARMETIS, MY_PERMC} (MMD_AT_PLUS_A the fastest, a factor of 3+ better than MMD_ATA, which in @@ -179,16 +179,16 @@ int HYPRE_LSI_DSuperLUSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, if (sluPtr->outputLevel_ < 2) sluPtr->options_.PrintStat = NO; ScalePermstructInit(sluPtr->globalNRows_, sluPtr->globalNRows_, &(sluPtr->ScalePermstruct_)); -// LUstructInit(sluPtr->globalNRows_, sluPtr->globalNRows_, +// LUstructInit(sluPtr->globalNRows_, sluPtr->globalNRows_, // &(sluPtr->LUstruct_)); LUstructInit(sluPtr->globalNRows_, &(sluPtr->LUstruct_)); sluPtr->berr_[0] = 0.0; PStatInit(&(sluPtr->stat_)); - pdgssvx(&(sluPtr->options_), &(sluPtr->sluAmat_), - 
&(sluPtr->ScalePermstruct_), NULL, sluPtr->localNRows_, iZero, - &(sluPtr->sluGrid_), &(sluPtr->LUstruct_), + pdgssvx(&(sluPtr->options_), &(sluPtr->sluAmat_), + &(sluPtr->ScalePermstruct_), NULL, sluPtr->localNRows_, iZero, + &(sluPtr->sluGrid_), &(sluPtr->LUstruct_), &(sluPtr->SOLVEstruct_), sluPtr->berr_, &(sluPtr->stat_), &info); - sluPtr->options_.Fact = FACTORED; + sluPtr->options_.Fact = FACTORED; if (sluPtr->outputLevel_ >= 2) PStatPrint(&(sluPtr->options_),&(sluPtr->stat_),&(sluPtr->sluGrid_)); @@ -229,9 +229,9 @@ int HYPRE_LSI_DSuperLUSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, /* solve */ /* ---------------------------------------------------------------- */ - pdgssvx(&(sluPtr->options_), &(sluPtr->sluAmat_), - &(sluPtr->ScalePermstruct_), soln, localNRows, iOne, - &(sluPtr->sluGrid_), &(sluPtr->LUstruct_), + pdgssvx(&(sluPtr->options_), &(sluPtr->sluAmat_), + &(sluPtr->ScalePermstruct_), soln, localNRows, iOne, + &(sluPtr->sluGrid_), &(sluPtr->LUstruct_), &(sluPtr->SOLVEstruct_), sluPtr->berr_, &(sluPtr->stat_), &info); /* ---------------------------------------------------------------- */ @@ -286,9 +286,9 @@ int HYPRE_LSI_DSuperLUGenMatrix(HYPRE_Solver solver) localNRows = procNRows[mypid+1] - procNRows[mypid]; sluPtr->localNRows_ = localNRows; sluPtr->globalNRows_ = procNRows[nprocs]; - csrIA = (int *) intMalloc_dist(localNRows+1); - csrJA = (int *) intMalloc_dist(localNNZ); - csrAA = (double *) doubleMalloc_dist(localNNZ); + csrIA = (int *) intMalloc_dist(localNRows+1); + csrJA = (int *) intMalloc_dist(localNNZ); + csrAA = (double *) doubleMalloc_dist(localNNZ); localNNZ = 0; csrIA[0] = localNNZ; @@ -312,9 +312,9 @@ int HYPRE_LSI_DSuperLUGenMatrix(HYPRE_Solver solver) /* ---------------------------------------------------------------- */ dCreate_CompRowLoc_Matrix_dist(&(sluPtr->sluAmat_), sluPtr->globalNRows_, - sluPtr->globalNRows_, localNNZ, localNRows, startRow, csrAA, + sluPtr->globalNRows_, localNNZ, localNRows, startRow, csrAA, csrJA, 
csrIA, SLU_NR_loc, SLU_D, SLU_GE); - free(procNRows); + hypre_TFree(procNRows, HYPRE_MEMORY_HOST); return 0; } #else diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_UZAWA.cxx b/src/FEI_mv/fei-hypre/HYPRE_LSI_UZAWA.cxx index d0e6796c5..b35cc1994 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_UZAWA.cxx +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_UZAWA.cxx @@ -6,7 +6,7 @@ ******************************************************************************/ //*************************************************************************** -// Date : Apr 26, 2002 +// Date : Apr 26, 2002 //*************************************************************************** // system includes //--------------------------------------------------------------------------- @@ -14,7 +14,6 @@ #include #include #include -#include #if 0 /* RDF: Not sure this is really needed */ #ifdef WIN32 @@ -41,7 +40,7 @@ // local defines and external functions //--------------------------------------------------------------------------- -extern "C" +extern "C" { int hypre_BoomerAMGBuildCoarseOperator(hypre_ParCSRMatrix*, hypre_ParCSRMatrix*, @@ -51,13 +50,13 @@ extern "C" //*************************************************************************** //*************************************************************************** -// C-Interface data structure +// C-Interface data structure //--------------------------------------------------------------------------- typedef struct HYPRE_LSI_Uzawa_Struct { void *precon; -} +} HYPRE_LSI_UzawaStruct; //*************************************************************************** @@ -91,7 +90,7 @@ extern "C" int HYPRE_LSI_UzawaDestroy(HYPRE_Solver solver) else err = 1; free( cprecon ); } - return err; + return err; } //*************************************************************************** @@ -160,7 +159,7 @@ extern "C" int HYPRE_LSI_UzawaGetNumIterations(HYPRE_Solver solver, int *iter) //*************************************************************************** -extern "C" 
+extern "C" int HYPRE_LSI_UzawaSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix Amat, HYPRE_ParVector b, HYPRE_ParVector x) { @@ -180,7 +179,7 @@ int HYPRE_LSI_UzawaSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix Amat, //*************************************************************************** -extern "C" +extern "C" int HYPRE_LSI_UzawaSolve(HYPRE_Solver solver, HYPRE_ParCSRMatrix Amat, HYPRE_ParVector b, HYPRE_ParVector x) { @@ -327,7 +326,7 @@ int HYPRE_LSI_Uzawa::setParams(char *params) else if ( !strcmp(param2, "outputLevel") ) { sscanf(params,"%s %s %d", param1, param2, &outputLevel_); - if ( outputLevel_ > 0 ) + if ( outputLevel_ > 0 ) printf("HYPRE_LSI_Uzawa::outputLevel = %d.\n", outputLevel_); } else if ( !strcmp(param2, "modified") ) @@ -338,17 +337,17 @@ int HYPRE_LSI_Uzawa::setParams(char *params) else if ( !strcmp(param2, "A11Solver") ) { sscanf(params,"%s %s %s", param1, param2, param3); - if ( !strcmp(param3, "none") ) + if ( !strcmp(param3, "none") ) { A11Params_.SolverID_ = 0; if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 solver = cg\n"); } - else if ( !strcmp(param3, "cg") ) + else if ( !strcmp(param3, "cg") ) { A11Params_.SolverID_ = 1; if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 solver = cg\n"); } - else if ( !strcmp(param3, "gmres") ) + else if ( !strcmp(param3, "gmres") ) { A11Params_.SolverID_ = 2; if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 solver = gmres\n"); @@ -357,17 +356,17 @@ int HYPRE_LSI_Uzawa::setParams(char *params) else if ( !strcmp(param2, "S22Solver") ) { sscanf(params,"%s %s %s", param1, param2, param3); - if ( !strcmp(param3, "none") ) + if ( !strcmp(param3, "none") ) { S22Params_.SolverID_ = 0; if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 solver = cg\n"); } - else if ( !strcmp(param3, "cg") ) + else if ( !strcmp(param3, "cg") ) { S22Params_.SolverID_ = 1; if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 solver = cg\n"); } - else if ( !strcmp(param3, "gmres") ) + else if ( !strcmp(param3, "gmres") ) { 
S22Params_.SolverID_ = 2; if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 solver = gmres\n"); @@ -381,110 +380,110 @@ int HYPRE_LSI_Uzawa::setParams(char *params) else if ( !strcmp(param2, "A11Tolerance") ) { sscanf(params,"%s %s %lg", param1, param2, &(A11Params_.Tol_)); - if ( A11Params_.Tol_ >= 1.0 || A11Params_.Tol_ <= 0.0 ) + if ( A11Params_.Tol_ >= 1.0 || A11Params_.Tol_ <= 0.0 ) A11Params_.Tol_ = 1.0e-12; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 tol = %e\n", A11Params_.Tol_); } else if ( !strcmp(param2, "S22Tolerance") ) { sscanf(params,"%s %s %lg", param1, param2, &(S22Params_.Tol_)); - if ( S22Params_.Tol_ >= 1.0 || S22Params_.Tol_ <= 0.0 ) + if ( S22Params_.Tol_ >= 1.0 || S22Params_.Tol_ <= 0.0 ) S22Params_.Tol_ = 1.0e-12; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 tol = %e\n", S22Params_.Tol_); } else if ( !strcmp(param2, "A11MaxIterations") ) { sscanf(params,"%s %s %d", param1, param2, &(A11Params_.MaxIter_)); if ( A11Params_.MaxIter_ <= 0 ) A11Params_.MaxIter_ = 10; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 maxiter = %d\n", A11Params_.MaxIter_); } else if ( !strcmp(param2, "S22MaxIterations") ) { sscanf(params,"%s %s %d", param1, param2, &(S22Params_.MaxIter_)); if ( S22Params_.MaxIter_ <= 0 ) S22Params_.MaxIter_ = 10; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 maxiter = %d\n", S22Params_.MaxIter_); } else if ( !strcmp(param2, "A11Precon") ) { sscanf(params,"%s %s %s", param1, param2, param3); - if ( !strcmp(param3, "diagonal") ) + if ( !strcmp(param3, "diagonal") ) { A11Params_.PrecondID_ = 1; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 precon = diagonal\n"); } - else if ( !strcmp(param3, "parasails") ) + else if ( !strcmp(param3, "parasails") ) { A11Params_.PrecondID_ = 2; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 precon = parasails\n"); } - 
else if ( !strcmp(param3, "boomeramg") ) + else if ( !strcmp(param3, "boomeramg") ) { A11Params_.PrecondID_ = 3; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 precon = boomeramg\n"); } - else if ( !strcmp(param3, "pilut") ) + else if ( !strcmp(param3, "pilut") ) { A11Params_.PrecondID_ = 4; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 precon = pilut\n"); } - else if ( !strcmp(param3, "euclid") ) + else if ( !strcmp(param3, "euclid") ) { A11Params_.PrecondID_ = 5; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 precon = euclid\n"); } - else if ( !strcmp(param3, "mli") ) + else if ( !strcmp(param3, "mli") ) { A11Params_.PrecondID_ = 6; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::A11 precon = MLISA\n"); } } else if ( !strcmp(param2, "S22Precon") ) { sscanf(params,"%s %s %s", param1, param2, param3); - if ( !strcmp(param3, "diagonal") ) + if ( !strcmp(param3, "diagonal") ) { S22Params_.PrecondID_ = 1; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 precon = diagonal\n"); } - else if ( !strcmp(param3, "parasails") ) + else if ( !strcmp(param3, "parasails") ) { S22Params_.PrecondID_ = 2; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 precon = parasails\n"); } - else if ( !strcmp(param3, "boomeramg") ) + else if ( !strcmp(param3, "boomeramg") ) { S22Params_.PrecondID_ = 3; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 precon = boomeramg\n"); } - else if ( !strcmp(param3, "pilut") ) + else if ( !strcmp(param3, "pilut") ) { S22Params_.PrecondID_ = 4; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 precon = pilut\n"); } - else if ( !strcmp(param3, "euclid") ) + else if ( !strcmp(param3, "euclid") ) { S22Params_.PrecondID_ = 5; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 precon = euclid\n"); } - else if ( 
!strcmp(param3, "mli") ) + else if ( !strcmp(param3, "mli") ) { S22Params_.PrecondID_ = 6; - if (outputLevel_ > 0) + if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22 precon = MLISA\n"); } } @@ -668,7 +667,7 @@ int HYPRE_LSI_Uzawa::setParams(char *params) if ( S22Params_.MLINullDim_ < 1 ) S22Params_.MLINullDim_ = 1; if (outputLevel_ > 0) printf("HYPRE_LSI_Uzawa::S22PreconMLINullDim\n"); } - else + else { printf("HYPRE_LSI_Uzawa:: string not recognized %s\n", params); } @@ -686,7 +685,7 @@ int HYPRE_LSI_Uzawa::setMaxIterations(int niter) } //*************************************************************************** -// set tolerance +// set tolerance //--------------------------------------------------------------------------- int HYPRE_LSI_Uzawa::setTolerance(double tol) @@ -696,7 +695,7 @@ int HYPRE_LSI_Uzawa::setTolerance(double tol) } //*************************************************************************** -// get number of iterations +// get number of iterations //--------------------------------------------------------------------------- int HYPRE_LSI_Uzawa::getNumIterations(int &iter) @@ -709,13 +708,13 @@ int HYPRE_LSI_Uzawa::getNumIterations(int &iter) // Given the matrix (A) within the object, separate the blocks //--------------------------------------------------------------------------- -int HYPRE_LSI_Uzawa::setup(HYPRE_ParCSRMatrix A, HYPRE_ParVector x, +int HYPRE_LSI_Uzawa::setup(HYPRE_ParCSRMatrix A, HYPRE_ParVector x, HYPRE_ParVector b) { int mypid; //------------------------------------------------------------------ - // initial set up + // initial set up //------------------------------------------------------------------ MPI_Comm_rank( mpiComm_, &mypid ); @@ -753,8 +752,8 @@ int HYPRE_LSI_Uzawa::setup(HYPRE_ParCSRMatrix A, HYPRE_ParVector x, // setup preconditioners //------------------------------------------------------------------ - setupPrecon(&A11Precond_, A11mat_, A11Params_); - setupPrecon(&S22Precond_, S22mat_, S22Params_); + 
setupPrecon(&A11Precond_, A11mat_, A11Params_); + setupPrecon(&S22Precond_, S22mat_, S22Params_); //------------------------------------------------------------------ // return @@ -800,7 +799,7 @@ int HYPRE_LSI_Uzawa::solve(HYPRE_ParVector b, HYPRE_ParVector x) ierr = HYPRE_IJVectorSetObjectType(IJR, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(IJR); ierr = HYPRE_IJVectorAssemble(IJR); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(IJR, (void **) &r_csr); startRow = procNRows[mypid] - procA22Sizes_[mypid]; @@ -809,25 +808,25 @@ int HYPRE_LSI_Uzawa::solve(HYPRE_ParVector b, HYPRE_ParVector x) ierr = HYPRE_IJVectorSetObjectType(IJF1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(IJF1); ierr = HYPRE_IJVectorAssemble(IJF1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(IJF1, (void **) &f1_csr); ierr = HYPRE_IJVectorCreate(mpiComm_, startRow, endRow, &IJU1); ierr = HYPRE_IJVectorSetObjectType(IJU1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(IJU1); ierr = HYPRE_IJVectorAssemble(IJU1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(IJU1, (void **) &u1_csr); ierr = HYPRE_IJVectorCreate(mpiComm_, startRow, endRow, &IJT1); ierr = HYPRE_IJVectorSetObjectType(IJT1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(IJT1); ierr = HYPRE_IJVectorAssemble(IJT1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(IJT1, (void **) &t1_csr); ierr = HYPRE_IJVectorCreate(mpiComm_, startRow, endRow, &IJV1); ierr = HYPRE_IJVectorSetObjectType(IJV1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(IJV1); ierr = HYPRE_IJVectorAssemble(IJV1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(IJV1, (void **) &v1_csr); startRow = procA22Sizes_[mypid]; @@ -836,34 +835,34 @@ int HYPRE_LSI_Uzawa::solve(HYPRE_ParVector b, HYPRE_ParVector x) ierr = HYPRE_IJVectorSetObjectType(IJF2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(IJF2); ierr = HYPRE_IJVectorAssemble(IJF2); - assert(!ierr); + hypre_assert(!ierr); 
HYPRE_IJVectorGetObject(IJF2, (void **) &f2_csr); ierr = HYPRE_IJVectorCreate(mpiComm_, startRow, endRow, &IJU2); ierr = HYPRE_IJVectorSetObjectType(IJU2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(IJU2); ierr = HYPRE_IJVectorAssemble(IJU2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(IJU2, (void **) &u2_csr); ierr = HYPRE_IJVectorCreate(mpiComm_, startRow, endRow, &IJT2); ierr = HYPRE_IJVectorSetObjectType(IJT2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(IJT2); ierr = HYPRE_IJVectorAssemble(IJT2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(IJT2, (void **) &t2_csr); ierr = HYPRE_IJVectorCreate(mpiComm_, startRow, endRow, &IJV2); ierr = HYPRE_IJVectorSetObjectType(IJV2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(IJV2); ierr = HYPRE_IJVectorAssemble(IJV2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(IJV2, (void **) &v2_csr); free( procNRows ); //------------------------------------------------------------------ - // compute initial residual + // compute initial residual //------------------------------------------------------------------ if ( maxIterations_ > 1 ) - { + { HYPRE_ParVectorCopy( b, r_csr ); HYPRE_ParCSRMatrixMatvec( -1.0, Amat_, x, 1.0, r_csr ); HYPRE_ParVectorInnerProd( r_csr, r_csr, &rnorm); @@ -875,13 +874,13 @@ int HYPRE_LSI_Uzawa::solve(HYPRE_ParVector b, HYPRE_ParVector x) else rnorm = rnorm0 = 1.0; //------------------------------------------------------------------ - // set up solvers + // set up solvers //------------------------------------------------------------------ if ( A11Solver_ == NULL ) - setupSolver(&A11Solver_,A11mat_,f1_csr,u1_csr,A11Precond_,A11Params_); + setupSolver(&A11Solver_,A11mat_,f1_csr,u1_csr,A11Precond_,A11Params_); if ( S22Params_.SolverID_ != 0 && S22Solver_ == NULL ) - setupSolver(&S22Solver_,S22mat_,f2_csr,u2_csr,S22Precond_,S22Params_); + setupSolver(&S22Solver_,S22mat_,f2_csr,u2_csr,S22Precond_,S22Params_); 
//------------------------------------------------------------------ // distribute the vectors @@ -940,7 +939,7 @@ int HYPRE_LSI_Uzawa::solve(HYPRE_ParVector b, HYPRE_ParVector x) else if ( A11Params_.SolverID_ == 2 ) HYPRE_ParCSRGMRESSolve(A11Solver_, A11mat_, t1_csr, u1_csr); - hypre_ParVectorAxpy( 1.0, (hypre_ParVector*)v1_csr , + hypre_ParVectorAxpy( 1.0, (hypre_ParVector*)v1_csr , (hypre_ParVector*)u1_csr ); //---------------------------------------------------------------- @@ -962,7 +961,7 @@ int HYPRE_LSI_Uzawa::solve(HYPRE_ParVector b, HYPRE_ParVector x) HYPRE_ParVectorScale( S22SolverDampFactor_, v2_csr ); } - hypre_ParVectorAxpy( 1.0, (hypre_ParVector*)v2_csr , + hypre_ParVectorAxpy( 1.0, (hypre_ParVector*)v2_csr , (hypre_ParVector*)u2_csr ); //---------------------------------------------------------------- @@ -972,19 +971,19 @@ int HYPRE_LSI_Uzawa::solve(HYPRE_ParVector b, HYPRE_ParVector x) HYPRE_ParVectorCopy( f1_csr, t1_csr ); HYPRE_ParCSRMatrixMatvec( -1.0, A11mat_, v1_csr, 1.0, t1_csr ); HYPRE_ParCSRMatrixMatvec( -1.0, A12mat_, u2_csr, 1.0, t1_csr ); - + if ( A11Params_.SolverID_ == 1 ) HYPRE_ParCSRPCGSolve(A11Solver_, A11mat_, t1_csr, u1_csr); else if ( A11Params_.SolverID_ == 2 ) HYPRE_ParCSRGMRESSolve(A11Solver_, A11mat_, t1_csr, u1_csr); - hypre_ParVectorAxpy( 1.0, (hypre_ParVector*)v1_csr , + hypre_ParVectorAxpy( 1.0, (hypre_ParVector*)v1_csr , (hypre_ParVector*)u1_csr ); //---------------------------------------------------------------- // y_{i+1} = y_{i+1/2} + Q_B^{-1} (A21 x_{i+1} - f2) //--------------------------------------------------------------- - + HYPRE_ParVectorCopy( f2_csr, t2_csr ); HYPRE_ParCSRMatrixMatvecT( 1.0, A12mat_, u1_csr, -1.0, t2_csr ); @@ -997,8 +996,8 @@ int HYPRE_LSI_Uzawa::solve(HYPRE_ParVector b, HYPRE_ParVector x) HYPRE_ParVectorCopy( t2_csr, v2_csr ); HYPRE_ParVectorScale( S22SolverDampFactor_, v2_csr ); } - - hypre_ParVectorAxpy( 1.0, (hypre_ParVector*)v2_csr , + + hypre_ParVectorAxpy( 1.0, 
(hypre_ParVector*)v2_csr , (hypre_ParVector*)u2_csr ); } @@ -1043,7 +1042,7 @@ int HYPRE_LSI_Uzawa::findA22BlockSize() { int mypid, nprocs, *procNRows, startRow, endRow; int A22LocalSize, irow, zeroDiag, jcol, rowSize, *colInd; - int *iTempList, ip, ncnt, A22GlobalSize; + int *iTempList, ip, ncnt, A22GlobalSize; double *colVal; //------------------------------------------------------------------ @@ -1058,19 +1057,19 @@ int HYPRE_LSI_Uzawa::findA22BlockSize() free( procNRows ); //------------------------------------------------------------------ - // search for dimension of A_22 + // search for dimension of A_22 //------------------------------------------------------------------ A22LocalSize = 0; - for ( irow = endRow; irow >= startRow; irow-- ) + for ( irow = endRow; irow >= startRow; irow-- ) { HYPRE_ParCSRMatrixGetRow(Amat_,irow,&rowSize,&colInd,&colVal); zeroDiag = 1; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { - if ( colInd[jcol] == irow && colVal[jcol] != 0.0 ) + if ( colInd[jcol] == irow && colVal[jcol] != 0.0 ) { - zeroDiag = 0; + zeroDiag = 0; break; } } @@ -1094,7 +1093,7 @@ int HYPRE_LSI_Uzawa::findA22BlockSize() delete [] iTempList; A22GlobalSize = 0; ncnt = 0; - for ( ip = 0; ip < nprocs; ip++ ) + for ( ip = 0; ip < nprocs; ip++ ) { ncnt = procA22Sizes_[ip]; procA22Sizes_[ip] = A22GlobalSize; @@ -1116,7 +1115,7 @@ int HYPRE_LSI_Uzawa::buildBlockMatrices() ierr += buildS22Mat(); return ierr; } - + //**************************************************************************** // build A11 and A12 matrix //---------------------------------------------------------------------------- @@ -1127,7 +1126,7 @@ int HYPRE_LSI_Uzawa::buildA11A12Mat() int A12NCols, A11NRows, A11StartRow, A12StartCol, *A11MatSize, ip; int *A12MatSize, irow, jcol, colIndex, uBound, A11RowSize, A12RowSize; int *A11ColInd, *A12ColInd, rowIndex, rowSize, *colInd, ncnt; - int localNRows, maxA11RowSize, maxA12RowSize; + int localNRows, 
maxA11RowSize, maxA12RowSize; double *colVal, *A11ColVal, *A12ColVal; HYPRE_IJMatrix IJA11, IJA12; @@ -1142,7 +1141,7 @@ int HYPRE_LSI_Uzawa::buildA11A12Mat() endRow = procNRows[mypid+1] - 1; localNRows = endRow - startRow + 1; newEndRow = endRow - (procA22Sizes_[mypid+1] - procA22Sizes_[mypid]); - + //------------------------------------------------------------------ // calculate the dimension of A11 and A12 //------------------------------------------------------------------ @@ -1167,11 +1166,11 @@ int HYPRE_LSI_Uzawa::buildA11A12Mat() ierr = HYPRE_IJMatrixCreate(mpiComm_,A11StartRow,A11StartRow+A11NRows-1, A11StartRow,A11StartRow+A11NRows-1,&IJA11); ierr += HYPRE_IJMatrixSetObjectType(IJA11, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixCreate(mpiComm_,A11StartRow,A11StartRow+A11NRows-1, A12StartCol,A12StartCol+A12NCols-1,&IJA12); ierr += HYPRE_IJMatrixSetObjectType(IJA12, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros in each matrix @@ -1179,21 +1178,21 @@ int HYPRE_LSI_Uzawa::buildA11A12Mat() A11MatSize = new int[A11NRows]; A12MatSize = new int[A11NRows]; - maxA11RowSize = maxA12RowSize = 0; + maxA11RowSize = maxA12RowSize = 0; - for ( irow = startRow; irow <= newEndRow ; irow++ ) + for ( irow = startRow; irow <= newEndRow ; irow++ ) { A11RowSize = A12RowSize = 0; HYPRE_ParCSRMatrixGetRow(Amat_,irow,&rowSize,&colInd,NULL); - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; for ( ip = 1; ip <= nprocs; ip++ ) if ( procNRows[ip] > colIndex ) break; uBound = procNRows[ip] - (procA22Sizes_[ip] - procA22Sizes_[ip-1]); - if ( colIndex < uBound ) A11RowSize++; - else A12RowSize++; - } + if ( colIndex < uBound ) A11RowSize++; + else A12RowSize++; + } A11MatSize[irow-startRow] = A11RowSize; A12MatSize[irow-startRow] = A12RowSize; maxA11RowSize = (A11RowSize > 
maxA11RowSize) ? A11RowSize : maxA11RowSize; @@ -1207,10 +1206,10 @@ int HYPRE_LSI_Uzawa::buildA11A12Mat() ierr = HYPRE_IJMatrixSetRowSizes(IJA11, A11MatSize); ierr += HYPRE_IJMatrixInitialize(IJA11); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixSetRowSizes(IJA12, A12MatSize); ierr += HYPRE_IJMatrixInitialize(IJA12); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // next load the matrices @@ -1221,37 +1220,37 @@ int HYPRE_LSI_Uzawa::buildA11A12Mat() A12ColInd = new int[maxA12RowSize+1]; A12ColVal = new double[maxA12RowSize+1]; - for ( irow = startRow; irow <= newEndRow ; irow++ ) + for ( irow = startRow; irow <= newEndRow ; irow++ ) { A11RowSize = A12RowSize = 0; HYPRE_ParCSRMatrixGetRow(Amat_,irow,&rowSize,&colInd,&colVal); - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; for ( ip = 1; ip <= nprocs; ip++ ) if ( procNRows[ip] > colIndex ) break; uBound = procNRows[ip] - (procA22Sizes_[ip] - procA22Sizes_[ip-1]); - if ( colIndex < uBound ) + if ( colIndex < uBound ) { - A11ColInd[A11RowSize] = colIndex - procA22Sizes_[ip-1]; + A11ColInd[A11RowSize] = colIndex - procA22Sizes_[ip-1]; A11ColVal[A11RowSize++] = colVal[jcol]; } else { - A12ColInd[A12RowSize] = colIndex - uBound + procA22Sizes_[ip-1]; - A12ColVal[A12RowSize++] = colVal[jcol]; + A12ColInd[A12RowSize] = colIndex - uBound + procA22Sizes_[ip-1]; + A12ColVal[A12RowSize++] = colVal[jcol]; } - } + } HYPRE_ParCSRMatrixRestoreRow(Amat_,irow,&rowSize,&colInd,&colVal); rowIndex = irow - procA22Sizes_[mypid]; - ierr = HYPRE_IJMatrixSetValues(IJA11, 1, &A11RowSize, - (const int *) &rowIndex, (const int *) A11ColInd, + ierr = HYPRE_IJMatrixSetValues(IJA11, 1, &A11RowSize, + (const int *) &rowIndex, (const int *) A11ColInd, (const double *) A11ColVal); - assert( !ierr ); - ierr = HYPRE_IJMatrixSetValues(IJA12, 1, &A12RowSize, - (const int *) &rowIndex, (const int *) A12ColInd, + 
hypre_assert( !ierr ); + ierr = HYPRE_IJMatrixSetValues(IJA12, 1, &A12RowSize, + (const int *) &rowIndex, (const int *) A12ColInd, (const double *) A12ColVal); - assert( !ierr ); + hypre_assert( !ierr ); } //------------------------------------------------------------------ @@ -1285,14 +1284,14 @@ int HYPRE_LSI_Uzawa::buildA11A12Mat() { ncnt = 0; MPI_Barrier(mpiComm_); - while ( ncnt < nprocs ) + while ( ncnt < nprocs ) { - if ( mypid == ncnt ) + if ( mypid == ncnt ) { printf("====================================================\n"); printf("%4d : Printing A11 matrix... \n", mypid); fflush(stdout); - for (irow = A11StartRow;irow < A11StartRow+A11NRows;irow++) + for (irow = A11StartRow;irow < A11StartRow+A11NRows;irow++) { HYPRE_ParCSRMatrixGetRow(A11mat_,irow,&rowSize,&colInd,&colVal); for ( jcol = 0; jcol < rowSize; jcol++ ) @@ -1312,14 +1311,14 @@ int HYPRE_LSI_Uzawa::buildA11A12Mat() { ncnt = 0; MPI_Barrier(mpiComm_); - while ( ncnt < nprocs ) + while ( ncnt < nprocs ) { - if ( mypid == ncnt ) + if ( mypid == ncnt ) { printf("====================================================\n"); printf("%4d : Printing A12 matrix... 
\n", mypid); fflush(stdout); - for (irow = A11StartRow;irow < A11StartRow+A11NRows;irow++) + for (irow = A11StartRow;irow < A11StartRow+A11NRows;irow++) { HYPRE_ParCSRMatrixGetRow(A12mat_,irow,&rowSize,&colInd,&colVal); for ( jcol = 0; jcol < rowSize; jcol++ ) @@ -1367,7 +1366,7 @@ int HYPRE_LSI_Uzawa::buildS22Mat() //--------------------------------------------------------------- // build approximate inverse of A11 //--------------------------------------------------------------- - + HYPRE_ParaSailsCreate(mpiComm_, ¶sails); HYPRE_ParaSailsSetParams(parasails, 0.1, 1); HYPRE_ParaSailsSetFilter(parasails, 0.1); @@ -1389,13 +1388,13 @@ int HYPRE_LSI_Uzawa::buildS22Mat() A11StartRow+A11NRows-1, A11StartRow, A11StartRow+A11NRows-1,&ainvA11); ierr += HYPRE_IJMatrixSetObjectType(ainvA11, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); A11MatSize = new int[A11NRows]; for ( irow = 0; irow < A11NRows; irow++ ) A11MatSize[irow] = 1; ierr = HYPRE_IJMatrixSetRowSizes(ainvA11, A11MatSize); ierr += HYPRE_IJMatrixInitialize(ainvA11); - assert(!ierr); + hypre_assert(!ierr); for ( irow = A11StartRow; irow < A11StartRow+A11NRows; irow++ ) { @@ -1403,16 +1402,16 @@ int HYPRE_LSI_Uzawa::buildS22Mat() ddata = 0.0; for ( jcol = 0; jcol < rowSize; jcol++ ) { - if ( colInd[jcol] == irow ) + if ( colInd[jcol] == irow ) { ddata = 1.0 / colVal[jcol]; break; } } HYPRE_ParCSRMatrixRestoreRow(A11mat_,irow,&rowSize,&colInd,&colVal); - ierr = HYPRE_IJMatrixSetValues(ainvA11, 1, &one, (const int *) &irow, + ierr = HYPRE_IJMatrixSetValues(ainvA11, 1, &one, (const int *) &irow, (const int *) &irow, (const double *) &ddata); - assert( !ierr ); + hypre_assert( !ierr ); } HYPRE_IJMatrixAssemble(ainvA11); free( procNRows ); @@ -1459,8 +1458,8 @@ int HYPRE_LSI_Uzawa::setupPrecon(HYPRE_Solver *precon,HYPRE_ParCSRMatrix Amat, switch( paramPtr.PrecondID_ ) { - case 2 : - HYPRE_ParCSRParaSailsCreate( mpiComm_, precon ); + case 2 : + HYPRE_ParCSRParaSailsCreate( mpiComm_, precon ); if 
(paramPtr.SolverID_ == 0) HYPRE_ParCSRParaSailsSetSym(*precon,1); else HYPRE_ParCSRParaSailsSetSym(*precon,0); HYPRE_ParCSRParaSailsSetParams(*precon, paramPtr.PSThresh_, @@ -1506,22 +1505,22 @@ int HYPRE_LSI_Uzawa::setupPrecon(HYPRE_Solver *precon,HYPRE_ParCSRMatrix Amat, case 6 : #ifdef HAVE_MLI HYPRE_LSI_MLICreate(mpiComm_, precon); - sprintf(paramString, "MLI outputLevel %d", outputLevel_); - HYPRE_LSI_MLISetParams(*precon, paramString); - sprintf(paramString, "MLI strengthThreshold %e",paramPtr.MLIThresh_); - HYPRE_LSI_MLISetParams(*precon, paramString); + sprintf(paramString, "MLI outputLevel %d", outputLevel_); + HYPRE_LSI_MLISetParams(*precon, paramString); + sprintf(paramString, "MLI strengthThreshold %e",paramPtr.MLIThresh_); + HYPRE_LSI_MLISetParams(*precon, paramString); sprintf(paramString, "MLI method AMGSA"); - HYPRE_LSI_MLISetParams(*precon, paramString); + HYPRE_LSI_MLISetParams(*precon, paramString); sprintf(paramString, "MLI smoother SGS"); - HYPRE_LSI_MLISetParams(*precon, paramString); + HYPRE_LSI_MLISetParams(*precon, paramString); sprintf(paramString, "MLI numSweeps %d",paramPtr.MLINSweeps_); - HYPRE_LSI_MLISetParams(*precon, paramString); + HYPRE_LSI_MLISetParams(*precon, paramString); sprintf(paramString, "MLI Pweight %e",paramPtr.MLIPweight_); - HYPRE_LSI_MLISetParams(*precon, paramString); + HYPRE_LSI_MLISetParams(*precon, paramString); sprintf(paramString, "MLI nodeDOF %d",paramPtr.MLINodeDOF_); - HYPRE_LSI_MLISetParams(*precon, paramString); + HYPRE_LSI_MLISetParams(*precon, paramString); sprintf(paramString, "MLI nullSpaceDim %d",paramPtr.MLINullDim_); - HYPRE_LSI_MLISetParams(*precon, paramString); + HYPRE_LSI_MLISetParams(*precon, paramString); #else printf("Uzawa setupPrecon ERROR : mli not available.\n"); exit(1); @@ -1554,27 +1553,27 @@ int HYPRE_LSI_Uzawa::setupSolver(HYPRE_Solver *solver,HYPRE_ParCSRMatrix Amat, HYPRE_ParCSRPCGSetTwoNorm(*solver, 1); switch ( paramPtr.PrecondID_ ) { - case 1 : + case 1 : 
HYPRE_ParCSRPCGSetPrecond(*solver, HYPRE_ParCSRDiagScale, HYPRE_ParCSRDiagScaleSetup,precon); break; - case 2 : + case 2 : HYPRE_ParCSRPCGSetPrecond(*solver,HYPRE_ParCSRParaSailsSolve, HYPRE_ParCSRParaSailsSetup,precon); break; - case 3 : + case 3 : HYPRE_ParCSRPCGSetPrecond(*solver, HYPRE_BoomerAMGSolve, HYPRE_BoomerAMGSetup, precon); break; - case 4 : + case 4 : HYPRE_ParCSRPCGSetPrecond(*solver, HYPRE_ParCSRPilutSolve, - HYPRE_ParCSRPilutSetup, precon); + HYPRE_ParCSRPilutSetup, precon); break; - case 5 : + case 5 : HYPRE_ParCSRPCGSetPrecond(*solver, HYPRE_EuclidSolve, HYPRE_EuclidSetup, precon); break; - case 6 : + case 6 : #ifdef HAVE_MLI HYPRE_ParCSRPCGSetPrecond(*solver,HYPRE_LSI_MLISolve, HYPRE_LSI_MLISetup, precon); @@ -1595,27 +1594,27 @@ int HYPRE_LSI_Uzawa::setupSolver(HYPRE_Solver *solver,HYPRE_ParCSRMatrix Amat, HYPRE_ParCSRGMRESSetKDim(*solver, 50); switch ( paramPtr.PrecondID_ ) { - case 1 : + case 1 : HYPRE_ParCSRGMRESSetPrecond(*solver, HYPRE_ParCSRDiagScale, HYPRE_ParCSRDiagScaleSetup,precon); break; - case 2 : + case 2 : HYPRE_ParCSRGMRESSetPrecond(*solver,HYPRE_ParCSRParaSailsSolve, HYPRE_ParCSRParaSailsSetup,precon); break; - case 3 : + case 3 : HYPRE_ParCSRGMRESSetPrecond(*solver, HYPRE_BoomerAMGSolve, HYPRE_BoomerAMGSetup, precon); break; - case 4 : + case 4 : HYPRE_ParCSRGMRESSetPrecond(*solver, HYPRE_ParCSRPilutSolve, - HYPRE_ParCSRPilutSetup, precon); + HYPRE_ParCSRPilutSetup, precon); break; - case 5 : + case 5 : HYPRE_ParCSRGMRESSetPrecond(*solver, HYPRE_EuclidSolve, HYPRE_EuclidSetup, precon); break; - case 6 : + case 6 : #ifdef HAVEL_MLI HYPRE_ParCSRGMRESSetPrecond(*solver,HYPRE_LSI_MLISolve, HYPRE_LSI_MLISetup, precon); diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_blkprec.h b/src/FEI_mv/fei-hypre/HYPRE_LSI_blkprec.h index d7cd2a2e6..8c364f14c 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_blkprec.h +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_blkprec.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include "HYPRE.h" #include 
"IJ_mv/HYPRE_IJ_mv.h" @@ -27,7 +26,7 @@ #include "parcsr_mv/_hypre_parcsr_mv.h" // ************************************************************************* -// local defines +// local defines // ------------------------------------------------------------------------- #define HYPRE_INCFLOW_BDIAG 1 @@ -37,13 +36,13 @@ // ************************************************************************* -// FEI include files +// FEI include files // ------------------------------------------------------------------------- #include "HYPRE_FEI_includes.h" // ************************************************************************* -// C-wrapper for the FEI Lookup class +// C-wrapper for the FEI Lookup class // ------------------------------------------------------------------------- typedef struct HYPRE_Lookup_Struct @@ -95,7 +94,7 @@ class HYPRE_LSI_BlockP HYPRE_ParCSRMatrix Amat_; // incoming system matrix HYPRE_IJMatrix A11mat_; // velocity matrix HYPRE_IJMatrix A12mat_; // gradient (divergence) matrix - HYPRE_IJMatrix A22mat_; // pressure Poisson + HYPRE_IJMatrix A22mat_; // pressure Poisson HYPRE_IJVector F1vec_; // rhs for velocity HYPRE_IJVector F2vec_; // rhs for pressure HYPRE_IJVector X1vec_; // solution for velocity @@ -120,7 +119,7 @@ class HYPRE_LSI_BlockP int printFlag_; // for diagnostics HYPRE_Solver A11Solver_; // solver for velocity matrix HYPRE_Solver A11Precond_; // preconditioner for velocity matrix - HYPRE_Solver A22Solver_; // solver for pressure Poisson + HYPRE_Solver A22Solver_; // solver for pressure Poisson HYPRE_Solver A22Precond_; // preconditioner for pressure Poisson HYPRE_LSI_BLOCKP_PARAMS A11Params_; HYPRE_LSI_BLOCKP_PARAMS A22Params_; diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_ddict.c b/src/FEI_mv/fei-hypre/HYPRE_LSI_ddict.c index 99dd4d27d..ab33690e1 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_ddict.c +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_ddict.c @@ -48,21 +48,21 @@ HYPRE_LSI_DDICT; extern int 
HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix,MH_Matrix *, MPI_Comm, int *, MH_Context *); -extern int HYPRE_LSI_DDICTComposeOverlappedMatrix(MH_Matrix *, int *, - int **recv_lengths, int **int_buf, double **dble_buf, +extern int HYPRE_LSI_DDICTComposeOverlappedMatrix(MH_Matrix *, int *, + int **recv_lengths, int **int_buf, double **dble_buf, int **sindex_array, int **sindex_array2, int *offset); extern int HYPRE_LSI_DDICTGetRowLengths(MH_Matrix *Amat, int *leng, int **); extern int HYPRE_LIS_DDICTGetOffProcRows(MH_Matrix *Amat, int leng, int *, int Noffset, int *map, int *map2, int **int_buf, double **dble_buf); extern int HYPRE_LSI_DDICTDecompose(HYPRE_LSI_DDICT *ict_ptr,MH_Matrix *Amat, - int total_recv_leng, int *recv_lengths, int *ext_ja, + int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset); extern void HYPRE_LSI_qsort1a(int *, int *, int, int); extern int HYPRE_LSI_SplitDSort(double *,int,int*,int); extern int HYPRE_LSI_Search(int *, int, int); -extern int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, +extern int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, int *mat_ja, int *mat_ia, double *rowNorms); extern int MH_ExchBdry(double *, void *); @@ -78,7 +78,7 @@ extern int MH_GetRow(void *, int, int *, int, int *, double *, int *); int HYPRE_LSI_DDICTCreate( MPI_Comm comm, HYPRE_Solver *solver ) { HYPRE_LSI_DDICT *ict_ptr; - + ict_ptr = hypre_TAlloc(HYPRE_LSI_DDICT, 1, HYPRE_MEMORY_HOST); if (ict_ptr == NULL) return 1; @@ -106,24 +106,21 @@ int HYPRE_LSI_DDICTDestroy( HYPRE_Solver solver ) HYPRE_LSI_DDICT *ict_ptr; ict_ptr = (HYPRE_LSI_DDICT *) solver; - if ( ict_ptr->mat_ja != NULL ) free(ict_ptr->mat_ja); - if ( ict_ptr->mat_aa != NULL ) free(ict_ptr->mat_aa); - ict_ptr->mat_ja = NULL; - ict_ptr->mat_aa = NULL; - if ( ict_ptr->mh_mat != NULL ) + hypre_TFree(ict_ptr->mat_ja, HYPRE_MEMORY_HOST); + hypre_TFree(ict_ptr->mat_aa, HYPRE_MEMORY_HOST); + if ( 
ict_ptr->mh_mat != NULL ) { - if (ict_ptr->mh_mat->sendProc != NULL) free(ict_ptr->mh_mat->sendProc); - if (ict_ptr->mh_mat->sendLeng != NULL) free(ict_ptr->mh_mat->sendLeng); - if (ict_ptr->mh_mat->recvProc != NULL) free(ict_ptr->mh_mat->recvProc); - if (ict_ptr->mh_mat->recvLeng != NULL) free(ict_ptr->mh_mat->recvLeng); + hypre_TFree(ict_ptr->mh_mat->sendProc, HYPRE_MEMORY_HOST); + hypre_TFree(ict_ptr->mh_mat->sendLeng, HYPRE_MEMORY_HOST); + hypre_TFree(ict_ptr->mh_mat->recvProc, HYPRE_MEMORY_HOST); + hypre_TFree(ict_ptr->mh_mat->recvLeng, HYPRE_MEMORY_HOST); for ( i = 0; i < ict_ptr->mh_mat->sendProcCnt; i++ ) - if (ict_ptr->mh_mat->sendList[i] != NULL) - free(ict_ptr->mh_mat->sendList[i]); - if (ict_ptr->mh_mat->sendList != NULL) free(ict_ptr->mh_mat->sendList); - free(ict_ptr); - } + hypre_TFree(ict_ptr->mh_mat->sendList[i], HYPRE_MEMORY_HOST); + hypre_TFree(ict_ptr->mh_mat->sendList, HYPRE_MEMORY_HOST); + hypre_TFree(ict_ptr, HYPRE_MEMORY_HOST); + } ict_ptr->mh_mat = NULL; - free(ict_ptr); + hypre_TFree(ict_ptr, HYPRE_MEMORY_HOST); return 0; } @@ -208,14 +205,14 @@ int HYPRE_LSI_DDICTSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, dtmp -= ( mat_aa[j] * dbuf2[mat_ja[j]] ); dbuf2[i] = dtmp * mat_aa[i]; } - for ( i = extNrows-1; i >= 0; i-- ) + for ( i = extNrows-1; i >= 0; i-- ) { dbuf2[i] *= mat_aa[i]; dtmp = dbuf2[i]; for ( j = mat_ja[i]; j < mat_ja[i+1]; j++ ) dbuf2[mat_ja[j]] -= ( dtmp * mat_aa[j] ); - } - if ( dbuf != NULL ) free(dbuf); + } + hypre_TFree(dbuf, HYPRE_MEMORY_HOST); for ( i = 0; i < Nrows; i++ ) soln[i] = dbuf2[i]; @@ -223,10 +220,10 @@ int HYPRE_LSI_DDICTSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, for ( i = 0; i < length; i++ ) soln[ibuf[i]] = soln[ibuf[i]] + dbuf[i]; - if ( ibuf != NULL ) free(ibuf); - if ( dbuf != NULL ) free(dbuf); - if ( dbuf2 != NULL ) free(dbuf2); - free(context); + hypre_TFree(ibuf, HYPRE_MEMORY_HOST); + hypre_TFree(dbuf, HYPRE_MEMORY_HOST); + hypre_TFree(dbuf2, HYPRE_MEMORY_HOST); + hypre_TFree(context, 
HYPRE_MEMORY_HOST); return 0; } @@ -267,15 +264,15 @@ int HYPRE_LSI_DDICTSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, mh_mat = hypre_TAlloc( MH_Matrix, 1, HYPRE_MEMORY_HOST); context->Amat = mh_mat; HYPRE_LSI_MLConstructMHMatrix(A_csr,mh_mat,MPI_COMM_WORLD, - context->partition,context); + context->partition,context); /* ---------------------------------------------------------------- */ /* compose the enlarged overlapped local matrix */ /* ---------------------------------------------------------------- */ - + if ( overlap_flag ) { - HYPRE_LSI_DDICTComposeOverlappedMatrix(mh_mat, &total_recv_leng, + HYPRE_LSI_DDICTComposeOverlappedMatrix(mh_mat, &total_recv_leng, &recv_lengths, &int_buf, &dble_buf, &map, &map2,&offset); } else @@ -293,8 +290,8 @@ int HYPRE_LSI_DDICTSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, MPI_Allreduce(parray2,parray,nprocs,MPI_INT,MPI_SUM,MPI_COMM_WORLD); offset = 0; for (i = 0; i < mypid; i++) offset += parray[i]; - free(parray); - free(parray2); + hypre_TFree(parray, HYPRE_MEMORY_HOST); + hypre_TFree(parray2, HYPRE_MEMORY_HOST); } /* ---------------------------------------------------------------- */ @@ -312,13 +309,13 @@ int HYPRE_LSI_DDICTSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, ict_ptr->mat_aa[j]); } ict_ptr->mh_mat = mh_mat; - if ( recv_lengths != NULL ) free( recv_lengths ); - if ( int_buf != NULL ) free( int_buf ); - if ( dble_buf != NULL ) free( dble_buf ); - if ( map != NULL ) free( map ); - if ( map2 != NULL ) free( map2 ); - free( context->partition ); - free( context ); + hypre_TFree(recv_lengths, HYPRE_MEMORY_HOST); + hypre_TFree(int_buf, HYPRE_MEMORY_HOST); + hypre_TFree(dble_buf, HYPRE_MEMORY_HOST); + hypre_TFree(map, HYPRE_MEMORY_HOST); + hypre_TFree(map2, HYPRE_MEMORY_HOST); + hypre_TFree(context->partition, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); return 0; } @@ -393,31 +390,33 @@ int HYPRE_LSI_DDICTGetRowLengths(MH_Matrix *Amat, int *leng, int **recv_leng) index = 
sendList[i][j]; while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0) { - free(cols); free(vals); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); allocated_space += 200 + 1; cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST); vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST); - } + } temp_list[j] = m; } msgtype = mtype; MPI_Send((void*)temp_list,length,MPI_INT,proc_id,msgtype,MPI_COMM_WORLD); - free( temp_list ); + hypre_TFree(temp_list, HYPRE_MEMORY_HOST); } - free(cols); - free(vals); - free(context); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* wait for messages */ /* ---------------------------------------------------------------- */ - for ( i = 0; i < nRecv; i++ ) + for ( i = 0; i < nRecv; i++ ) { MPI_Wait( &Request[i], &status ); } - if (nRecv > 0) free( Request ); + if (nRecv > 0) + hypre_TFree(Request, HYPRE_MEMORY_HOST); return 0; } @@ -510,11 +509,12 @@ int HYPRE_LSI_DDICTGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng, index = sendList[i][j]; while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0) { - free(cols); free(vals); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); allocated_space += 200 + 1; cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST); vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST); - } + } nnz += m; } if ( nnz > 0 ) send_buf = hypre_TAlloc(double, nnz , HYPRE_MEMORY_HOST); @@ -529,10 +529,11 @@ int HYPRE_LSI_DDICTGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng, msgtype = mtype; MPI_Send((void*) send_buf, nnz, MPI_DOUBLE, proc_id, msgtype, MPI_COMM_WORLD); - if ( nnz > 0 ) free( send_buf ); + if ( nnz > 0 ) + hypre_TFree(send_buf, HYPRE_MEMORY_HOST); } - free(cols); - free(vals); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + 
hypre_TFree(vals, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* wait for all messages */ @@ -597,10 +598,11 @@ int HYPRE_LSI_DDICTGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng, msgtype = mtype; MPI_Send((void*) isend_buf, nnz, MPI_INT, proc_id, msgtype, MPI_COMM_WORLD); - if ( nnz > 0 ) free( isend_buf ); + if ( nnz > 0 ) + hypre_TFree(isend_buf, HYPRE_MEMORY_HOST); } - free(cols); - free(vals); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); /* ----------------------------------------------------------- */ /* post receives for all messages */ @@ -611,8 +613,8 @@ int HYPRE_LSI_DDICTGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng, MPI_Wait(request+i, &status); } - free(request); - free(context); + hypre_TFree(request, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); return 0; } @@ -620,9 +622,9 @@ int HYPRE_LSI_DDICTGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng, /* construct an enlarged overlapped local matrix */ /*---------------------------------------------------------------------------*/ -int HYPRE_LSI_DDICTComposeOverlappedMatrix(MH_Matrix *mh_mat, - int *total_recv_leng, int **recv_lengths, int **int_buf, - double **dble_buf, int **sindex_array, int **sindex_array2, +int HYPRE_LSI_DDICTComposeOverlappedMatrix(MH_Matrix *mh_mat, + int *total_recv_leng, int **recv_lengths, int **int_buf, + double **dble_buf, int **sindex_array, int **sindex_array2, int *offset) { int i, nprocs, mypid, Nrows, *proc_array, *proc_array2; @@ -666,7 +668,7 @@ int HYPRE_LSI_DDICTComposeOverlappedMatrix(MH_Matrix *mh_mat, NrowsOffset = 0; for (i = 0; i < mypid; i++) NrowsOffset += proc_array[i]; for (i = 1; i < nprocs; i++) proc_array[i] += proc_array[i-1]; - free(proc_array2); + hypre_TFree(proc_array2, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* compose the column index map (index_array,index_array2) */ @@ 
-681,16 +683,16 @@ int HYPRE_LSI_DDICTComposeOverlappedMatrix(MH_Matrix *mh_mat, MH_ExchBdry(dble_array, context); if ( extNrows-Nrows > 0 ) index_array = hypre_TAlloc(int, (extNrows-Nrows) , HYPRE_MEMORY_HOST); - else + else index_array = NULL; for (i = Nrows; i < extNrows; i++) index_array[i-Nrows] = dble_array[i]; if ( extNrows-Nrows > 0 ) index_array2 = hypre_TAlloc(int, (extNrows-Nrows) , HYPRE_MEMORY_HOST); - else + else index_array2 = NULL; for (i = 0; i < extNrows-Nrows; i++) index_array2[i] = i; - free( dble_array ); - free(context); + hypre_TFree(dble_array, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* send the lengths of each row to remote processor */ @@ -699,10 +701,10 @@ int HYPRE_LSI_DDICTComposeOverlappedMatrix(MH_Matrix *mh_mat, /* ---------------------------------------------------------------- */ HYPRE_LSI_DDICTGetRowLengths(mh_mat, total_recv_leng, recv_lengths); - HYPRE_LSI_DDICTGetOffProcRows(mh_mat, *total_recv_leng, *recv_lengths, + HYPRE_LSI_DDICTGetOffProcRows(mh_mat, *total_recv_leng, *recv_lengths, NrowsOffset,index_array,index_array2,int_buf, dble_buf); - free(proc_array); + hypre_TFree(proc_array, HYPRE_MEMORY_HOST); HYPRE_LSI_qsort1a(index_array, index_array2, 0, extNrows-Nrows-1); (*sindex_array) = index_array; (*sindex_array2) = index_array2; @@ -715,7 +717,7 @@ int HYPRE_LSI_DDICTComposeOverlappedMatrix(MH_Matrix *mh_mat, /*---------------------------------------------------------------------------*/ int HYPRE_LSI_DDICTDecompose(HYPRE_LSI_DDICT *ict_ptr,MH_Matrix *Amat, - int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, + int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset) { int i, j, row_leng, *mat_ia, *mat_ja, allocated_space, *cols, mypid; @@ -755,7 +757,8 @@ int HYPRE_LSI_DDICTDecompose(HYPRE_LSI_DDICT *ict_ptr,MH_Matrix *Amat, rowNorms[i] = 0.0; while 
(MH_GetRow(context,1,&i,allocated_space,cols,vals,&row_leng)==0) { - free(vals); free(cols); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(cols, HYPRE_MEMORY_HOST); allocated_space += 200 + 1; cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST); vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST); @@ -780,12 +783,12 @@ rowNorms[i] = 1.0; { rel_tau = tau * rowNorms[i]; MH_GetRow(context,1,&i,allocated_space,cols,vals,&row_leng); - for ( j = 0; j < row_leng; j++ ) + for ( j = 0; j < row_leng; j++ ) { - if ( cols[j] <= i && habs(vals[j]) > rel_tau ) + if ( cols[j] <= i && habs(vals[j]) > rel_tau ) { - mat_aa[total_nnz] = vals[j]; - mat_ja[total_nnz++] = cols[j]; + mat_aa[total_nnz] = vals[j]; + mat_ja[total_nnz++] = cols[j]; } } mat_ia[i+1] = total_nnz; @@ -812,10 +815,10 @@ rowNorms[i+Nrows] = 1.0; rel_tau = tau * rowNorms[i+Nrows]; for ( j = offset; j < offset+recv_lengths[i]; j++ ) { - if (ext_ja[j] != -1 && ext_ja[j] <= Nrows+i && habs(ext_aa[j]) > rel_tau) + if (ext_ja[j] != -1 && ext_ja[j] <= Nrows+i && habs(ext_aa[j]) > rel_tau) { - mat_aa[total_nnz] = ext_aa[j]; - mat_ja[total_nnz++] = ext_ja[j]; + mat_aa[total_nnz] = ext_aa[j]; + mat_ja[total_nnz++] = ext_ja[j]; } } offset += recv_lengths[i]; @@ -826,12 +829,12 @@ rowNorms[i+Nrows] = 1.0; /* clean up a little */ /* ---------------------------------------------------------------- */ - if ( Amat->rowptr != NULL ) {free (Amat->rowptr); Amat->rowptr = NULL;} - if ( Amat->colnum != NULL ) {free (Amat->colnum); Amat->colnum = NULL;} - if ( Amat->values != NULL ) {free (Amat->values); Amat->values = NULL;} - free(context); - free(cols); - free(vals); + hypre_TFree(Amat->rowptr, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->colnum, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->values, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* 
call ICT factorization */ @@ -839,12 +842,12 @@ rowNorms[i+Nrows] = 1.0; HYPRE_LSI_DDICTFactorize(ict_ptr, mat_aa, mat_ja, mat_ia, rowNorms); - free( mat_aa ); - free( mat_ia ); - free( mat_ja ); - free(rowNorms); + hypre_TFree(mat_aa , HYPRE_MEMORY_HOST); + hypre_TFree(mat_ia , HYPRE_MEMORY_HOST); + hypre_TFree(mat_ja , HYPRE_MEMORY_HOST); + hypre_TFree(rowNorms, HYPRE_MEMORY_HOST); - if ( ict_ptr->outputLevel > 0 ) + if ( ict_ptr->outputLevel > 0 ) { total_nnz = ict_ptr->mat_ja[extNrows]; printf("%d : DDICT number of nonzeros = %d\n",mypid,total_nnz); @@ -857,9 +860,9 @@ rowNorms[i+Nrows] = 1.0; /* function for doing ICT factorization */ /*---------------------------------------------------------------------------*/ -int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, +int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, int *mat_ja, int *mat_ia, double *rowNorms) -{ +{ int i, j, row_leng, first, row_beg, row_endp1, track_leng, *track_array; int k, mypid, nnz_count, num_small_pivot, printstep, extNrows; int *msr_iptr, *msc_jptr, *msc_jend, rowMax, Lcount, sortcnt, *sortcols; @@ -876,7 +879,7 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, fillin = ict_ptr->fillin; extNrows = ict_ptr->extNrows; rowMax = 0; - for ( i = 0; i < extNrows; i++ ) + for ( i = 0; i < extNrows; i++ ) { row_leng = mat_ia[i+1] - mat_ia[i]; if ( row_leng > rowMax ) rowMax = row_leng; @@ -897,9 +900,9 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, msr_aptr = hypre_TAlloc(double, (totalFill+extNrows) , HYPRE_MEMORY_HOST); msc_aptr = hypre_TAlloc(double, (totalFill+extNrows) , HYPRE_MEMORY_HOST); msc_jptr[0] = msc_jend[0] = extNrows + 1; - for ( i = 1; i <= extNrows; i++ ) + for ( i = 1; i <= extNrows; i++ ) { - msc_jptr[i] = msc_jptr[i-1] + rowMax * (fillin + 1); + msc_jptr[i] = msc_jptr[i-1] + rowMax * (fillin + 1); msc_jend[i] = msc_jptr[i]; } for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0; @@ -915,9 
+918,9 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, for ( i = 0; i < extNrows; i++ ) { - if ( i % printstep == 0 && ict_ptr->outputLevel > 0 ) + if ( i % printstep == 0 && ict_ptr->outputLevel > 0 ) printf("%4d : DDICT Processing row %6d (%6d)\n",mypid,i,extNrows); - + /* ------------------------------------------------------------- */ /* get the row information */ /* ------------------------------------------------------------- */ @@ -956,7 +959,7 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, for ( k = msc_jptr[j]; k < msc_jend[j]; k++ ) { colIndex = msc_jptr[k]; - if ( colIndex > j && colIndex < i ) + if ( colIndex > j && colIndex < i ) { if ( dble_buf[colIndex] != 0.0 ) dble_buf[colIndex] -= (ddata * msc_aptr[k]); @@ -968,7 +971,7 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, } } dble_buf[j] = ddata; - } + } else dble_buf[j] = 0.0; } @@ -989,7 +992,7 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, } else dble_buf[index] = 0.0; } - if ( sortcnt > Lcount ) + if ( sortcnt > Lcount ) { HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount); for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0; @@ -1005,7 +1008,7 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, printf("%d : DDICT negative pivot (%d,%d,%d)\n", mypid, i, j, extNrows); num_small_pivot++; - for ( k = j; k < row_leng; k++ ) + for ( k = j; k < row_leng; k++ ) { index = track_array[k]; dble_buf[index] = 0.0; @@ -1015,17 +1018,17 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, } } } - for ( j = 0; j < Lcount; j++ ) + for ( j = 0; j < Lcount; j++ ) { index = sortcols[j]; ddata = dble_buf[i] - (dble_buf[index] * dble_buf[index]); if ( ddata > 1.0E-10 ) dble_buf[i] = ddata; - else + else { printf("%d : (2) DDICT negative pivot (%d,%d,%d)\n", mypid, i, j, extNrows); num_small_pivot++; - for ( k = j; k < Lcount; k++ ) + for ( k = j; k < 
Lcount; k++ ) { index = sortcols[k]; dble_buf[index] = 0.0; @@ -1049,7 +1052,7 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, printf("%d : DDICT negative pivot (%d,%d,%d)\n", mypid, i, j, extNrows); num_small_pivot++; - for ( k = j; k < row_leng; k++ ) + for ( k = j; k < row_leng; k++ ) { index = track_array[k]; dble_buf[index] = 0.0; @@ -1059,17 +1062,17 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, } } } - for ( j = 0; j < sortcnt; j++ ) + for ( j = 0; j < sortcnt; j++ ) { index = sortcols[j]; ddata = dble_buf[i] - (dble_buf[index] * dble_buf[index]); if ( ddata > 1.0E-10 ) dble_buf[i] = ddata; - else + else { printf("%d : (2) DDICT negative pivot (%d,%d,%d)\n", mypid, i, j, extNrows); num_small_pivot++; - for ( k = j; k < sortcnt; k++ ) + for ( k = j; k < sortcnt; k++ ) { index = sortcols[k]; dble_buf[index] = 0.0; @@ -1079,10 +1082,10 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, } } } - if ( dble_buf[i] > 0 ) + if ( dble_buf[i] > 0 ) { - if ( dble_buf[i] < 1.0E-10 ) - { + if ( dble_buf[i] < 1.0E-10 ) + { num_small_pivot++; msc_aptr[i] = msr_aptr[i] = 1.0E5; } @@ -1096,7 +1099,7 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, msc_aptr[i] = msr_aptr[i] = 1.0 / sqrt( - dble_buf[i] ); dble_buf[i] = 0.0; } - for ( j = 0; j < track_leng; j++ ) + for ( j = 0; j < track_leng; j++ ) { index = track_array[j]; if ( index < i && dble_buf[index] != 0.0 ) @@ -1104,8 +1107,8 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, msr_aptr[nnz_count] = dble_buf[index]; msr_iptr[nnz_count++] = index; colIndex = msc_jend[index]++; - msc_aptr[colIndex] = dble_buf[index]; - msc_jptr[colIndex] = i; + msc_aptr[colIndex] = dble_buf[index]; + msc_jptr[colIndex] = i; dble_buf[index] = 0.0; } } @@ -1115,7 +1118,7 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, if ( nnz_count > totalFill+extNrows ) printf("%4d : DDICT WARNING : buffer 
overflow (%d,%d)\n",mypid,nnz_count, totalFill+extNrows); - if ( ict_ptr->outputLevel > 0 ) + if ( ict_ptr->outputLevel > 0 ) { printf("%4d : DDICT number of nonzeros = %d\n",mypid,nnz_count); printf("%4d : DDICT number of small pivots = %d\n",mypid,num_small_pivot); @@ -1125,13 +1128,13 @@ int HYPRE_LSI_DDICTFactorize(HYPRE_LSI_DDICT *ict_ptr, double *mat_aa, /* deallocate temporary storage space */ /* ---------------------------------------------------------- */ - free(track_array); - free(sortcols); - free(sortvals); - free(dble_buf); - free(msc_jptr); - free(msc_jend); - free(msc_aptr); + hypre_TFree(track_array, HYPRE_MEMORY_HOST); + hypre_TFree(sortcols, HYPRE_MEMORY_HOST); + hypre_TFree(sortvals, HYPRE_MEMORY_HOST); + hypre_TFree(dble_buf, HYPRE_MEMORY_HOST); + hypre_TFree(msc_jptr, HYPRE_MEMORY_HOST); + hypre_TFree(msc_jend, HYPRE_MEMORY_HOST); + hypre_TFree(msc_aptr, HYPRE_MEMORY_HOST); ict_ptr->mat_ja = msr_iptr; ict_ptr->mat_aa = msr_aptr; diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_ddilut.c b/src/FEI_mv/fei-hypre/HYPRE_LSI_ddilut.c index 462977e76..bcb80ddd3 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_ddilut.c +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_ddilut.c @@ -40,10 +40,10 @@ extern int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *, int Noffset, int *map, int *map2, int **int_buf, double **dble_buf, MPI_Comm mpi_comm); extern int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, - int total_recv_leng, int *recv_lengths, int *ext_ja, + int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset); extern int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr, - MH_Matrix *Amat,int total_recv_leng, int *recv_lengths, + MH_Matrix *Amat,int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset); extern void HYPRE_LSI_qsort1a(int *, int *, int, int); extern void hypre_qsort0(int *, int, int); @@ -57,13 +57,13 @@ extern int 
HYPRE_LSI_Search(int *, int, int); #define habs(x) ((x) > 0 ? (x) : -(x)) /*-------------------------------------------------------------------------- - * HYPRE_LSI_DDIlutCreate - Return a DDIlut preconditioner object "solver". + * HYPRE_LSI_DDIlutCreate - Return a DDIlut preconditioner object "solver". *--------------------------------------------------------------------------*/ int HYPRE_LSI_DDIlutCreate( MPI_Comm comm, HYPRE_Solver *solver ) { HYPRE_LSI_DDIlut *ilut_ptr; - + ilut_ptr = hypre_TAlloc(HYPRE_LSI_DDIlut, 1, HYPRE_MEMORY_HOST); if (ilut_ptr == NULL) return 1; @@ -96,28 +96,24 @@ int HYPRE_LSI_DDIlutDestroy( HYPRE_Solver solver ) HYPRE_LSI_DDIlut *ilut_ptr; ilut_ptr = (HYPRE_LSI_DDIlut *) solver; - if ( ilut_ptr->mat_ia != NULL ) free(ilut_ptr->mat_ia); - if ( ilut_ptr->mat_ja != NULL ) free(ilut_ptr->mat_ja); - if ( ilut_ptr->mat_aa != NULL ) free(ilut_ptr->mat_aa); - ilut_ptr->mat_ia = NULL; - ilut_ptr->mat_ja = NULL; - ilut_ptr->mat_aa = NULL; - if ( ilut_ptr->mh_mat != NULL ) + hypre_TFree(ilut_ptr->mat_ia, HYPRE_MEMORY_HOST); + hypre_TFree(ilut_ptr->mat_ja, HYPRE_MEMORY_HOST); + hypre_TFree(ilut_ptr->mat_aa, HYPRE_MEMORY_HOST); + if ( ilut_ptr->mh_mat != NULL ) { - if (ilut_ptr->mh_mat->sendProc != NULL) free(ilut_ptr->mh_mat->sendProc); - if (ilut_ptr->mh_mat->sendLeng != NULL) free(ilut_ptr->mh_mat->sendLeng); - if (ilut_ptr->mh_mat->recvProc != NULL) free(ilut_ptr->mh_mat->recvProc); - if (ilut_ptr->mh_mat->recvLeng != NULL) free(ilut_ptr->mh_mat->recvLeng); + hypre_TFree(ilut_ptr->mh_mat->sendProc, HYPRE_MEMORY_HOST); + hypre_TFree(ilut_ptr->mh_mat->sendLeng, HYPRE_MEMORY_HOST); + hypre_TFree(ilut_ptr->mh_mat->recvProc, HYPRE_MEMORY_HOST); + hypre_TFree(ilut_ptr->mh_mat->recvLeng, HYPRE_MEMORY_HOST); for ( i = 0; i < ilut_ptr->mh_mat->sendProcCnt; i++ ) - if (ilut_ptr->mh_mat->sendList[i] != NULL) - free(ilut_ptr->mh_mat->sendList[i]); - if (ilut_ptr->mh_mat->sendList != NULL) free(ilut_ptr->mh_mat->sendList); - free( ilut_ptr->mh_mat ); - } + 
hypre_TFree(ilut_ptr->mh_mat->sendList[i], HYPRE_MEMORY_HOST); + hypre_TFree(ilut_ptr->mh_mat->sendList, HYPRE_MEMORY_HOST); + hypre_TFree(ilut_ptr->mh_mat, HYPRE_MEMORY_HOST); + } ilut_ptr->mh_mat = NULL; - if ( ilut_ptr->order_array != NULL ) free(ilut_ptr->order_array); - if ( ilut_ptr->reorder_array != NULL ) free(ilut_ptr->reorder_array); - free(ilut_ptr); + hypre_TFree(ilut_ptr->order_array, HYPRE_MEMORY_HOST); + hypre_TFree(ilut_ptr->reorder_array, HYPRE_MEMORY_HOST); + hypre_TFree(ilut_ptr, HYPRE_MEMORY_HOST); return 0; } @@ -149,7 +145,7 @@ int HYPRE_LSI_DDIlutSetDropTolerance(HYPRE_Solver solver, double thresh) } /*-------------------------------------------------------------------------- - * HYPRE_LSI_DDIlutSetOverlap - turn on overlap + * HYPRE_LSI_DDIlutSetOverlap - turn on overlap *--------------------------------------------------------------------------*/ int HYPRE_LSI_DDIlutSetOverlap(HYPRE_Solver solver) @@ -162,7 +158,7 @@ int HYPRE_LSI_DDIlutSetOverlap(HYPRE_Solver solver) } /*-------------------------------------------------------------------------- - * HYPRE_LSI_DDIlutSetReorder - turn on reordering + * HYPRE_LSI_DDIlutSetReorder - turn on reordering *--------------------------------------------------------------------------*/ int HYPRE_LSI_DDIlutSetReorder(HYPRE_Solver solver) @@ -175,7 +171,7 @@ int HYPRE_LSI_DDIlutSetReorder(HYPRE_Solver solver) } /*-------------------------------------------------------------------------- - * HYPRE_LSI_DDIlutSetOutputLevel - Set debug level + * HYPRE_LSI_DDIlutSetOutputLevel - Set debug level *--------------------------------------------------------------------------*/ int HYPRE_LSI_DDIlutSetOutputLevel(HYPRE_Solver solver, int level) @@ -254,9 +250,9 @@ int HYPRE_LSI_DDIlutSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, for ( i = 0; i < Nrows; i++ ) soln[i] = dbuffer[reorder_list[i]]; else for ( i = 0; i < Nrows; i++ ) soln[i] = dbuffer[i]; - free(dbuffer); - free(idiag); - free(context); + 
hypre_TFree(dbuffer, HYPRE_MEMORY_HOST); + hypre_TFree(idiag, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); return 0; } @@ -299,15 +295,15 @@ int HYPRE_LSI_DDIlutSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, mh_mat = hypre_TAlloc( MH_Matrix, 1, HYPRE_MEMORY_HOST); context->Amat = mh_mat; HYPRE_LSI_MLConstructMHMatrix(A_csr,mh_mat,mpi_comm, - context->partition,context); + context->partition,context); /* ---------------------------------------------------------------- */ /* compose the enlarged overlapped local matrix */ /* ---------------------------------------------------------------- */ - + if ( ilut_ptr->overlap != 0 ) { - HYPRE_LSI_DDIlutComposeOverlappedMatrix(mh_mat, &total_recv_leng, + HYPRE_LSI_DDIlutComposeOverlappedMatrix(mh_mat, &total_recv_leng, &recv_lengths, &int_buf, &dble_buf, &map, &map2,&offset, mpi_comm); } @@ -326,8 +322,8 @@ int HYPRE_LSI_DDIlutSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, MPI_Allreduce(parray2,parray,nprocs,MPI_INT,MPI_SUM,mpi_comm); offset = 0; for (i = 0; i < mypid; i++) offset += parray[i]; - free(parray); - free(parray2); + hypre_TFree(parray, HYPRE_MEMORY_HOST); + hypre_TFree(parray2, HYPRE_MEMORY_HOST); } /* ---------------------------------------------------------------- */ @@ -353,19 +349,16 @@ int HYPRE_LSI_DDIlutSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, } ilut_ptr->mh_mat = mh_mat; - if ( mh_mat->rowptr != NULL ) free (mh_mat->rowptr); - if ( mh_mat->colnum != NULL ) free (mh_mat->colnum); - if ( mh_mat->values != NULL ) free (mh_mat->values); - mh_mat->rowptr = NULL; - mh_mat->colnum = NULL; - mh_mat->values = NULL; - if ( map != NULL ) free(map); - if ( map2 != NULL ) free(map2); - if ( int_buf != NULL ) free(int_buf); - if ( dble_buf != NULL ) free(dble_buf); - if ( recv_lengths != NULL ) free(recv_lengths); - free( context->partition ); - free( context ); + hypre_TFree(mh_mat->rowptr, HYPRE_MEMORY_HOST); + hypre_TFree(mh_mat->colnum, HYPRE_MEMORY_HOST); + 
hypre_TFree(mh_mat->values, HYPRE_MEMORY_HOST); + hypre_TFree(map, HYPRE_MEMORY_HOST); + hypre_TFree(map2, HYPRE_MEMORY_HOST); + hypre_TFree(int_buf, HYPRE_MEMORY_HOST); + hypre_TFree(dble_buf , HYPRE_MEMORY_HOST); + hypre_TFree(recv_lengths, HYPRE_MEMORY_HOST); + hypre_TFree(context->partition, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); return 0; } @@ -441,31 +434,33 @@ int HYPRE_LSI_DDIlutGetRowLengths(MH_Matrix *Amat, int *leng, int **recv_leng, index = sendList[i][j]; while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0) { - free(cols); free(vals); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); allocated_space += 200 + 1; cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST); vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST); - } + } temp_list[j] = m; } msgtype = mtype; MPI_Send((void*)temp_list,length,MPI_INT,proc_id,msgtype,mpi_comm); - free( temp_list ); + hypre_TFree(temp_list, HYPRE_MEMORY_HOST); } - free(cols); - free(vals); - free(context); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* wait for messages */ /* ---------------------------------------------------------------- */ - for ( i = 0; i < nRecv; i++ ) + for ( i = 0; i < nRecv; i++ ) { MPI_Wait( &Request[i], &status ); } - if (nRecv > 0) free( Request ); + if (nRecv > 0) + hypre_TFree(Request, HYPRE_MEMORY_HOST); return 0; } @@ -558,11 +553,12 @@ int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng, index = sendList[i][j]; while (MH_GetRow(context,1,&index,allocated_space,cols,vals,&m)==0) { - free(cols); free(vals); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); allocated_space += 200 + 1; cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST); vals = hypre_TAlloc(double, allocated_space , 
HYPRE_MEMORY_HOST); - } + } nnz += m; } if ( nnz > 0 ) send_buf = hypre_TAlloc(double, nnz , HYPRE_MEMORY_HOST); @@ -577,10 +573,12 @@ int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng, msgtype = mtype; MPI_Send((void*) send_buf, nnz, MPI_DOUBLE, proc_id, msgtype, mpi_comm); - if ( nnz > 0 ) free( send_buf ); + if ( nnz > 0 ) + hypre_TFree(send_buf, HYPRE_MEMORY_HOST); } - free(cols); - free(vals); + + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* wait for all messages */ @@ -645,10 +643,11 @@ int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng, msgtype = mtype; MPI_Send((void*) isend_buf, nnz, MPI_INT, proc_id, msgtype, mpi_comm); - if ( nnz > 0 ) free( isend_buf ); + if ( nnz > 0 ) + hypre_TFree(isend_buf, HYPRE_MEMORY_HOST); } - free(cols); - free(vals); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); /* ----------------------------------------------------------- */ /* post receives for all messages */ @@ -659,18 +658,18 @@ int HYPRE_LSI_DDIlutGetOffProcRows(MH_Matrix *Amat, int leng, int *recv_leng, MPI_Wait(request+i, &status); } - free(request); - free(context); + hypre_TFree(request, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); return 0; } /*****************************************************************************/ /* construct an enlarged overlapped local matrix */ /*****************************************************************************/ - -int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *mh_mat, - int *total_recv_leng, int **recv_lengths, int **int_buf, - double **dble_buf, int **sindex_array, int **sindex_array2, + +int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *mh_mat, + int *total_recv_leng, int **recv_lengths, int **int_buf, + double **dble_buf, int **sindex_array, int **sindex_array2, int *offset, MPI_Comm mpi_comm) { int i, 
nprocs, mypid, Nrows, *proc_array, *proc_array2; @@ -714,7 +713,7 @@ int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *mh_mat, NrowsOffset = 0; for (i = 0; i < mypid; i++) NrowsOffset += proc_array[i]; for (i = 1; i < nprocs; i++) proc_array[i] += proc_array[i-1]; - free(proc_array2); + hypre_TFree(proc_array2, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* compose the column index map (index_array,index_array2) */ @@ -729,16 +728,16 @@ int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *mh_mat, MH_ExchBdry(dble_array, context); if ( extNrows-Nrows > 0 ) index_array = hypre_TAlloc(int, (extNrows-Nrows) , HYPRE_MEMORY_HOST); - else + else index_array = NULL; for (i = Nrows; i < extNrows; i++) index_array[i-Nrows] = dble_array[i]; if ( extNrows-Nrows > 0 ) index_array2 = hypre_TAlloc(int, (extNrows-Nrows) , HYPRE_MEMORY_HOST); - else + else index_array2 = NULL; for (i = 0; i < extNrows-Nrows; i++) index_array2[i] = i; - free( dble_array ); - free(context); + hypre_TFree(dble_array, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* send the lengths of each row to remote processor */ @@ -747,10 +746,10 @@ int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *mh_mat, /* ---------------------------------------------------------------- */ HYPRE_LSI_DDIlutGetRowLengths(mh_mat,total_recv_leng,recv_lengths,mpi_comm); - HYPRE_LSI_DDIlutGetOffProcRows(mh_mat, *total_recv_leng, *recv_lengths, + HYPRE_LSI_DDIlutGetOffProcRows(mh_mat, *total_recv_leng, *recv_lengths, NrowsOffset,index_array,index_array2,int_buf, dble_buf,mpi_comm); - free(proc_array); + hypre_TFree(proc_array, HYPRE_MEMORY_HOST); HYPRE_LSI_qsort1a(index_array, index_array2, 0, extNrows-Nrows-1); (*sindex_array) = index_array; (*sindex_array2) = index_array2; @@ -764,7 +763,7 @@ int HYPRE_LSI_DDIlutComposeOverlappedMatrix(MH_Matrix *mh_mat, 
/*****************************************************************************/ int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, - int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, + int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset) { int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid; @@ -815,7 +814,8 @@ int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, rowNorms[i] = 0.0; while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0) { - free(vals); free(cols); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(cols, HYPRE_MEMORY_HOST); allocated_space += 200 + 1; cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST); vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST); @@ -824,8 +824,8 @@ int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( j = 0; j < m; j++ ) rowNorms[i] += habs(vals[j]); rowNorms[i] /= extNrows; } - free( vals ); - free( cols ); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(cols, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* permute the matrix */ @@ -845,7 +845,7 @@ int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, } if ( order_flag ) - { + { order_list = hypre_TAlloc(int, Nrows , HYPRE_MEMORY_HOST); reorder_list = hypre_TAlloc(int, Nrows , HYPRE_MEMORY_HOST); for ( i = 0; i < Nrows; i++ ) order_list[i] = reorder_list[i] = i; @@ -854,12 +854,12 @@ int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, ilut_ptr->reorder_array = reorder_list; Norm2 = hypre_TAlloc(double, Nrows , HYPRE_MEMORY_HOST); for ( i = 0; i < Nrows; i++ ) Norm2[i] = rowNorms[order_list[i]]; - free( rowNorms ); + hypre_TFree(rowNorms, HYPRE_MEMORY_HOST); rowNorms = Norm2; } /* - for ( i = 0; i < Nrows; i++ ) - for ( j = Amat_ia[i]; j < Amat_ia[i+1]; j++ ) + for ( i = 0; i < Nrows; i++ ) + for ( j = 
Amat_ia[i]; j < Amat_ia[i+1]; j++ ) printf("%10d %10d %25.16e\n", i+1, Amat_ja[j]+1, Amat_aa[j]); */ @@ -893,7 +893,7 @@ int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows; else ext_ja[j] = -1; } - if ( ext_ja[j] != -1 ) + if ( ext_ja[j] != -1 ) { rowNorms[i+Nrows] += habs(ext_aa[j]); nnz_row++; @@ -918,17 +918,17 @@ int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( i = 0; i < Nrows; i++ ) { - if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 ) + if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 ) printf("%4d : 0DDILUT Processing row %d(%d)\n",mypid,i,extNrows); - + track_leng = 0; cols = &(Amat_ja[Amat_ia[i]]); vals = &(Amat_aa[Amat_ia[i]]); m = Amat_ia[i+1] - Amat_ia[i]; - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { - if ( cols[j] < extNrows ) + if ( cols[j] < extNrows ) { dble_buf[cols[j]] = vals[j]; track_array[track_leng++] = cols[j]; @@ -939,7 +939,7 @@ int HYPRE_LSI_DDIlutDecompose(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( j = 0; j < track_leng; j++ ) { index = track_array[j]; - if ( dble_buf[index] != 0 ) + if ( dble_buf[index] != 0 ) { if ( index < i ) Lcount++; else if ( index > i ) Ucount++; @@ -959,28 +959,28 @@ touch_cnt++; for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ ) { colIndex = mat_ja[k]; - if ( colIndex > j ) + if ( colIndex > j ) { if ( dble_buf[colIndex] != 0.0 ) dble_buf[colIndex] -= (ddata * mat_aa[k]); else { dble_buf[colIndex] = - (ddata * mat_aa[k]); - if ( dble_buf[colIndex] != 0.0 ) + if ( dble_buf[colIndex] != 0.0 ) track_array[track_leng++] = colIndex; } } } dble_buf[j] = ddata; - } + } else dble_buf[j] = 0.0; } - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { - if ( cols[j] < extNrows ) + if ( cols[j] < extNrows ) { - vals[j] = dble_buf[cols[j]]; - if ( cols[j] != i ) dble_buf[cols[j]] = 0.0; + vals[j] = dble_buf[cols[j]]; + if ( cols[j] != i ) dble_buf[cols[j]] = 0.0; } } sortcnt = 0; @@ -1002,12 +1002,12 @@ 
touch_cnt++; else dble_buf[index] = 0.0; } } - if ( sortcnt > Lcount ) + if ( sortcnt > Lcount ) { HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount); for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0; } - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { if ( cols[j] < i && vals[j] != 0.0 ) { @@ -1026,7 +1026,7 @@ touch_cnt++; } } diagonal[i] = dble_buf[i]; - if ( habs(diagonal[i]) < 1.0e-16 ) + if ( habs(diagonal[i]) < 1.0e-16 ) { diagonal[i] = 1.0E-6; num_small_pivot++; @@ -1057,7 +1057,7 @@ touch_cnt++; HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount); for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0; } - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { if ( cols[j] > i && vals[j] != 0.0 ) { @@ -1078,9 +1078,9 @@ touch_cnt++; dble_buf[i] = 0.0; mat_ia[i+1] = nnz_count; } - free(Amat_ia); - free(Amat_ja); - free(Amat_aa); + hypre_TFree(Amat_ia, HYPRE_MEMORY_HOST); + hypre_TFree(Amat_ja, HYPRE_MEMORY_HOST); + hypre_TFree(Amat_aa, HYPRE_MEMORY_HOST); printf("touch_cnt = %d\n", touch_cnt); /* ---------------------------------------------------------------- */ @@ -1093,16 +1093,16 @@ touch_cnt++; for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0; for ( i = 0; i < total_recv_leng; i++ ) { - if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 ) + if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 ) printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows); - + track_leng = m = 0; for ( j = offset; j < offset+recv_lengths[i]; j++ ) { - if ( ext_ja[j] != -1 ) + if ( ext_ja[j] != -1 ) { if (order_flag && ext_ja[j] < Nrows) index = reorder_list[ext_ja[j]]; - else index = ext_ja[j]; + else index = ext_ja[j]; dble_buf[index] = ext_aa[j]; track_array[track_leng++] = index; cols[m] = index; @@ -1114,7 +1114,7 @@ touch_cnt++; for ( j = 0; j < track_leng; j++ ) { index = track_array[j]; - if ( dble_buf[index] != 0 ) + if ( dble_buf[index] != 0 ) { if ( index < i+Nrows ) Lcount++; else if ( 
index > i+Nrows ) Ucount++; @@ -1133,14 +1133,14 @@ touch_cnt++; for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ ) { colIndex = mat_ja[k]; - if ( colIndex > j ) + if ( colIndex > j ) { if ( dble_buf[colIndex] != 0.0 ) dble_buf[colIndex] -= (ddata * mat_aa[k]); else { dble_buf[colIndex] = - (ddata * mat_aa[k]); - if ( dble_buf[colIndex] != 0.0 ) + if ( dble_buf[colIndex] != 0.0 ) track_array[track_leng++] = colIndex; } } @@ -1149,12 +1149,12 @@ touch_cnt++; } else dble_buf[j] = 0.0; } - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { - if ( cols[j] < extNrows ) + if ( cols[j] < extNrows ) { - vals[j] = dble_buf[cols[j]]; - if ( cols[j] != i+Nrows ) dble_buf[cols[j]] = 0.0; + vals[j] = dble_buf[cols[j]]; + if ( cols[j] != i+Nrows ) dble_buf[cols[j]] = 0.0; } } sortcnt = 0; @@ -1172,7 +1172,7 @@ touch_cnt++; { sortcols[sortcnt] = index; sortvals[sortcnt++] = dble_buf[index] * rowNorms[index]; - } + } else dble_buf[index] = 0.0; } } @@ -1181,7 +1181,7 @@ touch_cnt++; HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount); for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0; } - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { if ( cols[j] < i+Nrows && vals[j] != 0.0 ) { @@ -1200,7 +1200,7 @@ touch_cnt++; } } diagonal[i+Nrows] = dble_buf[i+Nrows]; - if ( habs(diagonal[i+Nrows]) < 1.0e-16 ) + if ( habs(diagonal[i+Nrows]) < 1.0e-16 ) { diagonal[i+Nrows] = 1.0E-6; num_small_pivot++; @@ -1232,7 +1232,7 @@ touch_cnt++; HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount); for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0; } - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { if ( cols[j] > i+Nrows && cols[j] < extNrows && vals[j] != 0.0 ) { @@ -1255,7 +1255,7 @@ touch_cnt++; } if ( nnz_count > total_nnz ) printf("WARNING in ILUTDecomp : memory bound passed.\n"); - if ( ilut_ptr->outputLevel > 0 ) + if ( ilut_ptr->outputLevel > 0 ) { printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count); printf("%4d : DDILUT number of 
small pivots = %d\n",mypid,num_small_pivot); @@ -1265,15 +1265,15 @@ touch_cnt++; /* deallocate temporary storage space */ /* ---------------------------------------------------------- */ - free(cols); - free(vals); - free(sortcols); - free(sortvals); - free(dble_buf); - free(diagonal); - free(rowNorms); - free(context); - free(track_array); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(sortcols, HYPRE_MEMORY_HOST); + hypre_TFree(sortvals, HYPRE_MEMORY_HOST); + hypre_TFree(dble_buf, HYPRE_MEMORY_HOST); + hypre_TFree(diagonal, HYPRE_MEMORY_HOST); + hypre_TFree(rowNorms, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); + hypre_TFree(track_array, HYPRE_MEMORY_HOST); return 0; } @@ -1284,7 +1284,7 @@ touch_cnt++; /*****************************************************************************/ int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, - int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, + int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset) { int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid; @@ -1332,7 +1332,8 @@ int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, rowNorms[i] = 0.0; while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0) { - free(vals); free(cols); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(cols, HYPRE_MEMORY_HOST); allocated_space += 200 + 1; cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST); vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST); @@ -1349,19 +1350,19 @@ int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, ilut_ptr->mat_ia = hypre_TAlloc(int, (extNrows + 1 ) , HYPRE_MEMORY_HOST); ilut_ptr->mat_ja = hypre_TAlloc(int, total_nnz , HYPRE_MEMORY_HOST); ilut_ptr->mat_aa = hypre_TAlloc(double, total_nnz , HYPRE_MEMORY_HOST); - + ncnt = 0; ilut_ptr->mat_ia[0] = 0; - for ( i = 0; i < Nrows; 
i++ ) + for ( i = 0; i < Nrows; i++ ) { - for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ ) - if ( mat_ja[j] >= 0 && mat_ja[j] < extNrows ) + for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ ) + if ( mat_ja[j] >= 0 && mat_ja[j] < extNrows ) ilut_ptr->mat_ja[ncnt++] = mat_ja[j]; ilut_ptr->mat_ia[i+1] = ncnt; } - if ( mat_ia != NULL ) free( mat_ia ); - if ( mat_ja != NULL ) free( mat_ja ); - if ( mat_aa != NULL ) free( mat_aa ); + hypre_TFree(mat_ia, HYPRE_MEMORY_HOST); + hypre_TFree(mat_ja, HYPRE_MEMORY_HOST); + hypre_TFree(mat_aa, HYPRE_MEMORY_HOST); mat_ia = ilut_ptr->mat_ia; mat_ja = ilut_ptr->mat_ja; mat_aa = ilut_ptr->mat_aa; @@ -1380,9 +1381,9 @@ int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, ndisc = 0; for ( i = 0; i < Nrows; i++ ) { - if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 ) + if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 ) printf("%4d : 1DDILUT Processing row %d(%d,%d)\n",mypid,i,extNrows,Nrows); - + MH_GetRow(context,1,&i,allocated_space,cols,vals,&m); /* ------------------------------------------------------------- */ @@ -1391,10 +1392,10 @@ int HYPRE_LSI_DDIlutDecompose2(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, track_leng = 0; first = extNrows; - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { index = cols[j]; - if ( index < extNrows ) + if ( index < extNrows ) { dble_buf[index] = vals[j]; track_array[track_leng++] = index; @@ -1422,25 +1423,25 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ ) { colIndex = mat_ja[k]; - if ( colIndex > j ) + if ( colIndex > j ) { if ( dble_buf[colIndex] != 0.0 ) dble_buf[colIndex] -= (ddata * mat_aa[k]); else { dble_buf[colIndex] = - (ddata * mat_aa[k]); - if ( dble_buf[colIndex] != 0.0 ) + if ( dble_buf[colIndex] != 0.0 ) track_array[track_leng++] = colIndex; } } } dble_buf[j] = ddata; - } + } else dble_buf[j] = 0.0; } diagonal[i] = dble_buf[i]; - if ( habs(diagonal[i]) < 1.0e-16 ) + if ( habs(diagonal[i]) < 1.0e-16 ) { 
diagonal[i] = dble_buf[i] = 1.0E-6; num_small_pivot++; @@ -1451,11 +1452,11 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; nnz_count = mat_ia[Nrows]; ncnt = 0; k = 0; - for ( i = 0; i < Nrows; i++ ) + for ( i = 0; i < Nrows; i++ ) { - for ( j = k; j < mat_ia[i+1]; j++ ) + for ( j = k; j < mat_ia[i+1]; j++ ) { - if ( mat_aa[j] != 0.0 ) + if ( mat_aa[j] != 0.0 ) { mat_ja[ncnt] = mat_ja[j]; mat_aa[ncnt++] = mat_aa[j]; @@ -1464,7 +1465,7 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; k = mat_ia[i+1]; mat_ia[i+1] = ncnt; } - if ( ilut_ptr->outputLevel > 0 ) + if ( ilut_ptr->outputLevel > 0 ) { printf("%4d : DDILUT after Nrows - nnz = %d %d\n", mypid, nnz_count, ncnt); printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot); @@ -1506,14 +1507,14 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0; for ( i = 0; i < total_recv_leng; i++ ) { - if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 ) + if ( (i+Nrows) % printstep == 0 && ilut_ptr->outputLevel > 0 ) printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows); track_leng = m = 0; for ( j = offset; j < offset+recv_lengths[i]; j++ ) { index = ext_ja[j]; - if ( index != -1 ) + if ( index != -1 ) { cols[m] = index; vals[m++] = ext_aa[j]; @@ -1526,7 +1527,7 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; for ( j = 0; j < track_leng; j++ ) { index = track_array[j]; - if ( dble_buf[index] != 0 ) + if ( dble_buf[index] != 0 ) { if ( index < i+Nrows ) Lcount++; else if ( index > i+Nrows ) Ucount++; @@ -1545,14 +1546,14 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ ) { colIndex = mat_ja[k]; - if ( colIndex > j ) + if ( colIndex > j ) { if ( dble_buf[colIndex] != 0.0 ) dble_buf[colIndex] -= (ddata * mat_aa[k]); else { dble_buf[colIndex] = - (ddata * mat_aa[k]); - if ( dble_buf[colIndex] != 0.0 ) + if ( dble_buf[colIndex] != 0.0 ) track_array[track_leng++] 
= colIndex; } } @@ -1561,11 +1562,11 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; } else dble_buf[j] = 0.0; } - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { index = cols[j]; - vals[j] = dble_buf[index]; - if ( index != i+Nrows ) dble_buf[index] = 0.0; + vals[j] = dble_buf[index]; + if ( index != i+Nrows ) dble_buf[index] = 0.0; } sortcnt = 0; for ( j = 0; j < track_leng; j++ ) @@ -1587,7 +1588,7 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount); for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0; } - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { if ( cols[j] < i+Nrows && vals[j] != 0.0 ) { @@ -1606,7 +1607,7 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; } } diagonal[i+Nrows] = dble_buf[i+Nrows]; - if ( habs(diagonal[i+Nrows]) < 1.0e-16 ) + if ( habs(diagonal[i+Nrows]) < 1.0e-16 ) { diagonal[i+Nrows] = 1.0E-6; num_small_pivot++; @@ -1634,7 +1635,7 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount); for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0; } - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { if ( cols[j] > i+Nrows && vals[j] != 0.0 ) { @@ -1658,7 +1659,7 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; if ( nnz_count > total_nnz ) printf("WARNING in ILUTDecomp : memory bound passed.\n"); - if ( ilut_ptr->outputLevel > 0 ) + if ( ilut_ptr->outputLevel > 0 ) { printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count); printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot); @@ -1668,15 +1669,15 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; /* deallocate temporary storage space */ /* ---------------------------------------------------------- */ - free(cols); - free(vals); - free(sortcols); - free(sortvals); - free(dble_buf); - free(diagonal); - free(rowNorms); - free(context); - free(track_array); + hypre_TFree(cols, 
HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(sortcols, HYPRE_MEMORY_HOST); + hypre_TFree(sortvals, HYPRE_MEMORY_HOST); + hypre_TFree(dble_buf, HYPRE_MEMORY_HOST); + hypre_TFree(diagonal, HYPRE_MEMORY_HOST); + hypre_TFree(rowNorms, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); + hypre_TFree(track_array, HYPRE_MEMORY_HOST); return 0; } @@ -1687,7 +1688,7 @@ if ( (mat_ia[i+1]-mat_ia[i]) != track_leng) ndisc++; /*****************************************************************************/ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, - int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, + int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset) { int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid; @@ -1733,7 +1734,8 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, rowNorms[i] = 0.0; while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0) { - free(vals); free(cols); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(cols, HYPRE_MEMORY_HOST); allocated_space += 200 + 1; cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST); vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST); @@ -1784,17 +1786,17 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( i = 0; i < Nrows; i++ ) { - if ( i % 1000 == 0 && ilut_ptr->outputLevel > 0 ) + if ( i % 1000 == 0 && ilut_ptr->outputLevel > 0 ) printf("%4d : 2DDILUT Processing row %d(%d)\n",mypid,i,extNrows); - + track_leng = 0; MH_GetRow(context,1,&i,allocated_space,cols,vals,&m); if ( m < 0 ) printf("IlutDecompose WARNING(1): row nnz = %d\n",m); - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { - if ( cols[j] < extNrows ) + if ( cols[j] < extNrows ) { dble_buf[cols[j]] = vals[j]; track_array[track_leng++] = cols[j]; @@ -1805,7 +1807,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut 
*ilut_ptr,MH_Matrix *Amat, for ( j = 0; j < track_leng; j++ ) { index = track_array[j]; - if ( dble_buf[index] != 0 ) + if ( dble_buf[index] != 0 ) { if ( index < i ) Lcount++; else if ( index > i ) Ucount++; @@ -1824,7 +1826,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ ) { colIndex = mat_ja[k]; - if ( colIndex > j ) + if ( colIndex > j ) { if ( dble_buf[colIndex] != 0.0 ) dble_buf[colIndex] -= (ddata * mat_aa[k]); @@ -1836,7 +1838,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, } } dble_buf[j] = ddata; - } + } else dble_buf[j] = 0.0; } @@ -1876,7 +1878,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, } } diagonal[i] = dble_buf[i]; - if ( habs(diagonal[i]) < 1.0e-16 ) + if ( habs(diagonal[i]) < 1.0e-16 ) { diagonal[i] = 1.0E-6; num_small_pivot++; @@ -1929,13 +1931,13 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( i = 0; i < extNrows; i++ ) dble_buf[i] = 0.0; for ( i = 0; i < total_recv_leng; i++ ) { - if ( (i+Nrows) % 1000 == 0 && ilut_ptr->outputLevel > 0 ) + if ( (i+Nrows) % 1000 == 0 && ilut_ptr->outputLevel > 0 ) printf("%4d : *DDILUT Processing row %d(%d)\n",mypid,i+Nrows,extNrows); - + track_leng = 0; for ( j = offset; j < offset+recv_lengths[i]; j++ ) { - if ( ext_ja[j] != -1 ) + if ( ext_ja[j] != -1 ) { dble_buf[ext_ja[j]] = ext_aa[j]; track_array[track_leng++] = ext_ja[j]; @@ -1946,7 +1948,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( j = 0; j < track_leng; j++ ) { index = track_array[j]; - if ( dble_buf[index] != 0 ) + if ( dble_buf[index] != 0 ) { if ( index < i+Nrows ) Lcount++; else if ( index > i+Nrows ) Ucount++; @@ -1965,7 +1967,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( k = mat_ia[j]; k < mat_ia[j+1]; k++ ) { colIndex = mat_ja[k]; - if ( colIndex > j ) + if ( colIndex > j ) { if ( 
dble_buf[colIndex] != 0.0 ) dble_buf[colIndex] -= (ddata * mat_aa[k]); @@ -1995,7 +1997,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, { cols[sortcnt] = index; vals[sortcnt++] = dble_buf[index] * rowNorms[index]; - } + } else dble_buf[index] = 0.0; } } @@ -2015,7 +2017,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, } } diagonal[i+Nrows] = dble_buf[i+Nrows]; - if ( habs(diagonal[i+Nrows]) < 1.0e-16 ) + if ( habs(diagonal[i+Nrows]) < 1.0e-16 ) { diagonal[i+Nrows] = 1.0E-6; num_small_pivot++; @@ -2062,7 +2064,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, } if ( nnz_count > total_nnz ) printf("WARNING in ILUTDecomp : memory bound passed.\n"); - if ( ilut_ptr->outputLevel > 0 ) + if ( ilut_ptr->outputLevel > 0 ) { printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count); printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot); @@ -2072,13 +2074,13 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, /* deallocate temporary storage space */ /* ---------------------------------------------------------- */ - free(cols); - free(vals); - free(dble_buf); - free(diagonal); - free(rowNorms); - free(context); - free(track_array); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(dble_buf, HYPRE_MEMORY_HOST); + hypre_TFree(diagonal, HYPRE_MEMORY_HOST); + hypre_TFree(rowNorms, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); + hypre_TFree(track_array, HYPRE_MEMORY_HOST); return 0; } @@ -2090,7 +2092,7 @@ int HYPRE_LSI_DDIlutDecompose3(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, /*****************************************************************************/ int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, - int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, + int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int 
*map, int *map2, int Noffset) { int *mat_ia, *mat_ja, i, m, allocated_space, *cols, mypid; @@ -2136,7 +2138,8 @@ int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, rowNorms[i] = 0.0; while (MH_GetRow(context,1,&i,allocated_space,cols,vals,&m)==0) { - free(vals); free(cols); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(cols, HYPRE_MEMORY_HOST); allocated_space += 200 + 1; cols = hypre_TAlloc(int, allocated_space , HYPRE_MEMORY_HOST); vals = hypre_TAlloc(double, allocated_space , HYPRE_MEMORY_HOST); @@ -2157,7 +2160,7 @@ int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( i = 0; i < Nrows; i++ ) { MH_GetRow(context,1,&i,allocated_space,cols,vals,&m); - for ( j = 0; j < m; j++ ) + for ( j = 0; j < m; j++ ) { if ( vals[j] != 0.0 ) { @@ -2182,7 +2185,7 @@ int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, if ( m >= 0 ) ext_ja[j] = map2[m] + Nrows; else ext_ja[j] = -1; } - if ( ext_ja[j] != -1 && ext_aa[j] != 0.0 ) + if ( ext_ja[j] != -1 && ext_aa[j] != 0.0 ) { rowNorms[i+Nrows] += habs(ext_aa[j]); mat_ja[ncnt] = ext_ja[j]; @@ -2203,21 +2206,21 @@ int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, printstep = extNrows / 10; for ( i = 0; i < extNrows; i++ ) { - if ( ( i % printstep == 0 ) && ilut_ptr->outputLevel > 0 ) + if ( ( i % printstep == 0 ) && ilut_ptr->outputLevel > 0 ) printf("%4d : DDILUT Processing pattern row = %d (%d)\n",mypid,i,extNrows); k = mat_ia[i+1] - mat_ia[i]; for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ ) { index = mat_ja[j]; k += ( mat_ia[index+1] - mat_ia[index] ); - } + } if ( (k+ncnt) > total_nnz ) { - iarray = mat_ja2; + iarray = mat_ja2; total_nnz += (extNrows - i ) * k; mat_ja2 = hypre_TAlloc(int, total_nnz , HYPRE_MEMORY_HOST); for ( j = 0; j < ncnt; j++ ) mat_ja2[j] = iarray[j]; - free( iarray ); + hypre_TFree(iarray, HYPRE_MEMORY_HOST); } for ( j = mat_ia[i]; j < mat_ia[i+1]; j++ ) { @@ -2225,7 +2228,7 @@ int 
HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, mat_ja2[ncnt++] = index; for (k = mat_ia[index]; k < mat_ia[index+1]; k++) mat_ja2[ncnt++] = mat_ja[k]; - } + } hypre_qsort0(mat_ja2, mat_ia2[i], ncnt-1); k = mat_ia2[i] + 1; for ( j = mat_ia2[i]+1; j < ncnt; j++ ) @@ -2234,11 +2237,11 @@ int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, } mat_ia2[i+1] = k; ncnt = k; - } + } for ( i = 0; i < ncnt; i++ ) if ( mat_ja2[i] < 0 || mat_ja2[i] >= extNrows ) printf("%4d : DDILUT ERROR ja %d = %d \n",mypid,i,mat_ja2[i]); - + mat_aa2 = hypre_TAlloc(double, ncnt , HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ @@ -2251,9 +2254,9 @@ int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( i = 0; i < extNrows; i++ ) { - if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 ) + if ( i % printstep == 0 && ilut_ptr->outputLevel > 0 ) printf("%4d : $DDILUT Processing row %d(%d,%d)\n",mypid,i,extNrows,Nrows); - + /* ------------------------------------------------------------- */ /* load the row into buffer */ /* ------------------------------------------------------------- */ @@ -2281,35 +2284,35 @@ int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, for ( k = mat_ia2[j]; k < mat_ia2[j+1]; k++ ) { colIndex = mat_ja2[k]; - if ( colIndex > j && mat_aa2[k] != 0.0 ) + if ( colIndex > j && mat_aa2[k] != 0.0 ) { if ( dble_buf[colIndex] != 0.0 ) dble_buf[colIndex] -= (ddata * mat_aa2[k]); else { dble_buf[colIndex] = - (ddata * mat_aa2[k]); - if ( dble_buf[colIndex] != 0.0 ) + if ( dble_buf[colIndex] != 0.0 ) track_array[track_leng++] = colIndex; } } } dble_buf[j] = ddata; - } + } else dble_buf[j] = 0.0; } diagonal[i] = dble_buf[i]; - if ( habs(diagonal[i]) < 1.0e-16 ) + if ( habs(diagonal[i]) < 1.0e-16 ) { diagonal[i] = dble_buf[i] = 1.0E-6; num_small_pivot++; } - for (j = mat_ia2[i]; j < mat_ia2[i+1]; j++) + for (j = mat_ia2[i]; j < mat_ia2[i+1]; 
j++) mat_aa2[j] = dble_buf[mat_ja2[j]]; for ( j = 0; j < track_leng; j++ ) dble_buf[track_array[j]] = 0.0; } nnz_count = mat_ia2[extNrows]; - if ( ilut_ptr->outputLevel > 0 ) + if ( ilut_ptr->outputLevel > 0 ) { printf("%4d : DDILUT number of nonzeros = %d\n",mypid,nnz_count); printf("%4d : DDILUT number of small pivots = %d\n",mypid,num_small_pivot); @@ -2322,16 +2325,16 @@ int HYPRE_LSI_DDIlutDecomposeNew(HYPRE_LSI_DDIlut *ilut_ptr,MH_Matrix *Amat, ilut_ptr->mat_ia = mat_ia2; ilut_ptr->mat_ja = mat_ja2; ilut_ptr->mat_aa = mat_aa2; - free(mat_ia); - free(mat_ja); - free(mat_aa); - free(cols); - free(vals); - free(dble_buf); - free(diagonal); - free(rowNorms); - free(context); - free(track_array); + hypre_TFree(mat_ia, HYPRE_MEMORY_HOST); + hypre_TFree(mat_ja, HYPRE_MEMORY_HOST); + hypre_TFree(mat_aa, HYPRE_MEMORY_HOST); + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); + hypre_TFree(dble_buf, HYPRE_MEMORY_HOST); + hypre_TFree(diagonal, HYPRE_MEMORY_HOST); + hypre_TFree(rowNorms, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); + hypre_TFree(track_array, HYPRE_MEMORY_HOST); return 0; } diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_ml.c b/src/FEI_mv/fei-hypre/HYPRE_LSI_ml.c index 9f4e4aaac..f78a07ebe 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_ml.c +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_ml.c @@ -5,11 +5,11 @@ * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ -/****************************************************************************/ +/****************************************************************************/ /* HYPRE_LSI_ML interface */ /*--------------------------------------------------------------------------*/ /* local functions : - * + * * MH_Irecv * MH_Send * MH_Wait @@ -34,7 +34,6 @@ #include #include -#include #include #include "../../parcsr_ls/HYPRE_parcsr_ls.h" @@ -46,7 +45,7 @@ #include 
"../../matrix_matrix/HYPRE_matrix_matrix_protos.h" #include "../../seq_mv/vector.h" -#include "../../parcsr_mv/_hypre_parcsr_mv.h" +#include "../../parcsr_mv/_hypre_parcsr_mv.h" /* #include "../../parcsr_mv/par_vector.h" */ extern void hypre_qsort0(int *, int, int); @@ -54,9 +53,9 @@ extern void hypre_qsort0(int *, int, int); #include "HYPRE_MHMatrix.h" extern int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix, MH_Matrix *, - MPI_Comm, int *,MH_Context*); + MPI_Comm, int *,MH_Context*); -/****************************************************************************/ +/****************************************************************************/ /* communication functions on parallel platforms */ /*--------------------------------------------------------------------------*/ @@ -68,7 +67,7 @@ int MH_Irecv(void* buf, unsigned int count, int *src, int *mid, #else int my_id, lsrc, retcode; - if ( *src < 0 ) lsrc = MPI_ANY_SOURCE; else lsrc = (*src); + if ( *src < 0 ) lsrc = MPI_ANY_SOURCE; else lsrc = (*src); retcode = MPI_Irecv( buf, (int) count, MPI_BYTE, lsrc, *mid, comm, request); if ( retcode != 0 ) { @@ -95,7 +94,7 @@ int MH_Wait(void* buf, unsigned int count, int *src, int *mid, printf("%d : MH_Wait warning : retcode = %d\n", my_id, retcode); } MPI_Get_count(&status, MPI_BYTE, &incount); - if ( *src < 0 ) *src = status.MPI_SOURCE; + if ( *src < 0 ) *src = status.MPI_SOURCE; return incount; #endif } @@ -116,7 +115,7 @@ int MH_Send(void* buf, unsigned int count, int dest, int mid, MPI_Comm comm ) #endif } -/****************************************************************************/ +/****************************************************************************/ /* wrapper function for interprocessor communication for matvec and getrow */ /*--------------------------------------------------------------------------*/ @@ -130,7 +129,7 @@ int MH_ExchBdry(double *vec, void *obj) MH_Context *context; MH_Matrix *Amat; MPI_Comm comm; - MPI_Request *request; + MPI_Request 
*request; int sendProcCnt, recvProcCnt; int *sendProc, *recvProc; @@ -171,7 +170,7 @@ int MH_ExchBdry(double *vec, void *obj) dbuf[j] = vec[tempList[j]]; } MH_Send((void*) dbuf, leng, dest, msgid, comm); - if ( dbuf != NULL ) free( dbuf ); + hypre_TFree(dbuf , HYPRE_MEMORY_HOST); } offset = nRows; for ( i = 0; i < recvProcCnt; i++ ) @@ -181,16 +180,17 @@ int MH_ExchBdry(double *vec, void *obj) MH_Wait((void*) &(vec[offset]), leng, &src, &msgid, comm, &request[i]); offset += recvLeng[i]; } - if ( recvProcCnt > 0 ) free ( request ); + if ( recvProcCnt > 0 ) + hypre_TFree(request , HYPRE_MEMORY_HOST); return 1; #endif } -/****************************************************************************/ +/****************************************************************************/ /* wrapper function for interprocessor communication for matvec and getrow */ /*--------------------------------------------------------------------------*/ -int MH_ExchBdryBack(double *vec, void *obj, int *length, double **outvec, +int MH_ExchBdryBack(double *vec, void *obj, int *length, double **outvec, int **outindices) { #ifdef HYPRE_SEQUENTIAL @@ -203,7 +203,7 @@ int MH_ExchBdryBack(double *vec, void *obj, int *length, double **outvec, MH_Context *context; MH_Matrix *Amat; MPI_Comm comm; - MPI_Request *request; + MPI_Request *request; int sendProcCnt, recvProcCnt; int *sendProc, *recvProc; @@ -231,13 +231,13 @@ int MH_ExchBdryBack(double *vec, void *obj, int *length, double **outvec, (*outindices) = hypre_TAlloc(int, leng , HYPRE_MEMORY_HOST); (*length) = leng; offset = 0; - for ( i = 0; i < sendProcCnt; i++ ) + for ( i = 0; i < sendProcCnt; i++ ) { - for ( j = 0; j < sendLeng[i]; j++ ) + for ( j = 0; j < sendLeng[i]; j++ ) (*outindices)[offset+j] = sendList[i][j]; offset += sendLeng[i]; - } - } + } + } else { (*outvec) = NULL; @@ -270,12 +270,13 @@ int MH_ExchBdryBack(double *vec, void *obj, int *length, double **outvec, MH_Wait((void*) &((*outvec)[offset]), leng, &src, &msgid, comm, 
&request[i]); offset += sendLeng[i]; } - if ( sendProcCnt > 0 ) free ( request ); + if ( sendProcCnt > 0 ) + hypre_TFree(request, HYPRE_MEMORY_HOST); return 1; #endif } -/****************************************************************************/ +/****************************************************************************/ /* matvec function for local matrix structure MH_Matrix */ /*--------------------------------------------------------------------------*/ @@ -301,19 +302,19 @@ int MH_MatVec(void *obj, int leng1, double p[], int leng2, double ap[]) dbuf = hypre_TAlloc( double , length , HYPRE_MEMORY_HOST); for ( i = 0; i < nRows; i++ ) dbuf[i] = p[i]; MH_ExchBdry(dbuf, obj); - for ( i = 0 ; i < nRows; i++ ) + for ( i = 0 ; i < nRows; i++ ) { sum = 0.0; ibeg = rowptr[i]; iend = rowptr[i+1]; for ( j = ibeg; j < iend; j++ ) - { + { k = colnum[j]; sum += ( values[j] * dbuf[k] ); } ap[i] = sum; } - if ( dbuf != NULL ) free( dbuf ); + hypre_TFree(dbuf, HYPRE_MEMORY_HOST); return 1; } @@ -361,7 +362,7 @@ int HYPRE_LSI_MLCreate( MPI_Comm comm, HYPRE_Solver *solver) /* create an internal ML data structure */ MH_Link *link = hypre_TAlloc( MH_Link , 1, HYPRE_MEMORY_HOST); - if ( link == NULL ) return 1; + if ( link == NULL ) return 1; /* fill in all other default parameters */ @@ -380,7 +381,7 @@ int HYPRE_LSI_MLCreate( MPI_Comm comm, HYPRE_Solver *solver) link->ag_threshold = 0.08; /* threshold for aggregation */ link->contxt = NULL; /* context for matvec */ link->coarse_solver = 0; /* default = SuperLU */ - + /* create the ML structure */ ML_Create( &(link->ml_ptr), link->nlevels ); @@ -408,25 +409,25 @@ int HYPRE_LSI_MLDestroy( HYPRE_Solver solver ) if ( link->ml_ag != NULL ) ML_Aggregate_Destroy( &(link->ml_ag) ); if ( link->ml_amg != NULL ) ML_AMG_Destroy( &(link->ml_amg) ); ML_Destroy( &(link->ml_ptr) ); - if ( link->contxt->partition != NULL ) free( link->contxt->partition ); + hypre_TFree(link->contxt->partition, HYPRE_MEMORY_HOST); if ( link->contxt->Amat != 
NULL ) { Amat = (MH_Matrix *) link->contxt->Amat; - if ( Amat->sendProc != NULL ) free (Amat->sendProc); - if ( Amat->sendLeng != NULL ) free (Amat->sendLeng); - if ( Amat->sendList != NULL ) + hypre_TFree(Amat->sendProc, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->sendLeng, HYPRE_MEMORY_HOST); + if ( Amat->sendList != NULL ) { for (i = 0; i < Amat->sendProcCnt; i++ ) - if (Amat->sendList[i] != NULL) free (Amat->sendList[i]); - free (Amat->sendList); + hypre_TFree(Amat->sendList[i], HYPRE_MEMORY_HOST); + hypre_TFree(Amat->sendList, HYPRE_MEMORY_HOST); } - if ( Amat->recvProc != NULL ) free (Amat->recvProc); - if ( Amat->recvLeng != NULL ) free (Amat->recvLeng); - if ( Amat->map != NULL ) free (Amat->map); - free( Amat ); + hypre_TFree(Amat->recvProc, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->recvLeng, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->map, HYPRE_MEMORY_HOST); + hypre_TFree(Amat, HYPRE_MEMORY_HOST); } - if ( link->contxt != NULL ) free( link->contxt ); - free( link ); + hypre_TFree(link->contxt, HYPRE_MEMORY_HOST); + hypre_TFree(link, HYPRE_MEMORY_HOST); return 0; #else @@ -451,25 +452,25 @@ int HYPRE_LSI_MLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, MH_Context *context; MH_Matrix *mh_mat; - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* fetch the ML pointer */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ MH_Link *link = (MH_Link *) solver; ML *ml = link->ml_ptr; nlevels = link->nlevels; - - /* -------------------------------------------------------- */ + + /* -------------------------------------------------------- */ /* set up the parallel environment */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ MPI_Comm_rank(link->comm, &my_id); MPI_Comm_size(link->comm, &nprocs); - /* 
-------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* fetch the matrix row partition information and put it */ /* into the matrix data object (for matvec and getrow) */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ HYPRE_ParCSRMatrixGetRowPartitioning( A, &row_partition ); localEqns = row_partition[my_id+1] - row_partition[my_id]; @@ -483,11 +484,11 @@ int HYPRE_LSI_MLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, mh_mat = hypre_TAlloc( MH_Matrix, 1, HYPRE_MEMORY_HOST); context->Amat = mh_mat; HYPRE_LSI_MLConstructMHMatrix(A,mh_mat,link->comm, - context->partition,context); + context->partition,context); - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* set up the ML communicator information */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ ML_Set_Comm_Communicator(ml, link->comm); ML_Set_Comm_MyRank(ml, my_id); @@ -496,9 +497,9 @@ int HYPRE_LSI_MLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, ML_Set_Comm_Recv(ml, MH_Irecv); ML_Set_Comm_Wait(ml, MH_Wait); - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* set up the ML matrix information */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ ML_Init_Amatrix(ml, nlevels-1, localEqns, localEqns, (void *) context); ML_Set_Amatrix_Matvec(ml, nlevels-1, MH_MatVec); @@ -506,12 +507,12 @@ int HYPRE_LSI_MLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, for (i=0; irecvProcCnt; i++ ) length += mh_mat->recvLeng[i]; ML_Set_Amatrix_Getrow(ml, nlevels-1, MH_GetRow, MH_ExchBdry, length); - /* 
-------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* create an AMG or aggregate context */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ if ( link->method == 0 ) - { + { ML_AMG_Create(&(link->ml_amg)); ML_AMG_Set_Threshold( link->ml_amg, link->ag_threshold ); if ( link->num_PDEs > 1 ) @@ -519,7 +520,7 @@ int HYPRE_LSI_MLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, else ML_AMG_Set_AMGScheme_Scalar(link->ml_amg); ML_AMG_Set_MaxLevels( link->ml_amg, link->nlevels ); - coarsest_level = ML_Gen_MGHierarchy_UsingAMG(ml, nlevels-1, + coarsest_level = ML_Gen_MGHierarchy_UsingAMG(ml, nlevels-1, ML_DECREASING, link->ml_amg); } else @@ -542,24 +543,24 @@ int HYPRE_LSI_MLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, default: ML_Aggregate_Set_CoarsenScheme_Uncoupled(link->ml_ag); break; } - coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, nlevels-1, + coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, nlevels-1, ML_DECREASING, link->ml_ag); } - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* perform aggregation */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ if ( my_id == 0 ) printf("ML : number of levels = %d\n", coarsest_level); coarsest_level = nlevels - coarsest_level; - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* set up smoother and coarse solver */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ - for (level = nlevels-1; level > coarsest_level; level--) + for (level = nlevels-1; level > coarsest_level; level--) { sweeps = link->pre_sweeps; wght = link->jacobi_wt; 
@@ -594,7 +595,7 @@ int HYPRE_LSI_MLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, { Nblocks = ML_Aggregate_Get_AggrCount( link->ml_ag, level ); ML_Aggregate_Get_AggrMap( link->ml_ag, level, &blockList ); - ML_Gen_Smoother_VBlockSymGaussSeidel(ml,level, ML_PRESMOOTHER, + ML_Gen_Smoother_VBlockSymGaussSeidel(ml,level, ML_PRESMOOTHER, sweeps, 1.0, Nblocks, blockList); } else @@ -618,7 +619,7 @@ int HYPRE_LSI_MLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, } break; case 6 : - ML_Gen_Smoother_OverlappedDDILUT(ml,level, ML_PRESMOOTHER); + ML_Gen_Smoother_OverlappedDDILUT(ml,level, ML_PRESMOOTHER); break; case 7 : ML_Gen_Smoother_VBlockAdditiveSchwarz(ml,level,ML_PRESMOOTHER, @@ -721,7 +722,7 @@ int HYPRE_LSI_MLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, ML_Gen_Smoother_GaussSeidel(ml,coarsest_level,ML_PRESMOOTHER,50,1.0); } ML_Gen_Solver(ml, ML_MGV, nlevels-1, coarsest_level); - + return 0; #else printf("ML not linked.\n"); @@ -757,7 +758,7 @@ int HYPRE_LSI_MLSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, ML_Krylov_Solve(ml_kry, leng, rhs, sol); ML_Krylov_Destroy(&ml_kry); */ - + ML_Solve_AMGV(ml, rhs, sol); /*ML_Iterate(ml, sol, rhs);*/ @@ -775,16 +776,16 @@ int HYPRE_LSI_MLSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, int HYPRE_LSI_MLSetStrongThreshold(HYPRE_Solver solver,double strong_threshold) { MH_Link *link = (MH_Link *) solver; - + if ( strong_threshold < 0.0 ) { printf("HYPRE_LSI_MLSetStrongThreshold WARNING : reset to 0.\n"); link->ag_threshold = 0.0; - } + } else { link->ag_threshold = strong_threshold; - } + } return( 0 ); } @@ -826,11 +827,11 @@ int HYPRE_LSI_MLSetNumPreSmoothings( HYPRE_Solver solver, int num_sweeps ) { printf("HYPRE_LSI_MLSetNumPreSmoothings WARNING : reset to 0.\n"); link->pre_sweeps = 0; - } + } else { link->pre_sweeps = num_sweeps; - } + } return( 0 ); } @@ -846,11 +847,11 @@ int HYPRE_LSI_MLSetNumPostSmoothings( HYPRE_Solver solver, int num_sweeps ) { printf("HYPRE_LSI_MLSetNumPostSmoothings WARNING : reset to 
0.\n"); link->post_sweeps = 0; - } + } else { link->post_sweeps = num_sweeps; - } + } return( 0 ); } @@ -866,11 +867,11 @@ int HYPRE_LSI_MLSetPreSmoother( HYPRE_Solver solver, int smoother_type ) { printf("HYPRE_LSI_MLSetPreSmoother WARNING : set to Jacobi.\n"); link->pre = 0; - } + } else { link->pre = smoother_type; - } + } return( 0 ); } @@ -886,11 +887,11 @@ int HYPRE_LSI_MLSetPostSmoother( HYPRE_Solver solver, int smoother_type ) { printf("HYPRE_LSI_MLSetPostSmoother WARNING : set to Jacobi.\n"); link->post = 0; - } + } else { link->post = smoother_type; - } + } return( 0 ); } @@ -906,11 +907,11 @@ int HYPRE_LSI_MLSetDampingFactor( HYPRE_Solver solver, double factor ) { printf("HYPRE_LSI_MLSetDampingFactor WARNING : set to 0.5.\n"); link->jacobi_wt = 0.5; - } + } else { link->jacobi_wt = factor; - } + } return( 0 ); } @@ -926,11 +927,11 @@ int HYPRE_LSI_MLSetCoarseSolver( HYPRE_Solver solver, int solver_id ) { printf("HYPRE_LSI_MLSetCoarseSolver WARNING : reset to Aggr\n"); link->coarse_solver = 1; - } + } else { link->coarse_solver = solver_id; - } + } return( 0 ); } @@ -946,11 +947,11 @@ int HYPRE_LSI_MLSetCoarsenScheme( HYPRE_Solver solver, int scheme ) { printf("HYPRE_LSI_MLSetCoarsenScheme WARNING : reset to uncoupled\n"); link->coarsen_scheme = 1; - } + } else { link->coarsen_scheme = scheme; - } + } return( 0 ); } @@ -966,11 +967,11 @@ int HYPRE_LSI_MLSetBGSBlockSize( HYPRE_Solver solver, int size ) { printf("HYPRE_LSI_MLSetBGSBlockSize WARNING : reset to 1.\n"); link->BGS_blocksize = 1; - } + } else { link->BGS_blocksize = size; - } + } return( 0 ); } @@ -979,7 +980,7 @@ int HYPRE_LSI_MLSetBGSBlockSize( HYPRE_Solver solver, int size ) /*--------------------------------------------------------------------------*/ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, - MPI_Comm comm, int *partition,MH_Context *obj) + MPI_Comm comm, int *partition,MH_Context *obj) { int i, j, index, my_id, nprocs; int rowLeng, *colInd, startRow, 
endRow, localEqns; @@ -996,7 +997,7 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, /* -------------------------------------------------------- */ /* get machine information and local matrix information */ /* -------------------------------------------------------- */ - + #ifdef HYPRE_SEQUENTIAL my_id = 0; nprocs = 1; @@ -1063,31 +1064,31 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, ncnt = 0; for ( i = 1; i < externLeng; i++ ) { - if ( externList[i] != externList[ncnt] ) + if ( externList[i] != externList[ncnt] ) externList[++ncnt] = externList[i]; } if ( externLeng > 0 ) externLeng = ncnt + 1; /* -------------------------------------------------------- */ /* allocate the CSR matrix */ - /* -------------------------------------------------------- */ - - nnz = 0; - for ( i = 0; i < localEqns; i++ ) nnz += diagSize[i] + offdiagSize[i]; - rowptr = hypre_TAlloc(int, (localEqns + 1) , HYPRE_MEMORY_HOST); - columns = hypre_TAlloc(int, nnz , HYPRE_MEMORY_HOST); - values = hypre_TAlloc(double, nnz , HYPRE_MEMORY_HOST); - rowptr[0] = 0; - for ( i = 1; i <= localEqns; i++ ) + /* -------------------------------------------------------- */ + + nnz = 0; + for ( i = 0; i < localEqns; i++ ) nnz += diagSize[i] + offdiagSize[i]; + rowptr = hypre_TAlloc(int, (localEqns + 1) , HYPRE_MEMORY_HOST); + columns = hypre_TAlloc(int, nnz , HYPRE_MEMORY_HOST); + values = hypre_TAlloc(double, nnz , HYPRE_MEMORY_HOST); + rowptr[0] = 0; + for ( i = 1; i <= localEqns; i++ ) rowptr[i] = rowptr[i-1] + diagSize[i-1] + offdiagSize[i-1]; - free( diagSize ); - free( offdiagSize ); + hypre_TFree(diagSize, HYPRE_MEMORY_HOST); + hypre_TFree(offdiagSize, HYPRE_MEMORY_HOST); - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* put the matrix data in the CSR matrix */ - /* -------------------------------------------------------- */ + /* 
-------------------------------------------------------- */ - rowptr[0] = 0; + rowptr[0] = 0; ncnt = 0; for ( i = startRow; i <= endRow; i++ ) { @@ -1095,7 +1096,7 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, for (j = 0; j < rowLeng; j++) { index = colInd[j]; - if ( colVal[j] != 0.0 ) + if ( colVal[j] != 0.0 ) { if ( index < startRow || index > endRow ) { @@ -1114,11 +1115,11 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, rowptr[i-startRow+1] = ncnt; HYPRE_ParCSRMatrixRestoreRow(A, i, &rowLeng, &colInd, &colVal); } - assert( ncnt == nnz ); - - /* -------------------------------------------------------- */ + hypre_assert( ncnt == nnz ); + + /* -------------------------------------------------------- */ /* initialize the MH_Matrix data structure */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ mh_mat->Nrows = localEqns; mh_mat->rowptr = rowptr; @@ -1132,18 +1133,18 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, mh_mat->recvProc = NULL; mh_mat->sendList = NULL; mh_mat->map = externList; - - /* -------------------------------------------------------- */ + + /* -------------------------------------------------------- */ /* form the remote portion of the matrix */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ #ifndef HYPRE_SEQUENTIAL - if ( nprocs > 1 ) + if ( nprocs > 1 ) { - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* count number of elements to be received from each */ /* remote processor (assume sequential mapping) */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ tempCnt = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST); for ( i = 0; i < nprocs; i++ ) 
tempCnt[i] = 0; @@ -1151,7 +1152,7 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, { for ( j = 0; j < nprocs; j++ ) { - if ( externList[i] >= partition[j] && + if ( externList[i] >= partition[j] && externList[i] < partition[j+1] ) { tempCnt[j]++; @@ -1160,9 +1161,9 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, } } - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* compile a list processors data is to be received from */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ recvProcCnt = 0; for ( i = 0; i < nprocs; i++ ) @@ -1172,64 +1173,64 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, recvProcCnt = 0; for ( i = 0; i < nprocs; i++ ) { - if ( tempCnt[i] > 0 ) + if ( tempCnt[i] > 0 ) { recvProc[recvProcCnt] = i; recvLeng[recvProcCnt++] = tempCnt[i]; } } - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* each processor has to find out how many processors it */ /* has to send data to */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ sendLeng = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST); for ( i = 0; i < nprocs; i++ ) tempCnt[i] = 0; for ( i = 0; i < recvProcCnt; i++ ) tempCnt[recvProc[i]] = 1; MPI_Allreduce(tempCnt, sendLeng, nprocs, MPI_INT, MPI_SUM, comm ); sendProcCnt = sendLeng[my_id]; - free( sendLeng ); + hypre_TFree(sendLeng, HYPRE_MEMORY_HOST); if ( sendProcCnt > 0 ) { sendLeng = hypre_TAlloc(int, sendProcCnt , HYPRE_MEMORY_HOST); sendProc = hypre_TAlloc(int, sendProcCnt , HYPRE_MEMORY_HOST); sendList = hypre_TAlloc(int*, sendProcCnt , HYPRE_MEMORY_HOST); } - else + else { sendLeng = sendProc = NULL; sendList = NULL; } - /* 
----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* each processor sends to all processors it expects to */ /* receive data about the lengths of data expected */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ msgid = 539; - for ( i = 0; i < recvProcCnt; i++ ) + for ( i = 0; i < recvProcCnt; i++ ) { MPI_Send((void*) &recvLeng[i],1,MPI_INT,recvProc[i],msgid,comm); } - for ( i = 0; i < sendProcCnt; i++ ) + for ( i = 0; i < sendProcCnt; i++ ) { MPI_Recv((void*) &sendLeng[i],1,MPI_INT,MPI_ANY_SOURCE,msgid, comm,&status); sendProc[i] = status.MPI_SOURCE; sendList[i] = hypre_TAlloc(int, sendLeng[i] , HYPRE_MEMORY_HOST); - if ( sendList[i] == NULL ) + if ( sendList[i] == NULL ) printf("allocate problem %d \n", sendLeng[i]); } - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* each processor sends to all processors it expects to */ /* receive data about the equation numbers */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ - for ( i = 0; i < nprocs; i++ ) tempCnt[i] = 0; + for ( i = 0; i < nprocs; i++ ) tempCnt[i] = 0; ncnt = 1; - for ( i = 0; i < externLeng; i++ ) + for ( i = 0; i < externLeng; i++ ) { if ( externList[i] >= partition[ncnt] ) { @@ -1237,23 +1238,23 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, i--; ncnt++; } - } - for ( i = ncnt-1; i < nprocs; i++ ) tempCnt[i] = externLeng; + } + for ( i = ncnt-1; i < nprocs; i++ ) tempCnt[i] = externLeng; - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* send the global equation numbers */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ if ( 
sendProcCnt > 0 ) Request = hypre_TAlloc(MPI_Request, sendProcCnt , HYPRE_MEMORY_HOST); msgid = 540; - for ( i = 0; i < sendProcCnt; i++ ) + for ( i = 0; i < sendProcCnt; i++ ) { MPI_Irecv((void*)sendList[i],sendLeng[i],MPI_INT,sendProc[i], msgid,comm,&Request[i]); } - for ( i = 0; i < recvProcCnt; i++ ) + for ( i = 0; i < recvProcCnt; i++ ) { if ( recvProc[i] == 0 ) j = 0; else j = tempCnt[recvProc[i]-1]; @@ -1261,18 +1262,19 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, MPI_Send((void*) &externList[j], rowLeng, MPI_INT, recvProc[i], msgid, comm); } - for ( i = 0; i < sendProcCnt; i++ ) + for ( i = 0; i < sendProcCnt; i++ ) { MPI_Wait( &Request[i], &status ); } - if ( sendProcCnt > 0 ) free( Request ); + if ( sendProcCnt > 0 ) + hypre_TFree(Request, HYPRE_MEMORY_HOST); - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* convert the send list from global to local numbers */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ for ( i = 0; i < sendProcCnt; i++ ) - { + { for ( j = 0; j < sendLeng[i]; j++ ) { index = sendList[i][j] - startRow; @@ -1285,9 +1287,9 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, } } - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* convert the send list from global to local numbers */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ mh_mat->sendProcCnt = sendProcCnt; mh_mat->recvProcCnt = recvProcCnt; @@ -1297,11 +1299,11 @@ int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, mh_mat->recvProc = recvProc; mh_mat->sendList = sendList; - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* 
clean up */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ - free( tempCnt ); + hypre_TFree(tempCnt, HYPRE_MEMORY_HOST); } return 0; #else diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_mli.cxx b/src/FEI_mv/fei-hypre/HYPRE_LSI_mli.cxx index f8900aa81..c7707c16f 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_mli.cxx +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_mli.cxx @@ -48,7 +48,6 @@ #include #include #include -#include #include #if 0 /* RDF: Not sure this is really needed */ diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_mlmaxwell.c b/src/FEI_mv/fei-hypre/HYPRE_LSI_mlmaxwell.c index 5ce9afc8c..37c89374a 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_mlmaxwell.c +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_mlmaxwell.c @@ -5,11 +5,11 @@ * SPDX-License-Identifier: (Apache-2.0 OR MIT) ******************************************************************************/ -/****************************************************************************/ +/****************************************************************************/ /* HYPRE_LSI_MLMaxwell interface */ /*--------------------------------------------------------------------------*/ /* local functions : - * + * * ML_ExchBdry * ML_MatVec * ML_GetRow @@ -25,7 +25,6 @@ #include #include -#include #include #include "../../parcsr_ls/HYPRE_parcsr_ls.h" @@ -38,12 +37,12 @@ extern void hypre_qsort0(int *, int, int); extern int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix, - HYPRE_ML_Matrix *, int *, MPI_Comm, MLMaxwell_Context*); + HYPRE_ML_Matrix *, int *, MPI_Comm, MLMaxwell_Context*); /****************************************************************************/ /* communication functions on parallel platforms */ /*--------------------------------------------------------------------------*/ - + int ML_Irecv(void* buf, unsigned int count, int *src, int *mid, MPI_Comm comm, MPI_Request *requests) { @@ -51,7 +50,7 @@ int ML_Irecv(void* buf, unsigned int count, int *src, 
int *mid, return 0; #else int mypid, lsrc, retcode; - + if (*src < 0) lsrc = MPI_ANY_SOURCE; else lsrc = (*src); retcode = MPI_Irecv(buf, (int) count,MPI_BYTE,lsrc,*mid,comm,requests); if (retcode != 0) @@ -62,7 +61,7 @@ int ML_Irecv(void* buf, unsigned int count, int *src, int *mid, return 0; #endif } - + int ML_Wait(void* buf, unsigned int count, int *src, int *mid, MPI_Comm comm, MPI_Request *requests) { @@ -71,7 +70,7 @@ int ML_Wait(void* buf, unsigned int count, int *src, int *mid, #else MPI_Status status; int mypid, incount, retcode; - + retcode = MPI_Wait(requests, &status); if (retcode != 0) { @@ -99,8 +98,8 @@ int ML_Send(void* buf, unsigned int count, int dest, int mid, MPI_Comm comm) return 0; #endif } - -/****************************************************************************/ + +/****************************************************************************/ /* wrapper function for interprocessor communication for matvec and getrow */ /*--------------------------------------------------------------------------*/ @@ -115,7 +114,7 @@ int ML_ExchBdry(double *vec, void *obj) double *dbuf; HYPRE_ML_Matrix *Amat; MPI_Comm comm; - MPI_Request *requests; + MPI_Request *requests; MLMaxwell_Context *context; context = (MLMaxwell_Context *) obj; @@ -150,7 +149,7 @@ int ML_ExchBdry(double *vec, void *obj) tempList = sendList[i]; for (j = 0; j < sendLeng[i]; j++) dbuf[j] = vec[tempList[j]]; ML_Send((void*) dbuf, leng, dest, msgid, comm); - if (dbuf != NULL) free(dbuf); + hypre_TFree(dbuf, HYPRE_MEMORY_HOST); } offset = nRows; for (i = 0; i < recvProcCnt; i++) @@ -160,12 +159,13 @@ int ML_ExchBdry(double *vec, void *obj) ML_Wait((void*) &(vec[offset]), leng, &src, &msgid, comm, &requests[i]); offset += recvLeng[i]; } - if (recvProcCnt > 0) free (requests); + if (recvProcCnt > 0) + hypre_TFree(requests, HYPRE_MEMORY_HOST); return 1; #endif } -/****************************************************************************/ 
+/****************************************************************************/ /* matvec function for local matrix structure HYPRE_ML_Matrix */ /*--------------------------------------------------------------------------*/ @@ -193,19 +193,19 @@ int ML_MatVec(void *obj, int leng1, double p[], int leng2, double ap[]) dbuf = hypre_TAlloc(double, length , HYPRE_MEMORY_HOST); for (i = 0; i < nRows; i++) dbuf[i] = p[i]; ML_ExchBdry(dbuf, (void *) context); - for (i = 0 ; i < nRows; i++) + for (i = 0 ; i < nRows; i++) { sum = 0.0; ibeg = rowptr[i]; iend = rowptr[i+1]; for (j = ibeg; j < iend; j++) - { + { k = colInd[j]; sum += (colVal[j] * dbuf[k]); } ap[i] = sum; } - if (dbuf != NULL) free(dbuf); + hypre_TFree(dbuf, HYPRE_MEMORY_HOST); return 1; #else @@ -274,7 +274,7 @@ int HYPRE_LSI_MLMaxwellCreate(MPI_Comm comm, HYPRE_Solver *solver) /* create an internal ML data structure */ MLMaxwell_Link *link = hypre_TAlloc(MLMaxwell_Link, 1, HYPRE_MEMORY_HOST); - if (link == NULL) return 1; + if (link == NULL) return 1; /* fill in all other default parameters */ @@ -297,7 +297,7 @@ int HYPRE_LSI_MLMaxwellCreate(MPI_Comm comm, HYPRE_Solver *solver) link->GTmat_array = NULL; link->node_args = NULL; link->edge_args = NULL; - + ML_Create(&(link->ml_ee), link->nlevels); ML_Create(&(link->ml_nn), link->nlevels); @@ -324,66 +324,66 @@ int HYPRE_LSI_MLMaxwellDestroy(HYPRE_Solver solver) if (link->ml_ag != NULL) ML_Aggregate_Destroy(&(link->ml_ag)); if (link->ml_ee != NULL) ML_Destroy(&(link->ml_ee)); if (link->ml_nn != NULL) ML_Destroy(&(link->ml_nn)); - if (link->Aee_contxt->partition != NULL) free(link->Aee_contxt->partition); - if (link->Ann_contxt->partition != NULL) free(link->Ann_contxt->partition); + hypre_TFree(link->Aee_contxt->partition, HYPRE_MEMORY_HOST); + hypre_TFree(link->Ann_contxt->partition, HYPRE_MEMORY_HOST); if (link->Aee_contxt->Amat != NULL) { Amat = (HYPRE_ML_Matrix *) link->Aee_contxt->Amat; - if (Amat->sendProc != NULL ) free (Amat->sendProc); - if 
(Amat->sendLeng != NULL ) free (Amat->sendLeng); - if (Amat->sendList != NULL ) + hypre_TFree(Amat->sendProc, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->sendLeng, HYPRE_MEMORY_HOST); + if (Amat->sendList != NULL ) { for (i = 0; i < Amat->sendProcCnt; i++) - if (Amat->sendList[i] != NULL) free (Amat->sendList[i]); - free (Amat->sendList); + hypre_TFree(Amat->sendList[i], HYPRE_MEMORY_HOST); + hypre_TFree(Amat->sendList, HYPRE_MEMORY_HOST); } - if (Amat->recvProc != NULL) free (Amat->recvProc); - if (Amat->recvLeng != NULL) free (Amat->recvLeng); - if (Amat->map != NULL) free (Amat->map); - free(Amat); + hypre_TFree(Amat->recvProc, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->recvLeng, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->map, HYPRE_MEMORY_HOST); + hypre_TFree(Amat, HYPRE_MEMORY_HOST); } - if (link->Aee_contxt != NULL) free(link->Aee_contxt); + hypre_TFree(link->Aee_contxt, HYPRE_MEMORY_HOST); if (link->Ann_contxt->Amat != NULL) { Amat = (HYPRE_ML_Matrix *) link->Ann_contxt->Amat; - if (Amat->sendProc != NULL ) free (Amat->sendProc); - if (Amat->sendLeng != NULL ) free (Amat->sendLeng); - if (Amat->sendList != NULL ) + hypre_TFree(Amat->sendProc, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->sendLeng, HYPRE_MEMORY_HOST); + if (Amat->sendList != NULL ) { for (i = 0; i < Amat->sendProcCnt; i++) - if (Amat->sendList[i] != NULL) free (Amat->sendList[i]); - free (Amat->sendList); + hypre_TFree(Amat->sendList[i], HYPRE_MEMORY_HOST); + hypre_TFree(Amat->sendList, HYPRE_MEMORY_HOST); } - if (Amat->recvProc != NULL) free (Amat->recvProc); - if (Amat->recvLeng != NULL) free (Amat->recvLeng); - if (Amat->map != NULL) free (Amat->map); - free(Amat); + hypre_TFree(Amat->recvProc, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->recvLeng, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->map, HYPRE_MEMORY_HOST); + hypre_TFree(Amat, HYPRE_MEMORY_HOST); } - if (link->Ann_contxt != NULL) free(link->Ann_contxt); + hypre_TFree(link->Ann_contxt, HYPRE_MEMORY_HOST); if (link->G_contxt->Amat != NULL) { Amat = 
(HYPRE_ML_Matrix *) link->G_contxt->Amat; - if (Amat->sendProc != NULL ) free (Amat->sendProc); - if (Amat->sendLeng != NULL ) free (Amat->sendLeng); - if (Amat->sendList != NULL ) + hypre_TFree(Amat->sendProc, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->sendLeng, HYPRE_MEMORY_HOST); + if (Amat->sendList != NULL ) { for (i = 0; i < Amat->sendProcCnt; i++) - if (Amat->sendList[i] != NULL) free (Amat->sendList[i]); - free (Amat->sendList); + hypre_TFree(Amat->sendList[i], HYPRE_MEMORY_HOST); + hypre_TFree(Amat->sendList, HYPRE_MEMORY_HOST); } - if (Amat->recvProc != NULL) free (Amat->recvProc); - if (Amat->recvLeng != NULL) free (Amat->recvLeng); - if (Amat->map != NULL) free (Amat->map); - free(Amat); + hypre_TFree(Amat->recvProc, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->recvLeng, HYPRE_MEMORY_HOST); + hypre_TFree(Amat->map, HYPRE_MEMORY_HOST); + hypre_TFree(Amat, HYPRE_MEMORY_HOST); } - if (link->G_contxt != NULL) free(link->G_contxt); + hypre_TFree(link->G_contxt, HYPRE_MEMORY_HOST); if (link->Gmat != NULL) ML_Operator_Destroy(&(link->Gmat)); if (link->GTmat != NULL) ML_Operator_Destroy(&(link->GTmat)); if (link->Gmat_array != NULL) - ML_MGHierarchy_ReitzingerDestroy(link->nlevels-2, + ML_MGHierarchy_ReitzingerDestroy(link->nlevels-2, &(link->Gmat_array), &(link->GTmat_array)); if (link->node_args != NULL) @@ -391,7 +391,7 @@ int HYPRE_LSI_MLMaxwellDestroy(HYPRE_Solver solver) if (link->edge_args != NULL) ML_Smoother_Arglist_Delete(&(link->edge_args)); - free(link); + hypre_TFree(link, HYPRE_MEMORY_HOST); return 0; #else @@ -414,35 +414,35 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, int hiptmair_type=HALF_HIPTMAIR, Nits_per_presmooth=1; int Ncoarse_edge, Ncoarse_node; double edge_coarsening_rate, node_coarsening_rate; - double node_omega = ML_DDEFAULT, edge_omega = ML_DDEFAULT; + double node_omega = ML_DDEFAULT, edge_omega = ML_DDEFAULT; ML *ml_ee, *ml_nn; ML_Operator *Gmat, *GTmat; MLMaxwell_Link *link; HYPRE_ML_Matrix *mh_Aee, *mh_G, 
*mh_Ann; MLMaxwell_Context *Aee_context, *G_context, *Ann_context; - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* set up the parallel environment */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ link = (MLMaxwell_Link *) solver; MPI_Comm_rank(link->comm, &mypid); MPI_Comm_size(link->comm, &nprocs); - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* create ML structures */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ nlevels = link->nlevels; ML_Create(&(link->ml_ee), nlevels); ML_Create(&(link->ml_nn), nlevels); ml_ee = link->ml_ee; ml_nn = link->ml_nn; - - /* -------------------------------------------------------- */ + + /* -------------------------------------------------------- */ /* fetch the matrix row partition information and put it */ /* into the matrix data object (for matvec and getrow) */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ Aee_context = hypre_TAlloc(MLMaxwell_Context, 1, HYPRE_MEMORY_HOST); link->Aee_contxt = Aee_context; @@ -455,7 +455,7 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, hypre_TFree(row_partition, HYPRE_MEMORY_HOST); mh_Aee = hypre_TAlloc(HYPRE_ML_Matrix, 1, HYPRE_MEMORY_HOST); HYPRE_LSI_MLConstructMLMatrix(A_ee,mh_Aee,Aee_context->partition, - link->comm,Aee_context); + link->comm,Aee_context); Aee_context->Amat = mh_Aee; Ann_context = hypre_TAlloc(MLMaxwell_Context, 1, HYPRE_MEMORY_HOST); @@ -469,7 +469,7 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, hypre_TFree(row_partition, HYPRE_MEMORY_HOST); mh_Ann = hypre_TAlloc(HYPRE_ML_Matrix, 1, 
HYPRE_MEMORY_HOST); HYPRE_LSI_MLConstructMLMatrix(link->hypreAnn,mh_Ann,Ann_context->partition, - link->comm,Ann_context); + link->comm,Ann_context); Ann_context->Amat = mh_Ann; G_context = hypre_TAlloc(MLMaxwell_Context, 1, HYPRE_MEMORY_HOST); @@ -482,12 +482,12 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, hypre_TFree(row_partition, HYPRE_MEMORY_HOST); mh_G = hypre_TAlloc(HYPRE_ML_Matrix, 1, HYPRE_MEMORY_HOST); HYPRE_LSI_MLConstructMLMatrix(link->hypreG,mh_G,G_context->partition, - link->comm,G_context); + link->comm,G_context); G_context->Amat = mh_G; - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* Build A_ee directly as an ML matrix */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ ML_Init_Amatrix(ml_ee,nlevels-1,edgeNEqns,edgeNEqns,(void *)Aee_context); length = edgeNEqns; @@ -495,9 +495,9 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, ML_Set_Amatrix_Getrow(ml_ee, nlevels-1, ML_GetRow, ML_ExchBdry, length); ML_Operator_Set_ApplyFunc(&(ml_ee->Amat[nlevels-1]), ML_MatVec); - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* Build A_nn directly as an ML matrix */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ ML_Init_Amatrix(ml_nn, nlevels-1,nodeNEqns,nodeNEqns,(void *)Ann_context); length = nodeNEqns; @@ -505,9 +505,9 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, ML_Set_Amatrix_Getrow(ml_nn, nlevels-1, ML_GetRow, ML_ExchBdry, length); ML_Operator_Set_ApplyFunc(&(ml_nn->Amat[nlevels-1]), ML_MatVec); - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* Build G matrix 
and its transpose */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ Gmat = ML_Operator_Create(ml_ee->comm); ML_Operator_Set_Getrow(Gmat, edgeNEqns, ML_GetRow); @@ -522,9 +522,9 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, ML_Operator_Transpose_byrow(Gmat, GTmat); link->GTmat = GTmat; - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* create an AMG or aggregate context */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ ML_Set_PrintLevel(2); ML_Set_Tolerance(ml_ee, 1.0e-8); @@ -539,10 +539,10 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, GTmat, &(link->Gmat_array), &(link->GTmat_array), link->smoothP_flag, 1.5, 0, ML_DDEFAULT); - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* Set the Hiptmair subsmoothers */ - /* -------------------------------------------------------- */ - + /* -------------------------------------------------------- */ + if (link->node_smoother == (void *) ML_Gen_Smoother_SymGaussSeidel) { link->node_args = ML_Smoother_Arglist_Create(2); @@ -570,9 +570,9 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, ML_gsum_scalar_int(&Nfine_edge, &itmp, ml_ee->comm); } - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* perform aggregation */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ if (mypid == 0) printf("HYPRE_MLMaxwell : number of levels = %d\n", coarsest_level); @@ -583,7 +583,7 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, /* set up at all 
levels */ /* -------------------------------------------------------- */ - for (level = nlevels-1; level >= coarsest_level; level--) + for (level = nlevels-1; level >= coarsest_level; level--) { if (link->edge_smoother == (void *) ML_Gen_Smoother_MLS) { @@ -615,16 +615,16 @@ int HYPRE_LSI_MLMaxwellSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_ee, } ML_Gen_Smoother_Hiptmair(ml_ee, level, ML_BOTH, Nits_per_presmooth, link->Gmat_array, link->GTmat_array, NULL, - link->edge_smoother, link->edge_args, + link->edge_smoother, link->edge_args, link->node_smoother, link->node_args, hiptmair_type); } - - /* -------------------------------------------------------- */ + + /* -------------------------------------------------------- */ /* set up smoother and coarse solver */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ ML_Gen_Solver(ml_ee, ML_MGV, nlevels-1, coarsest_level); - + return 0; #else printf("ML not linked.\n"); @@ -646,7 +646,7 @@ int HYPRE_LSI_MLMaxwellSolve(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, rhs = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) b)); sol = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *) x)); - + ML_Solve_AMGV(ml_ee, rhs, sol); return 0; @@ -664,16 +664,16 @@ int HYPRE_LSI_MLMaxwellSetStrengthThreshold(HYPRE_Solver solver, double strength_threshold) { MLMaxwell_Link *link = (MLMaxwell_Link *) solver; - + if (strength_threshold < 0.0) { printf("HYPRE_LSI_MLMaxwellSetStrengthThreshold WARNING: set to 0.\n"); link->ag_threshold = 0.0; - } + } else { link->ag_threshold = strength_threshold; - } + } return( 0 ); } @@ -705,7 +705,7 @@ int HYPRE_LSI_MLMaxwellSetANNMatrix(HYPRE_Solver solver, HYPRE_ParCSRMatrix ANN) int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, HYPRE_ML_Matrix *ml_mat, int *partition, - MPI_Comm comm, MLMaxwell_Context *obj) + MPI_Comm comm, MLMaxwell_Context *obj) { int i, j, index, mypid, nprocs; int 
rowLeng, *colInd, startRow, endRow, localEqns; @@ -722,7 +722,7 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, /* -------------------------------------------------------- */ /* get machine information and local matrix information */ /* -------------------------------------------------------- */ - + #ifdef HYPRE_SEQUENTIAL mypid = 0; nprocs = 1; @@ -798,24 +798,24 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, /* -------------------------------------------------------- */ /* allocate the CSR matrix */ - /* -------------------------------------------------------- */ - - nnz = 0; - for (i = 0; i < localEqns; i++) nnz += diagSize[i] + offdiagSize[i]; - rowptr = hypre_TAlloc(int, (localEqns + 1) , HYPRE_MEMORY_HOST); - columns = hypre_TAlloc(int, nnz , HYPRE_MEMORY_HOST); - values = hypre_TAlloc(double, nnz , HYPRE_MEMORY_HOST); - rowptr[0] = 0; + /* -------------------------------------------------------- */ + + nnz = 0; + for (i = 0; i < localEqns; i++) nnz += diagSize[i] + offdiagSize[i]; + rowptr = hypre_TAlloc(int, (localEqns + 1) , HYPRE_MEMORY_HOST); + columns = hypre_TAlloc(int, nnz , HYPRE_MEMORY_HOST); + values = hypre_TAlloc(double, nnz , HYPRE_MEMORY_HOST); + rowptr[0] = 0; for (i = 1; i <= localEqns; i++) rowptr[i] = rowptr[i-1] + diagSize[i-1] + offdiagSize[i-1]; - free(diagSize); - free(offdiagSize); + hypre_TFree(diagSize, HYPRE_MEMORY_HOST); + hypre_TFree(offdiagSize, HYPRE_MEMORY_HOST); - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ /* put the matrix data in the CSR matrix */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ - rowptr[0] = 0; + rowptr[0] = 0; ncnt = 0; for (i = startRow; i <= endRow; i++) { @@ -842,11 +842,11 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, rowptr[i-startRow+1] = ncnt; HYPRE_ParCSRMatrixRestoreRow(A, i, &rowLeng, &colInd, 
&colVal); } - assert(ncnt == nnz); - - /* -------------------------------------------------------- */ + hypre_assert(ncnt == nnz); + + /* -------------------------------------------------------- */ /* initialize the MH_Matrix data structure */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ ml_mat->Nrows = localEqns; ml_mat->rowptr = rowptr; @@ -860,18 +860,18 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, ml_mat->recvProc = NULL; ml_mat->sendList = NULL; ml_mat->map = externList; - - /* -------------------------------------------------------- */ + + /* -------------------------------------------------------- */ /* form the remote portion of the matrix */ - /* -------------------------------------------------------- */ + /* -------------------------------------------------------- */ #ifndef HYPRE_SEQUENTIAL - if (nprocs > 1) + if (nprocs > 1) { - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* count number of elements to be received from each */ /* remote processor (assume sequential mapping) */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ tempCnt = hypre_TAlloc(int, nprocs, HYPRE_MEMORY_HOST); for (i = 0; i < nprocs; i++) tempCnt[i] = 0; @@ -879,7 +879,7 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, { for ( j = 0; j < nprocs; j++) { - if (externList[i] >= partition[j] && + if (externList[i] >= partition[j] && externList[i] < partition[j+1]) { tempCnt[j]++; @@ -888,9 +888,9 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, } } - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* compile a list processors data is to be received from */ - /* ----------------------------------------------------- */ + /* 
----------------------------------------------------- */ recvProcCnt = 0; for (i = 0; i < nprocs; i++) @@ -900,64 +900,64 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, recvProcCnt = 0; for (i = 0; i < nprocs; i++) { - if (tempCnt[i] > 0) + if (tempCnt[i] > 0) { recvProc[recvProcCnt] = i; recvLeng[recvProcCnt++] = tempCnt[i]; } } - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* each processor has to find out how many processors it */ /* has to send data to */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ sendLeng = hypre_TAlloc(int, nprocs , HYPRE_MEMORY_HOST); for (i = 0; i < nprocs; i++) tempCnt[i] = 0; for (i = 0; i < recvProcCnt; i++) tempCnt[recvProc[i]] = 1; MPI_Allreduce(tempCnt, sendLeng, nprocs, MPI_INT, MPI_SUM, comm); sendProcCnt = sendLeng[mypid]; - free(sendLeng); + hypre_TFree(sendLeng, HYPRE_MEMORY_HOST); if (sendProcCnt > 0) { sendLeng = hypre_TAlloc(int, sendProcCnt , HYPRE_MEMORY_HOST); sendProc = hypre_TAlloc(int, sendProcCnt , HYPRE_MEMORY_HOST); sendList = hypre_TAlloc(int*, sendProcCnt , HYPRE_MEMORY_HOST); } - else + else { sendLeng = sendProc = NULL; sendList = NULL; } - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* each processor sends to all processors it expects to */ /* receive data about the lengths of data expected */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ msgid = 539; for (i = 0; i < recvProcCnt; i++) { MPI_Send((void*) &recvLeng[i],1,MPI_INT,recvProc[i],msgid,comm); } - for (i = 0; i < sendProcCnt; i++) + for (i = 0; i < sendProcCnt; i++) { MPI_Recv((void*) &sendLeng[i],1,MPI_INT,MPI_ANY_SOURCE,msgid, comm,&status); sendProc[i] = status.MPI_SOURCE; sendList[i] = hypre_TAlloc(int, sendLeng[i] , 
HYPRE_MEMORY_HOST); - if (sendList[i] == NULL) + if (sendList[i] == NULL) printf("allocate problem %d \n", sendLeng[i]); } - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* each processor sends to all processors it expects to */ /* receive data about the equation numbers */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ - for (i = 0; i < nprocs; i++) tempCnt[i] = 0; + for (i = 0; i < nprocs; i++) tempCnt[i] = 0; ncnt = 1; - for (i = 0; i < externLeng; i++) + for (i = 0; i < externLeng; i++) { if ( externList[i] >= partition[ncnt] ) { @@ -965,23 +965,23 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, i--; ncnt++; } - } - for (i = ncnt-1; i < nprocs; i++) tempCnt[i] = externLeng; + } + for (i = ncnt-1; i < nprocs; i++) tempCnt[i] = externLeng; - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* send the global equation numbers */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ if (sendProcCnt > 0) requests = hypre_TAlloc(MPI_Request, sendProcCnt, HYPRE_MEMORY_HOST); msgid = 540; - for (i = 0; i < sendProcCnt; i++) + for (i = 0; i < sendProcCnt; i++) { MPI_Irecv((void*)sendList[i],sendLeng[i],MPI_INT,sendProc[i], msgid,comm,&requests[i]); } - for (i = 0; i < recvProcCnt; i++) + for (i = 0; i < recvProcCnt; i++) { if (recvProc[i] == 0) j = 0; else j = tempCnt[recvProc[i]-1]; @@ -989,18 +989,19 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, MPI_Send((void*) &externList[j], rowLeng, MPI_INT, recvProc[i], msgid, comm); } - for (i = 0; i < sendProcCnt; i++) + for (i = 0; i < sendProcCnt; i++) { MPI_Wait( &requests[i], &status ); } - if (sendProcCnt > 0) free(requests); + if (sendProcCnt > 0) + hypre_TFree(requests, HYPRE_MEMORY_HOST); - /* 
----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* convert the send list from global to local numbers */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ for (i = 0; i < sendProcCnt; i++) - { + { for (j = 0; j < sendLeng[i]; j++) { index = sendList[i][j] - startRow; @@ -1013,9 +1014,9 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, } } - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* convert the send list from global to local numbers */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ ml_mat->sendProcCnt = sendProcCnt; ml_mat->recvProcCnt = recvProcCnt; @@ -1025,11 +1026,11 @@ int HYPRE_LSI_MLConstructMLMatrix(HYPRE_ParCSRMatrix A, ml_mat->recvProc = recvProc; ml_mat->sendList = sendList; - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ /* clean up */ - /* ----------------------------------------------------- */ + /* ----------------------------------------------------- */ - free(tempCnt); + hypre_TFree(tempCnt, HYPRE_MEMORY_HOST); } return 0; #else diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_poly.c b/src/FEI_mv/fei-hypre/HYPRE_LSI_poly.c index 59f631bbc..167e72c68 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_poly.c +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_poly.c @@ -37,13 +37,13 @@ HYPRE_LSI_Poly; #define habs(x) ((x > 0) ? (x) : -(x)) /*-------------------------------------------------------------------------- - * HYPRE_LSI_PolyCreate - Return a polynomial preconditioner object "solver". + * HYPRE_LSI_PolyCreate - Return a polynomial preconditioner object "solver". 
*--------------------------------------------------------------------------*/ int HYPRE_LSI_PolyCreate( MPI_Comm comm, HYPRE_Solver *solver ) { HYPRE_LSI_Poly *poly_ptr; - + poly_ptr = hypre_TAlloc(HYPRE_LSI_Poly, 1, HYPRE_MEMORY_HOST); if (poly_ptr == NULL) return 1; @@ -68,8 +68,8 @@ int HYPRE_LSI_PolyDestroy( HYPRE_Solver solver ) HYPRE_LSI_Poly *poly_ptr; poly_ptr = (HYPRE_LSI_Poly *) solver; - if ( poly_ptr->coefficients != NULL ) free(poly_ptr->coefficients); - free(poly_ptr); + hypre_TFree(poly_ptr->coefficients, HYPRE_MEMORY_HOST); + hypre_TFree(poly_ptr, HYPRE_MEMORY_HOST); return 0; } @@ -85,14 +85,14 @@ int HYPRE_LSI_PolySetOrder(HYPRE_Solver solver, int order ) poly_ptr->order = order; if ( poly_ptr->order < 0 ) poly_ptr->order = 0; if ( poly_ptr->order > 8 ) poly_ptr->order = 8; - if ( poly_ptr->coefficients != NULL ) free( poly_ptr->coefficients ); + hypre_TFree(poly_ptr->coefficients, HYPRE_MEMORY_HOST); poly_ptr->coefficients = NULL; return 0; } /*-------------------------------------------------------------------------- - * HYPRE_LSI_PolySetOutputLevel - Set debug level + * HYPRE_LSI_PolySetOutputLevel - Set debug level *--------------------------------------------------------------------------*/ int HYPRE_LSI_PolySetOutputLevel(HYPRE_Solver solver, int level) @@ -127,20 +127,20 @@ int HYPRE_LSI_PolySolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, exit(1); } orig_rhs = hypre_TAlloc(double, Nrows , HYPRE_MEMORY_HOST); - for ( i = 0; i < Nrows; i++ ) + for ( i = 0; i < Nrows; i++ ) { - orig_rhs[i] = rhs[i]; - soln[i] = rhs[i] * coefs[order]; + orig_rhs[i] = rhs[i]; + soln[i] = rhs[i] * coefs[order]; } - for (i = order - 1; i >= 0; i-- ) + for (i = order - 1; i >= 0; i-- ) { HYPRE_ParCSRMatrixMatvec(1.0, A, x, 0.0, b); mult = coefs[i]; for ( j = 0; j < Nrows; j++ ) soln[j] = mult * orig_rhs[j] + rhs[j]; } - for ( i = 0; i < Nrows; i++ ) rhs[i] = orig_rhs[i]; - free( orig_rhs ); + for ( i = 0; i < Nrows; i++ ) rhs[i] = orig_rhs[i]; + 
hypre_TFree(orig_rhs, HYPRE_MEMORY_HOST); return 0; } @@ -183,7 +183,7 @@ int HYPRE_LSI_PolySetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, startRow = row_partition[my_id]; endRow = row_partition[my_id+1] - 1; - hypre_TFree( row_partition , HYPRE_MEMORY_HOST); + hypre_TFree( row_partition , HYPRE_MEMORY_HOST); poly_ptr->Nrows = endRow - startRow + 1; max_norm = 0.0; @@ -202,7 +202,7 @@ int HYPRE_LSI_PolySetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, HYPRE_ParCSRMatrixRestoreRow(A_csr, i, &rowLeng, &colInd, &colVal); } #ifndef HYPRE_SEQUENTIAL - MPI_Allreduce(&max_norm, &dtemp, 1, MPI_DOUBLE, MPI_MAX, poly_ptr->comm); + MPI_Allreduce(&max_norm, &dtemp, 1, MPI_DOUBLE, MPI_MAX, poly_ptr->comm); #endif if ( pos_diag == 0 && neg_diag > 0 ) max_norm = - max_norm; @@ -210,13 +210,13 @@ int HYPRE_LSI_PolySetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, /* fill in the coefficient table */ /* ---------------------------------------------------------------- */ - switch ( order ) + switch ( order ) { case 0: coefs[0] = 1.0; break; case 1: coefs[0] = 5.0; coefs[1] = -1.0; break; - case 2: coefs[0] = 14.0; coefs[1] = -7.0; coefs[2] = 1.0; + case 2: coefs[0] = 14.0; coefs[1] = -7.0; coefs[2] = 1.0; break; - case 3: coefs[0] = 30.0; coefs[1] = -27.0; coefs[2] = 9.0; + case 3: coefs[0] = 30.0; coefs[1] = -27.0; coefs[2] = 9.0; coefs[3] = -1.0; break; case 4: coefs[0] = 55.0; coefs[1] = -77.0; coefs[2] = 44.0; coefs[3] = -11.0; coefs[4] = 1.0; break; @@ -226,7 +226,7 @@ int HYPRE_LSI_PolySetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, case 6: coefs[0] = 140.0; coefs[1] = -378.0; coefs[2] = 450.0; coefs[3] = -275.0; coefs[4] = 90.0; coefs[5] = -15.0; coefs[6] = 1.0; break; - case 7: coefs[0] = 204.0; coefs[1] = -714.0; coefs[2] = 1122.0; + case 7: coefs[0] = 204.0; coefs[1] = -714.0; coefs[2] = 1122.0; coefs[3] = -935.0; coefs[4] = 442.0; coefs[5] = -119.0; coefs[6] = 17.0; coefs[7] = -1.0; break; case 8: coefs[0] = 285.0; coefs[1] = -1254.0; coefs[2] = 2508.0; 
diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_schur.cxx b/src/FEI_mv/fei-hypre/HYPRE_LSI_schur.cxx index eeaac4aec..ed562bb62 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_schur.cxx +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_schur.cxx @@ -8,7 +8,6 @@ #include #include #include -#include #include "HYPRE_FEI_includes.h" @@ -103,7 +102,7 @@ int HYPRE_LSI_Schur::computeBlockInfo() endRow = APartition_[mypid+1] - 1; localNrows = endRow - startRow + 1; globalNrows = APartition_[nprocs]; - + //------------------------------------------------------------------ // find the local size of the (2,2) block //------------------------------------------------------------------ @@ -113,10 +112,10 @@ int HYPRE_LSI_Schur::computeBlockInfo() for ( irow = startRow; irow <= endRow; irow++ ) { nodeNum = lookup_->getAssociatedNodeNumber(irow); - if ( nodeNum != lastNodeNum ) + if ( nodeNum != lastNodeNum ) { - if (count == 1) break; - lastNodeNum = nodeNum; + if (count == 1) break; + lastNodeNum = nodeNum; count = 1; } else count++; @@ -128,11 +127,11 @@ int HYPRE_LSI_Schur::computeBlockInfo() printf("%4d HYPRE_LSI_Schur : P22_size = %d\n", mypid, P22Size_); //------------------------------------------------------------------ - // allocate array for storing indices of (2,2) block variables + // allocate array for storing indices of (2,2) block variables //------------------------------------------------------------------ if ( P22Size_ > 0 ) P22LocalInds_ = new int[P22Size_]; - else P22LocalInds_ = NULL; + else P22LocalInds_ = NULL; //------------------------------------------------------------------ // compose a local list of rows for the (2,2) block @@ -143,16 +142,16 @@ int HYPRE_LSI_Schur::computeBlockInfo() for ( irow = startRow; irow <= endRow; irow++ ) { nodeNum = lookup_->getAssociatedNodeNumber(irow); - if ( nodeNum != lastNodeNum ) + if ( nodeNum != lastNodeNum ) { - if (count == 1) break; - lastNodeNum = nodeNum; + if (count == 1) break; + lastNodeNum = nodeNum; count = 1; } else count++; 
} index = irow - 1; - for ( irow = index; irow <= endRow; irow++ ) + for ( irow = index; irow <= endRow; irow++ ) P22LocalInds_[P22Size_++] = irow; //------------------------------------------------------------------ @@ -179,7 +178,7 @@ int HYPRE_LSI_Schur::computeBlockInfo() P22Offsets_ = new int[nprocs]; MPI_Allgather(&P22Size_, 1, MPI_INT, P22Offsets_, 1, MPI_INT, mpiComm_); dispArray[0] = 0; - for ( j = 1; j < nprocs; j++ ) + for ( j = 1; j < nprocs; j++ ) dispArray[j] = dispArray[j-1] + P22Offsets_[j-1]; MPI_Allgatherv(P22LocalInds_, P22Size_, MPI_INT, P22GlobalInds_, P22Offsets_, dispArray, MPI_INT, mpiComm_); @@ -193,7 +192,7 @@ int HYPRE_LSI_Schur::computeBlockInfo() j, P22LocalInds_[j]); } return 0; -} +} //****************************************************************************** // Given a matrix A, build the 2 x 2 blocks @@ -256,19 +255,19 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) if ( outputLevel_ >= 1 ) { printf("%4d HYPRE_LSI_Schur(1,1) : StartRow = %d\n",mypid,A11StartRow); - printf("%4d HYPRE_LSI_Schur(1,1) : GlobalDim = %d %d\n",mypid,A11GNRows, + printf("%4d HYPRE_LSI_Schur(1,1) : GlobalDim = %d %d\n",mypid,A11GNRows, A11GNCols); - printf("%4d HYPRE_LSI_Schur(1,1) : LocalDim = %d %d\n",mypid,A11NRows, + printf("%4d HYPRE_LSI_Schur(1,1) : LocalDim = %d %d\n",mypid,A11NRows, A11NCols); printf("%4d HYPRE_LSI_Schur(1,2) : StartRow = %d\n",mypid,A12StartRow); - printf("%4d HYPRE_LSI_Schur(1,2) : GlobalDim = %d %d\n",mypid,A12GNRows, + printf("%4d HYPRE_LSI_Schur(1,2) : GlobalDim = %d %d\n",mypid,A12GNRows, A12GNCols); - printf("%4d HYPRE_LSI_Schur(1,2) : LocalDim = %d %d\n",mypid,A12NRows, + printf("%4d HYPRE_LSI_Schur(1,2) : LocalDim = %d %d\n",mypid,A12NRows, A12NCols); printf("%4d HYPRE_LSI_Schur(2,2) : StartRow = %d\n",mypid,A22StartRow); - printf("%4d HYPRE_LSI_Schur(2,2) : GlobalDim = %d %d\n",mypid,A22GNRows, + printf("%4d HYPRE_LSI_Schur(2,2) : GlobalDim = %d %d\n",mypid,A22GNRows, A22GNCols); - printf("%4d 
HYPRE_LSI_Schur(2,2) : LocalDim = %d %d\n",mypid,A22NRows, + printf("%4d HYPRE_LSI_Schur(2,2) : LocalDim = %d %d\n",mypid,A22NRows, A22NCols); } @@ -287,14 +286,14 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) A22RowCnt = 0; HYPRE_IJMatrixGetObject(Amat, (void**) &Amat_csr); - for ( irow = AStartRow; irow < AStartRow+ANRows; irow++ ) + for ( irow = AStartRow; irow < AStartRow+ANRows; irow++ ) { HYPRE_ParCSRMatrixGetRow(Amat_csr, irow, &rowSize, &inds, &vals); searchInd = hypre_BinarySearch(P22LocalInds_, irow, P22Size_); if ( searchInd < 0 ) // A(1,1) or A(1,2) block { A11NewSize = A12NewSize = 0; - for ( j = 0; j < rowSize; j++ ) + for ( j = 0; j < rowSize; j++ ) { index = inds[j]; searchInd = hypre_BinarySearch(P22GlobalInds_,index,P22GSize_); @@ -304,9 +303,9 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) if ( A12NewSize == 0 ) A12NewSize = 1; A11RowLengs[A11RowCnt++] = A11NewSize; A12RowLengs[A12RowCnt++] = A12NewSize; - A11MaxRowLeng = (A11NewSize > A11MaxRowLeng) ? + A11MaxRowLeng = (A11NewSize > A11MaxRowLeng) ? A11NewSize : A11MaxRowLeng; - A12MaxRowLeng = (A12NewSize > A12MaxRowLeng) ? + A12MaxRowLeng = (A12NewSize > A12MaxRowLeng) ? A12NewSize : A12MaxRowLeng; if ( A11NewSize != 1 ) printf("%4d HYPRE_LSI_Schur WARNING - A11 row length > 1 : %d\n", @@ -315,14 +314,14 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) else // A(2,2) block { A22NewSize = 0; - for ( j = 0; j < rowSize; j++ ) + for ( j = 0; j < rowSize; j++ ) { index = inds[j]; searchInd = hypre_BinarySearch(P22GlobalInds_,index,P22GSize_); if (searchInd >= 0) A22NewSize++; } A22RowLengs[A22RowCnt++] = A22NewSize; - A22MaxRowLeng = (A22NewSize > A22MaxRowLeng) ? + A22MaxRowLeng = (A22NewSize > A22MaxRowLeng) ? 
A22NewSize : A22MaxRowLeng; } HYPRE_ParCSRMatrixRestoreRow(Amat_csr, irow, &rowSize, &inds, &vals); @@ -337,14 +336,14 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) ierr += HYPRE_IJMatrixSetObjectType(A11mat_, HYPRE_PARCSR); ierr = HYPRE_IJMatrixSetRowSizes(A11mat_, A11RowLengs); ierr += HYPRE_IJMatrixInitialize(A11mat_); - assert(!ierr); + hypre_assert(!ierr); delete [] A11RowLengs; ierr = HYPRE_IJMatrixCreate(mpiComm_, A12StartRow, A12StartRow+A12NRows-1, A12StartCol, A12StartCol+A12NCols-1, &A12mat_); ierr += HYPRE_IJMatrixSetObjectType(A12mat_, HYPRE_PARCSR); ierr = HYPRE_IJMatrixSetRowSizes(A12mat_, A12RowLengs); ierr += HYPRE_IJMatrixInitialize(A12mat_); - assert(!ierr); + hypre_assert(!ierr); delete [] A12RowLengs; if ( A22MaxRowLeng > 0 ) { @@ -353,7 +352,7 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) ierr += HYPRE_IJMatrixSetObjectType(A22mat_, HYPRE_PARCSR); ierr = HYPRE_IJMatrixSetRowSizes(A22mat_, A22RowLengs); ierr += HYPRE_IJMatrixInitialize(A22mat_); - assert(!ierr); + hypre_assert(!ierr); } else A22mat_ = NULL; delete [] A22RowLengs; @@ -373,18 +372,18 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) A12RowCnt = A12StartRow; A22RowCnt = A22StartRow; - for ( irow = AStartRow; irow < AStartRow+ANRows; irow++ ) + for ( irow = AStartRow; irow < AStartRow+ANRows; irow++ ) { HYPRE_ParCSRMatrixGetRow(Amat_csr, irow, &rowSize, &inds, &vals); searchInd = hypre_BinarySearch(P22LocalInds_, irow, P22Size_); if ( searchInd < 0 ) // A(1,1) or A(1,2) block { A11NewSize = A12NewSize = 0; - for ( j = 0; j < rowSize; j++ ) + for ( j = 0; j < rowSize; j++ ) { index = inds[j]; searchInd = HYPRE_LSI_Search(P22GlobalInds_,index,P22GSize_); - if (searchInd >= 0) // A(1,2) block + if (searchInd >= 0) // A(1,2) block { A12_inds[A12NewSize] = searchInd; A12_vals[A12NewSize++] = vals[j]; @@ -414,10 +413,10 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) exit(1); } } - HYPRE_IJMatrixSetValues(A11mat_, 1, &A11NewSize, + 
HYPRE_IJMatrixSetValues(A11mat_, 1, &A11NewSize, (const int *) &A11RowCnt, (const int *) A11_inds, (const double *) A11_vals); - HYPRE_IJMatrixSetValues(A12mat_, 1, &A12NewSize, + HYPRE_IJMatrixSetValues(A12mat_, 1, &A12NewSize, (const int *) &A12RowCnt, (const int *) A12_inds, (const double *) A12_vals); A11RowCnt++; @@ -426,11 +425,11 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) else if ( A22MaxRowLeng > 0 ) // A(2,2) block { A22NewSize = 0; - for ( j = 0; j < rowSize; j++ ) + for ( j = 0; j < rowSize; j++ ) { index = inds[j]; searchInd = hypre_BinarySearch(P22GlobalInds_,index,P22GSize_); - if (searchInd >= 0) + if (searchInd >= 0) { A22_inds[A22NewSize] = searchInd; A22_vals[A22NewSize++] = vals[j]; @@ -442,7 +441,7 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) A22_vals[0] = 0.0; A22NewSize = 1; } - HYPRE_IJMatrixSetValues(A22mat_, 1, &A22NewSize, + HYPRE_IJMatrixSetValues(A22mat_, 1, &A22NewSize, (const int *) &A22RowCnt, (const int *) A22_inds, (const double *) A22_vals); A22RowCnt++; @@ -457,22 +456,22 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) delete [] A22_vals; //------------------------------------------------------------------ - // finally assemble the matrix + // finally assemble the matrix //------------------------------------------------------------------ ierr = HYPRE_IJMatrixAssemble(A11mat_); ierr += HYPRE_IJMatrixGetObject(A11mat_, (void **) &A11mat_csr); - assert( !ierr ); + hypre_assert( !ierr ); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) A11mat_csr); ierr = HYPRE_IJMatrixAssemble(A12mat_); ierr += HYPRE_IJMatrixGetObject(A12mat_, (void **) &A12mat_csr); - assert( !ierr ); + hypre_assert( !ierr ); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) A12mat_csr); if ( A22mat_ != NULL ) { ierr = HYPRE_IJMatrixAssemble(A22mat_); ierr += HYPRE_IJMatrixGetObject(A22mat_, (void **) &A22mat_csr); - assert( !ierr ); + hypre_assert( !ierr ); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) A22mat_csr); } else 
A22mat_csr = NULL; @@ -481,7 +480,7 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) { sprintf( fname, "A11.%d", mypid); fp = fopen( fname, "w" ); - for ( irow = A11StartRow; irow < A11StartRow+A11NRows; irow++ ) + for ( irow = A11StartRow; irow < A11StartRow+A11NRows; irow++ ) { HYPRE_ParCSRMatrixGetRow(A11mat_csr,irow,&rowSize,&inds,&vals); for ( j = 0; j < rowSize; j++ ) @@ -491,7 +490,7 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) fclose(fp); sprintf( fname, "A12.%d", mypid); fp = fopen( fname, "w" ); - for ( irow = A12StartRow; irow < A12StartRow+A12NRows; irow++ ) + for ( irow = A12StartRow; irow < A12StartRow+A12NRows; irow++ ) { HYPRE_ParCSRMatrixGetRow(A12mat_csr,irow,&rowSize,&inds,&vals); for ( j = 0; j < rowSize; j++ ) @@ -503,7 +502,7 @@ int HYPRE_LSI_Schur::buildBlocks(HYPRE_IJMatrix Amat) { sprintf( fname, "A22.%d", mypid); fp = fopen( fname, "w" ); - for ( irow = A22StartRow; irow < A22StartRow+A22NRows; irow++ ) + for ( irow = A22StartRow; irow < A22StartRow+A22NRows; irow++ ) { HYPRE_ParCSRMatrixGetRow(A22mat_csr,irow,&rowSize,&inds,&vals); for ( j = 0; j < rowSize; j++ ) @@ -568,8 +567,8 @@ int HYPRE_LSI_Schur::setup(HYPRE_IJMatrix Amat, HYPRE_IJVector sol, //------------------------------------------------------------------ // create Pressure Poisson matrix (T = C^T M^{-1} C) //------------------------------------------------------------------ - - if (outputLevel_ >= 1) + + if (outputLevel_ >= 1) printf("%4d : HYPRE_LSI_Schur setup : C^T M^{-1} C begins\n", mypid); HYPRE_IJMatrixGetObject(A11mat_, (void **) &Mmat_csr); @@ -580,7 +579,7 @@ int HYPRE_LSI_Schur::setup(HYPRE_IJMatrix Amat, HYPRE_IJVector sol, (hypre_ParCSRMatrix *) Cmat_csr, (hypre_ParCSRMatrix **) &RAP_csr); - if (outputLevel_ >= 1) + if (outputLevel_ >= 1) printf("%4d : HYPRE_LSI_Schur setup : C^T M^{-1} C ends\n", mypid); //------------------------------------------------------------------ @@ -592,13 +591,13 @@ int HYPRE_LSI_Schur::setup(HYPRE_IJMatrix Amat, 
HYPRE_IJVector sol, ierr = HYPRE_IJMatrixCreate(mpiComm_, SStartRow, SStartRow+SNRows-1, SStartRow, SStartRow+SNRows-1, &Smat); ierr += HYPRE_IJMatrixSetObjectType(Smat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if ( A22mat_ != NULL ) HYPRE_IJMatrixGetObject(A22mat_, (void **) &A22mat_csr); SRowLengs = new int[SNRows]; maxRowSize = 0; - for ( irow = SStartRow; irow < SStartRow+SNRows; irow++ ) + for ( irow = SStartRow; irow < SStartRow+SNRows; irow++ ) { HYPRE_ParCSRMatrixGetRow(RAP_csr,irow,&rowSize,&colInd,NULL); newRowSize = rowSize; @@ -630,10 +629,10 @@ int HYPRE_LSI_Schur::setup(HYPRE_IJMatrix Amat, HYPRE_IJVector sol, } ierr = HYPRE_IJMatrixSetRowSizes(Smat, SRowLengs); ierr += HYPRE_IJMatrixInitialize(Smat); - assert(!ierr); + hypre_assert(!ierr); delete [] SRowLengs; - for ( irow = SStartRow; irow < SStartRow+SNRows; irow++ ) + for ( irow = SStartRow; irow < SStartRow+SNRows; irow++ ) { HYPRE_ParCSRMatrixGetRow(RAP_csr,irow,&rowSize,&colInd,&colVal); if ( A22mat_csr == NULL ) @@ -641,7 +640,7 @@ int HYPRE_LSI_Schur::setup(HYPRE_IJMatrix Amat, HYPRE_IJVector sol, newRowSize = rowSize; newColInd = new int[newRowSize]; newColVal = new double[newRowSize]; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { newColInd[j] = colInd[j]; newColVal[j] = - colVal[j]; @@ -653,12 +652,12 @@ int HYPRE_LSI_Schur::setup(HYPRE_IJMatrix Amat, HYPRE_IJVector sol, newRowSize = rowSize + rowSize2; newColInd = new int[newRowSize]; newColVal = new double[newRowSize]; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { newColInd[j] = colInd[j]; newColVal[j] = - colVal[j]; } - for (j = 0; j < rowSize2; j++) + for (j = 0; j < rowSize2; j++) { newColInd[j+rowSize] = colInd2[j]; newColVal[j+rowSize] = colVal2[j]; @@ -700,7 +699,7 @@ int HYPRE_LSI_Schur::setup(HYPRE_IJMatrix Amat, HYPRE_IJVector sol, Svec_ = (*rrhs); //------------------------------------------------------------------ - // construct the new solution and residual vectors + // 
construct the new solution and residual vectors //------------------------------------------------------------------ V2Leng = P22Size_; @@ -709,12 +708,12 @@ int HYPRE_LSI_Schur::setup(HYPRE_IJMatrix Amat, HYPRE_IJVector sol, HYPRE_IJVectorSetObjectType(X2vec, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(X2vec); ierr += HYPRE_IJVectorAssemble(X2vec); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorCreate(mpiComm_, V2Start, V2Start+V2Leng-1, &R2vec); HYPRE_IJVectorSetObjectType(R2vec, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(R2vec); ierr += HYPRE_IJVectorAssemble(R2vec); - assert(!ierr); + hypre_assert(!ierr); (*rsol) = X2vec; (*rres) = R2vec; return 0; @@ -731,8 +730,8 @@ int HYPRE_LSI_Schur::computeRHS(HYPRE_IJVector rhs, HYPRE_IJVector *rrhs) int rowSize, *colInd; double *colVal, ddata; HYPRE_ParVector F1_csr, F2_csr; - HYPRE_IJVector F2vec, R2vec, X2vec; - HYPRE_ParCSRMatrix A11_csr, C_csr; + HYPRE_IJVector F2vec, R2vec, X2vec; + HYPRE_ParCSRMatrix A11_csr, C_csr; //------------------------------------------------------------------ // error checking @@ -741,7 +740,7 @@ int HYPRE_LSI_Schur::computeRHS(HYPRE_IJVector rhs, HYPRE_IJVector *rrhs) if ( assembled_ == 0 ) return 1; //------------------------------------------------------------------ - // get machine and matrix information + // get machine and matrix information //------------------------------------------------------------------ MPI_Comm_rank( mpiComm_, &mypid ); @@ -750,7 +749,7 @@ int HYPRE_LSI_Schur::computeRHS(HYPRE_IJVector rhs, HYPRE_IJVector *rrhs) AEnd = APartition_[mypid+1] - 1; ANRows = AEnd - AStart + 1; HYPRE_IJMatrixGetObject(A11mat_, (void**) &A11_csr); - + //------------------------------------------------------------------ // construct the reduced right hand side //------------------------------------------------------------------ @@ -761,14 +760,14 @@ int HYPRE_LSI_Schur::computeRHS(HYPRE_IJVector rhs, HYPRE_IJVector *rrhs) HYPRE_IJVectorSetObjectType(F1vec_, 
HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(F1vec_); ierr += HYPRE_IJVectorAssemble(F1vec_); - assert(!ierr); + hypre_assert(!ierr); V2Leng = P22Size_; V2Start = P22Offsets_[mypid]; HYPRE_IJVectorCreate(mpiComm_, V2Start, V2Start+V2Leng-1, &F2vec); HYPRE_IJVectorSetObjectType(F2vec, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(F2vec); ierr += HYPRE_IJVectorAssemble(F2vec); - assert(!ierr); + hypre_assert(!ierr); f1Ind = V1Start; f2Ind = V2Start; @@ -783,14 +782,14 @@ int HYPRE_LSI_Schur::computeRHS(HYPRE_IJVector rhs, HYPRE_IJVector *rrhs) ierr = HYPRE_IJVectorSetValues(F1vec_, 1, (const int *) &f1Ind, (const double *) &ddata); HYPRE_ParCSRMatrixRestoreRow(A11_csr,f1Ind,&rowSize,&colInd,&colVal); - assert( !ierr ); + hypre_assert( !ierr ); f1Ind++; } else { ierr = HYPRE_IJVectorSetValues(F2vec, 1, (const int *) &f2Ind, (const double *) &ddata); - assert( !ierr ); + hypre_assert( !ierr ); f2Ind++; } } @@ -804,7 +803,7 @@ int HYPRE_LSI_Schur::computeRHS(HYPRE_IJVector rhs, HYPRE_IJVector *rrhs) } //****************************************************************************** -// compute the long solution +// compute the long solution //------------------------------------------------------------------------------ int HYPRE_LSI_Schur::computeSol(HYPRE_IJVector X2vec, HYPRE_IJVector Xvec) @@ -846,10 +845,10 @@ int HYPRE_LSI_Schur::computeSol(HYPRE_IJVector X2vec, HYPRE_IJVector Xvec) HYPRE_IJVectorSetObjectType(X1vec, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(X1vec); ierr += HYPRE_IJVectorAssemble(X1vec); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ - // recover X1 + // recover X1 //------------------------------------------------------------------ HYPRE_ParCSRMatrixMatvec( -1.0, C_csr, X2_csr, 1.0, F1_csr ); @@ -862,22 +861,22 @@ int HYPRE_LSI_Schur::computeSol(HYPRE_IJVector X2vec, HYPRE_IJVector Xvec) V1Cnt = AStart - P22Offsets_[mypid]; V2Cnt = P22Offsets_[mypid]; xvals = 
hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector*)Xvec)); - for ( irow = AStart; irow < AEnd; irow++ ) + for ( irow = AStart; irow < AEnd; irow++ ) { searchInd = hypre_BinarySearch( P22LocalInds_, irow, P22Size_); if ( searchInd >= 0 ) { ierr = HYPRE_IJVectorGetValues(X2vec, 1, &V2Cnt, &xvals[irow-AStart]); - assert( !ierr ); + hypre_assert( !ierr ); V2Cnt++; } else { ierr = HYPRE_IJVectorGetValues(X1vec, 1, &V1Cnt, &xvals[irow-AStart]); - assert( !ierr ); + hypre_assert( !ierr ); V1Cnt++; } - } + } //------------------------------------------------------------------ // clean up and return @@ -893,7 +892,7 @@ int HYPRE_LSI_Schur::computeSol(HYPRE_IJVector X2vec, HYPRE_IJVector Xvec) int HYPRE_LSI_Schur::print() { - int mypid, irow, j, nnz, V2Leng, V2Start, rowSize, *colInd; + int mypid, irow, j, nnz, V2Leng, V2Start, rowSize, *colInd; double *colVal, ddata; FILE *fp; char fname[100]; diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_schur.h b/src/FEI_mv/fei-hypre/HYPRE_LSI_schur.h index bfa7b66fb..78a048827 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_schur.h +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_schur.h @@ -6,7 +6,7 @@ ******************************************************************************/ // ************************************************************************* -// This is the HYPRE implementation of Schur reduction +// This is the HYPRE implementation of Schur reduction // ************************************************************************* #ifndef __HYPRE_LSI_SCHURH__ @@ -19,13 +19,12 @@ #include #include #include -#include #include #include "HYPRE.h" #include "IJ_mv/HYPRE_IJ_mv.h" // ************************************************************************* -// local defines +// local defines // ------------------------------------------------------------------------- #include "HYPRE_FEI_includes.h" @@ -38,10 +37,10 @@ class HYPRE_LSI_Schur { HYPRE_IJMatrix A11mat_; // mass matrix (should be diagonal) HYPRE_IJMatrix A12mat_; // gradient 
(divergence) matrix - HYPRE_IJMatrix A22mat_; // stabilization matrix + HYPRE_IJMatrix A22mat_; // stabilization matrix HYPRE_IJVector F1vec_; // rhs for block(1,1) - HYPRE_IJMatrix Smat_; // Schur complement matrix - HYPRE_IJVector Svec_; // reduced RHS + HYPRE_IJMatrix Smat_; // Schur complement matrix + HYPRE_IJVector Svec_; // reduced RHS int *APartition_; // processor partition of matrix A int P22Size_; // number of pressure variables int P22GSize_; // global number of pressure variables @@ -58,9 +57,9 @@ class HYPRE_LSI_Schur HYPRE_LSI_Schur(); virtual ~HYPRE_LSI_Schur(); int setLookup( Lookup *lookup ); - int setup(HYPRE_IJMatrix Amat, + int setup(HYPRE_IJMatrix Amat, HYPRE_IJVector sol, HYPRE_IJVector rhs, - HYPRE_IJMatrix *redA, HYPRE_IJVector *rsol, + HYPRE_IJMatrix *redA, HYPRE_IJVector *rsol, HYPRE_IJVector *rrhs, HYPRE_IJVector *rres); int computeRHS(HYPRE_IJVector rhs, HYPRE_IJVector *rrhs); int computeSol(HYPRE_IJVector rsol, HYPRE_IJVector sol); diff --git a/src/FEI_mv/fei-hypre/HYPRE_LSI_schwarz.c b/src/FEI_mv/fei-hypre/HYPRE_LSI_schwarz.c index 84fd3ab86..c54b13876 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LSI_schwarz.c +++ b/src/FEI_mv/fei-hypre/HYPRE_LSI_schwarz.c @@ -58,8 +58,8 @@ typedef struct HYPRE_LSI_Schwarz_Struct extern int HYPRE_LSI_MLConstructMHMatrix(HYPRE_ParCSRMatrix,MH_Matrix *, MPI_Comm, int *, MH_Context *); extern int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr, - MH_Matrix *Amat, int total_recv_leng, int *recv_lengths, - int *ext_ja, double *ext_aa, int *map, int *map2, + MH_Matrix *Amat, int total_recv_leng, int *recv_lengths, + int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset); extern int HYPRE_LSI_ILUTDecompose(HYPRE_LSI_Schwarz *sch_ptr); extern void hypre_qsort0(int *, int, int); @@ -76,7 +76,7 @@ extern int HYPRE_LSI_Search(int *, int, int); int HYPRE_LSI_SchwarzCreate( MPI_Comm comm, HYPRE_Solver *solver ) { HYPRE_LSI_Schwarz *sch_ptr; - + sch_ptr = hypre_TAlloc(HYPRE_LSI_Schwarz, 1, 
HYPRE_MEMORY_HOST); if (sch_ptr == NULL) return 1; @@ -115,61 +115,67 @@ int HYPRE_LSI_SchwarzDestroy( HYPRE_Solver solver ) sch_ptr = (HYPRE_LSI_Schwarz *) solver; if ( sch_ptr->bmat_ia != NULL ) { - for ( i = 0; i < sch_ptr->nblocks; i++ ) free(sch_ptr->bmat_ia[i]); - free(sch_ptr->bmat_ia); + for ( i = 0; i < sch_ptr->nblocks; i++ ) + hypre_TFree(sch_ptr->bmat_ia[i], HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->bmat_ia, HYPRE_MEMORY_HOST); } if ( sch_ptr->bmat_ja != NULL ) { - for ( i = 0; i < sch_ptr->nblocks; i++ ) free(sch_ptr->bmat_ja[i]); - free(sch_ptr->bmat_ja); + for ( i = 0; i < sch_ptr->nblocks; i++ ) + hypre_TFree(sch_ptr->bmat_ja[i], HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->bmat_ja, HYPRE_MEMORY_HOST); } if ( sch_ptr->bmat_aa != NULL ) { - for ( i = 0; i < sch_ptr->nblocks; i++ ) free(sch_ptr->bmat_aa[i]); - free(sch_ptr->bmat_aa); + for ( i = 0; i < sch_ptr->nblocks; i++ ) + hypre_TFree(sch_ptr->bmat_aa[i], HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->bmat_aa, HYPRE_MEMORY_HOST); } if ( sch_ptr->aux_bmat_ia != NULL ) { - for ( i = 0; i < sch_ptr->nblocks; i++ ) free(sch_ptr->aux_bmat_ia[i]); - free(sch_ptr->aux_bmat_ia); + for ( i = 0; i < sch_ptr->nblocks; i++ ) + hypre_TFree(sch_ptr->aux_bmat_ia[i], HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->aux_bmat_ia, HYPRE_MEMORY_HOST); } if ( sch_ptr->aux_bmat_ja != NULL ) { - for ( i = 0; i < sch_ptr->nblocks; i++ ) free(sch_ptr->aux_bmat_ja[i]); - free(sch_ptr->aux_bmat_ja); + for ( i = 0; i < sch_ptr->nblocks; i++ ) + hypre_TFree(sch_ptr->aux_bmat_ja[i], HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->aux_bmat_ja, HYPRE_MEMORY_HOST); } if ( sch_ptr->aux_bmat_aa != NULL ) { - for ( i = 0; i < sch_ptr->nblocks; i++ ) free(sch_ptr->aux_bmat_aa[i]); - free(sch_ptr->aux_bmat_aa); + for ( i = 0; i < sch_ptr->nblocks; i++ ) + hypre_TFree(sch_ptr->aux_bmat_aa[i], HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->aux_bmat_aa, HYPRE_MEMORY_HOST); } - if ( sch_ptr->blk_sizes != NULL ) free(sch_ptr->blk_sizes); + if ( 
sch_ptr->blk_sizes != NULL ) + hypre_TFree(sch_ptr->blk_sizes, HYPRE_MEMORY_HOST); if ( sch_ptr->blk_indices != NULL ) { for ( i = 0; i < sch_ptr->nblocks; i++ ) if ( sch_ptr->blk_indices[i] != NULL ) - free( sch_ptr->blk_indices[i] ); + hypre_TFree(sch_ptr->blk_indices[i], HYPRE_MEMORY_HOST); } - if ( sch_ptr->mh_mat != NULL ) + if ( sch_ptr->mh_mat != NULL ) { - if (sch_ptr->mh_mat->sendProc != NULL) free(sch_ptr->mh_mat->sendProc); - if (sch_ptr->mh_mat->sendLeng != NULL) free(sch_ptr->mh_mat->sendLeng); - if (sch_ptr->mh_mat->recvProc != NULL) free(sch_ptr->mh_mat->recvProc); - if (sch_ptr->mh_mat->recvLeng != NULL) free(sch_ptr->mh_mat->recvLeng); + hypre_TFree(sch_ptr->mh_mat->sendProc, HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->mh_mat->sendLeng, HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->mh_mat->recvProc, HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->mh_mat->recvLeng, HYPRE_MEMORY_HOST); for ( i = 0; i < sch_ptr->mh_mat->sendProcCnt; i++ ) - if (sch_ptr->mh_mat->sendList[i] != NULL) - free(sch_ptr->mh_mat->sendList[i]); - if (sch_ptr->mh_mat->sendList != NULL) free(sch_ptr->mh_mat->sendList); - free( sch_ptr->mh_mat ); - } + hypre_TFree(sch_ptr->mh_mat->sendList[i], HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->mh_mat->sendList, HYPRE_MEMORY_HOST); + hypre_TFree(sch_ptr->mh_mat, HYPRE_MEMORY_HOST); + } sch_ptr->mh_mat = NULL; - free(sch_ptr); + hypre_TFree(sch_ptr, HYPRE_MEMORY_HOST); return 0; } /*-------------------------------------------------------------------------- - * HYPRE_LSI_SchwarzSetOutputLevel - Set debug level + * HYPRE_LSI_SchwarzSetOutputLevel - Set debug level *-------------------------------------------------------------------------*/ int HYPRE_LSI_SchwarzSetOutputLevel(HYPRE_Solver solver, int level) @@ -182,7 +188,7 @@ int HYPRE_LSI_SchwarzSetOutputLevel(HYPRE_Solver solver, int level) } /*-------------------------------------------------------------------------- - * HYPRE_LSI_SchwarzSetBlockSize - Set block size + * 
HYPRE_LSI_SchwarzSetBlockSize - Set block size *-------------------------------------------------------------------------*/ int HYPRE_LSI_SchwarzSetNBlocks(HYPRE_Solver solver, int nblks) @@ -195,7 +201,7 @@ int HYPRE_LSI_SchwarzSetNBlocks(HYPRE_Solver solver, int nblks) } /*-------------------------------------------------------------------------- - * HYPRE_LSI_SchwarzSetBlockSize - Set block size + * HYPRE_LSI_SchwarzSetBlockSize - Set block size *-------------------------------------------------------------------------*/ int HYPRE_LSI_SchwarzSetBlockSize(HYPRE_Solver solver, int blksize) @@ -208,7 +214,7 @@ int HYPRE_LSI_SchwarzSetBlockSize(HYPRE_Solver solver, int blksize) } /*-------------------------------------------------------------------------- - * HYPRE_LSI_SchwarzSetILUTFillin - Set fillin for block solve + * HYPRE_LSI_SchwarzSetILUTFillin - Set fillin for block solve *-------------------------------------------------------------------------*/ int HYPRE_LSI_SchwarzSetILUTFillin(HYPRE_Solver solver, double fillin) @@ -252,7 +258,7 @@ int HYPRE_LSI_SchwarzSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix Amat, nblocks = sch_ptr->nblocks; max_blk_size = 0; for ( i = 0; i < nblocks; i++ ) - if (sch_ptr->blk_sizes[i] > max_blk_size) + if (sch_ptr->blk_sizes[i] > max_blk_size) max_blk_size = sch_ptr->blk_sizes[i]; /* --------------------------------------------------------- @@ -315,8 +321,8 @@ int HYPRE_LSI_SchwarzSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix Amat, else ddata -= (aux_mat_aa[j]*xbuffer[index]); } solbuf[i] = ddata; - } - } + } + } for ( i = 0; i < nrows; i++ ) { ddata = 0.0; @@ -430,11 +436,12 @@ int HYPRE_LSI_SchwarzSolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix Amat, /* clean up */ /* --------------------------------------------------------- */ - free(xbuffer); - free( idiag ); - free( solbuf ); - free( dbuffer ); - free( context ); + hypre_TFree(xbuffer, HYPRE_MEMORY_HOST); + hypre_TFree(idiag, HYPRE_MEMORY_HOST); + hypre_TFree(solbuf, 
HYPRE_MEMORY_HOST); + hypre_TFree(dbuffer, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); + return 0; } @@ -476,18 +483,18 @@ int HYPRE_LSI_SchwarzSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, mh_mat = hypre_TAlloc( MH_Matrix, 1, HYPRE_MEMORY_HOST); context->Amat = mh_mat; HYPRE_LSI_MLConstructMHMatrix(A_csr, mh_mat, comm, - context->partition,context); + context->partition,context); sch_ptr->Nrows = mh_mat->Nrows; sch_ptr->mh_mat = mh_mat; /* --------------------------------------------------------- */ /* compose the enlarged overlapped local matrix */ /* --------------------------------------------------------- */ - + if ( overlap_flag ) { - HYPRE_LSI_DDIlutComposeOverlappedMatrix(mh_mat, &total_recv_leng, - &recv_lengths, &int_buf, &dble_buf, &map, &map2,&offset,comm); + HYPRE_LSI_DDIlutComposeOverlappedMatrix(mh_mat, &total_recv_leng, + &recv_lengths, &int_buf, &dble_buf, &map, &map2,&offset,comm); } else { @@ -504,8 +511,8 @@ int HYPRE_LSI_SchwarzSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, MPI_Allreduce(parray2,parray,nprocs,MPI_INT,MPI_SUM,MPI_COMM_WORLD); offset = 0; for (i = 0; i < mypid; i++) offset += parray[i]; - free(parray); - free(parray2); + hypre_TFree(parray, HYPRE_MEMORY_HOST); + hypre_TFree(parray2, HYPRE_MEMORY_HOST); } /* --------------------------------------------------------- */ @@ -519,21 +526,18 @@ int HYPRE_LSI_SchwarzSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, /* clean up */ /* --------------------------------------------------------- */ - if ( map != NULL ) free(map); - if ( map2 != NULL ) free(map2); - if ( int_buf != NULL ) free(int_buf); - if ( dble_buf != NULL ) free(dble_buf); - if ( recv_lengths != NULL ) free(recv_lengths); - free( context->partition ); - free( context ); - if ( mh_mat->rowptr != NULL ) free( mh_mat->rowptr ); - if ( mh_mat->colnum != NULL ) free( mh_mat->colnum ); - if ( mh_mat->values != NULL ) free( mh_mat->values ); - if ( mh_mat->map != NULL ) free( mh_mat->map ); - 
mh_mat->rowptr = NULL; - mh_mat->colnum = NULL; - mh_mat->values = NULL; - mh_mat->map = NULL; + hypre_TFree(map, HYPRE_MEMORY_HOST); + hypre_TFree(map2, HYPRE_MEMORY_HOST); + hypre_TFree(int_buf, HYPRE_MEMORY_HOST); + hypre_TFree(dble_buf, HYPRE_MEMORY_HOST); + hypre_TFree(recv_lengths, HYPRE_MEMORY_HOST); + hypre_TFree(context->partition, HYPRE_MEMORY_HOST); + hypre_TFree(context, HYPRE_MEMORY_HOST); + hypre_TFree(mh_mat->rowptr, HYPRE_MEMORY_HOST); + hypre_TFree(mh_mat->colnum, HYPRE_MEMORY_HOST); + hypre_TFree(mh_mat->values, HYPRE_MEMORY_HOST); + hypre_TFree(mh_mat->map, HYPRE_MEMORY_HOST); + return 0; } @@ -542,7 +546,7 @@ int HYPRE_LSI_SchwarzSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, /**************************************************************************/ int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat, - int total_recv_leng, int *recv_lengths, int *ext_ja, + int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset) { int i, j, k, nnz, *mat_ia, *mat_ja; @@ -623,8 +627,8 @@ int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat, { tmp_blk_leng[i] = 5 * blk_size[i] + 5; blk_indices[i] = hypre_TAlloc(int, tmp_blk_leng[i] , HYPRE_MEMORY_HOST); - for (j = 0; j < blk_size[i]; j++) - blk_indices[i][j] = sch_ptr->block_size * i + j; + for (j = 0; j < blk_size[i]; j++) + blk_indices[i][j] = sch_ptr->block_size * i + j; } max_blk_size = 0; for ( i = 0; i < nblocks; i++ ) @@ -641,9 +645,9 @@ int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat, tmp_indices = blk_indices[i]; tmp_blk_leng[i] = 2 * ( blk_size[i] + rowleng ) + 2; blk_indices[i] = hypre_TAlloc(int, tmp_blk_leng[i] , HYPRE_MEMORY_HOST); - for (k = 0; k < blk_size[i]; k++) + for (k = 0; k < blk_size[i]; k++) blk_indices[i][k] = tmp_indices[k]; - free( tmp_indices ); + hypre_TFree(tmp_indices, HYPRE_MEMORY_HOST); } for ( k = 0; k < rowleng; k++ ) { @@ -659,8 +663,8 @@ int 
HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat, blk_size[i] = ncnt + 1; if ( blk_size[i] > max_blk_size ) max_blk_size = blk_size[i]; } - free(tmp_blk_leng); - } + hypre_TFree(tmp_blk_leng, HYPRE_MEMORY_HOST); + } /* --------------------------------------------------------- */ /* compute the memory requirements for each block */ @@ -721,7 +725,7 @@ int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat, } for ( k = 0; k < rowleng; k++ ) { - if ( nblocks > 1 ) + if ( nblocks > 1 ) index = HYPRE_LSI_Search( blk_indices[i], cols[k], blk_size[i]); else index = cols[k]; @@ -735,7 +739,7 @@ int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat, mat_ia = bmat_ia[i]; mat_ja = bmat_ja[i]; mat_aa = bmat_aa[i]; - if ( nblocks > 1 ) + if ( nblocks > 1 ) { aux_bmat_ia[i] = hypre_TAlloc(int, (blk_size[i] + 1) , HYPRE_MEMORY_HOST); aux_bmat_ja[i] = hypre_TAlloc(int, aux_nnz , HYPRE_MEMORY_HOST); @@ -782,7 +786,7 @@ int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat, } for ( k = 0; k < rowleng; k++ ) { - if ( nblocks > 1 ) + if ( nblocks > 1 ) index = HYPRE_LSI_Search( blk_indices[i], cols[k], blk_size[i]); else index = cols[k]; if ( index >= 0 ) @@ -803,8 +807,9 @@ int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat, if ( mat_ja[j] < 0 || mat_ja[j] >= length ) printf("block %d has index %d\n", i, mat_ja[j]); } - free( cols ); - free( vals ); + + hypre_TFree(cols, HYPRE_MEMORY_HOST); + hypre_TFree(vals, HYPRE_MEMORY_HOST); /* --------------------------------------------------------- */ /* decompose each block */ @@ -838,7 +843,7 @@ int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) nblocks = sch_ptr->nblocks; max_blk_size = 0; for ( blk = 0; blk < nblocks; blk++ ) - if ( sch_ptr->blk_sizes[blk] > max_blk_size ) + if ( sch_ptr->blk_sizes[blk] > max_blk_size ) max_blk_size = sch_ptr->blk_sizes[blk]; fillin = sch_ptr->fillin; tau = sch_ptr->threshold; @@ -858,7 +863,7 @@ 
int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) printflag = nblocks / 10 + 1; for ( blk = 0; blk < nblocks; blk++ ) { - if ( output_level > 0 && blk % printflag == 0 && blk != 0 ) + if ( output_level > 0 && blk % printflag == 0 && blk != 0 ) printf("%4d : Schwarz : processing block %6d (%6d)\n",mypid,blk,nblocks); mat_ia = sch_ptr->bmat_ia[blk]; mat_ja = sch_ptr->bmat_ja[blk]; @@ -878,13 +883,13 @@ int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) vals = &(mat_aa[index]); rleng = mat_ia[i+1] - index; ddata = 0.0; - for ( j = 0; j < rleng; j++ ) ddata += habs( vals[j] ); + for ( j = 0; j < rleng; j++ ) ddata += habs( vals[j] ); rowNorms[i] = ddata; } printflag2 = nrows / 10 + 1; for ( i = 0; i < nrows; i++ ) { - if ( output_level > 0 && i % printflag2 == 0 && i != 0 ) + if ( output_level > 0 && i % printflag2 == 0 && i != 0 ) printf("%4d : Schwarz : block %6d row %6d (%6d)\n",mypid,blk, i, nrows); track_leng = 0; @@ -892,7 +897,7 @@ int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) cols = &(mat_ja[index]); vals = &(mat_aa[index]); rleng = mat_ia[i+1] - index; - for ( j = 0; j < rleng; j++ ) + for ( j = 0; j < rleng; j++ ) { dble_buf[cols[j]] = vals[j]; track_array[track_leng++] = cols[j]; @@ -902,7 +907,7 @@ int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) for ( j = 0; j < track_leng; j++ ) { index = track_array[j]; - if ( dble_buf[index] != 0 ) + if ( dble_buf[index] != 0 ) { if ( index < i ) Lcount++; else if ( index > i ) Ucount++; @@ -921,26 +926,26 @@ int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) for ( k = new_ia[j]; k < new_ia[j+1]; k++ ) { colIndex = new_ja[k]; - if ( colIndex > j ) + if ( colIndex > j ) { if ( dble_buf[colIndex] != 0.0 ) dble_buf[colIndex] -= (ddata * new_aa[k]); else { dble_buf[colIndex] = - (ddata * new_aa[k]); - if ( dble_buf[colIndex] != 0.0 ) + if ( dble_buf[colIndex] != 0.0 ) track_array[track_leng++] = colIndex; } } } dble_buf[j] = ddata; - } + } else dble_buf[j] = 0.0; } - for ( j = 0; j 
< rleng; j++ ) + for ( j = 0; j < rleng; j++ ) { - vals[j] = dble_buf[cols[j]]; - if ( cols[j] != i ) dble_buf[cols[j]] = 0.0; + vals[j] = dble_buf[cols[j]]; + if ( cols[j] != i ) dble_buf[cols[j]] = 0.0; } sortcnt = 0; for ( j = 0; j < track_leng; j++ ) @@ -957,12 +962,12 @@ int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) else dble_buf[index] = 0.0; } } - if ( sortcnt > Lcount ) + if ( sortcnt > Lcount ) { HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Lcount); for ( j = Lcount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0; } - for ( j = 0; j < rleng; j++ ) + for ( j = 0; j < rleng; j++ ) { if ( cols[j] < i && vals[j] != 0.0 ) { @@ -1004,7 +1009,7 @@ int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) HYPRE_LSI_SplitDSort(sortvals,sortcnt,sortcols,Ucount); for ( j = Ucount; j < sortcnt; j++ ) dble_buf[sortcols[j]] = 0.0; } - for ( j = 0; j < rleng; j++ ) + for ( j = 0; j < rleng; j++ ) { if ( cols[j] > i && vals[j] != 0.0 ) { @@ -1025,9 +1030,9 @@ int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) dble_buf[i] = 0.0; new_ia[i+1] = nnz; } - free( mat_ia ); - free( mat_ja ); - free( mat_aa ); + hypre_TFree(mat_ia, HYPRE_MEMORY_HOST); + hypre_TFree(mat_ja, HYPRE_MEMORY_HOST); + hypre_TFree(mat_aa, HYPRE_MEMORY_HOST); sch_ptr->bmat_ia[blk] = new_ia; sch_ptr->bmat_ja[blk] = new_ja; sch_ptr->bmat_aa[blk] = new_aa; @@ -1045,12 +1050,14 @@ int HYPRE_LSI_ILUTDecompose( HYPRE_LSI_Schwarz *sch_ptr ) } } } - free( track_array ); - free( dble_buf ); - free( diagonal ); - free( rowNorms ); - free( sortcols ); - free( sortvals ); + + hypre_TFree(track_array, HYPRE_MEMORY_HOST); + hypre_TFree(dble_buf, HYPRE_MEMORY_HOST); + hypre_TFree(diagonal, HYPRE_MEMORY_HOST); + hypre_TFree(rowNorms, HYPRE_MEMORY_HOST); + hypre_TFree(sortcols, HYPRE_MEMORY_HOST); + hypre_TFree(sortvals, HYPRE_MEMORY_HOST); + return 0; } diff --git a/src/FEI_mv/fei-hypre/HYPRE_LinSysCore.cxx b/src/FEI_mv/fei-hypre/HYPRE_LinSysCore.cxx index fc6fe39aa..467e94bc6 100644 --- 
a/src/FEI_mv/fei-hypre/HYPRE_LinSysCore.cxx +++ b/src/FEI_mv/fei-hypre/HYPRE_LinSysCore.cxx @@ -13,7 +13,6 @@ #include #include #include -#include #include #if 0 /* RDF: Not sure this is really needed */ @@ -49,7 +48,7 @@ #include "HYPRE_SlideReduction.h" //*************************************************************************** -// timers +// timers //--------------------------------------------------------------------------- #ifdef HYPRE_SEQUENTIAL @@ -59,7 +58,7 @@ extern "C" double LSC_Wtime() { clock_t ticks; - double seconds; + double seconds; ticks = clock() ; seconds = (double) ticks / (double) CLOCKS_PER_SEC; return seconds; @@ -74,7 +73,7 @@ double LSC_Wtime() extern "C" { int HYPRE_LSI_qsort1a( int *, int *, int, int ); int HYPRE_LSI_PartitionMatrix(int, int, int*, int**, double**, int*, int**); - int HYPRE_LSI_GetMatrixDiagonal(int, int, int *, int **, double **, int *, + int HYPRE_LSI_GetMatrixDiagonal(int, int, int *, int **, double **, int *, int *, double *); } @@ -103,7 +102,7 @@ extern "C" { int HYPRE_LSI_AMGeSetNElements(int); int HYPRE_LSI_AMGeSetSystemSize(int); int HYPRE_LSI_AMGePutRow(int,int,double*,int*); - int HYPRE_LSI_AMGeSolve( double *rhs, double *sol ); + int HYPRE_LSI_AMGeSolve( double *rhs, double *sol ); int HYPRE_LSI_AMGeSetBoundary( int leng, int *colInd ); int HYPRE_LSI_AMGeWriteToFile(); #endif @@ -118,7 +117,7 @@ extern "C" { // constructor //--------------------------------------------------------------------------- -HYPRE_LinSysCore::HYPRE_LinSysCore(MPI_Comm comm) : +HYPRE_LinSysCore::HYPRE_LinSysCore(MPI_Comm comm) : comm_(comm), HYOutputLevel_(0), memOptimizerFlag_(0), @@ -187,19 +186,19 @@ HYPRE_LinSysCore::HYPRE_LinSysCore(MPI_Comm comm) : nConstraints_(0), constrList_(NULL), matrixPartition_(0), - HYSolver_(NULL), + HYSolver_(NULL), maxIterations_(1000), tolerance_(1.0e-6), normAbsRel_(0), pcgRecomputeRes_(0), - HYPrecon_(NULL), - HYPreconReuse_(0), + HYPrecon_(NULL), + HYPreconReuse_(0), HYPreconSetup_(0), 
lookup_(NULL), haveLookup_(0) { //------------------------------------------------------------------- - // find my processor ID + // find my processor ID //------------------------------------------------------------------- MPI_Comm_rank(comm, &mypid_); @@ -234,11 +233,11 @@ HYPRE_LinSysCore::HYPRE_LinSysCore(MPI_Comm comm) : amgSystemSize_ = 1; // system size amgMaxIter_ = 1; // number of iterations amgNumSweeps_[0] = 1; // no. of sweeps for fine grid - amgNumSweeps_[1] = 1; // no. of presmoothing sweeps - amgNumSweeps_[2] = 1; // no. of postsmoothing sweeps + amgNumSweeps_[1] = 1; // no. of presmoothing sweeps + amgNumSweeps_[2] = 1; // no. of postsmoothing sweeps amgNumSweeps_[3] = 1; // no. of sweeps for coarsest grid amgRelaxType_[0] = 3; // hybrid for the fine grid - amgRelaxType_[1] = 3; // hybrid for presmoothing + amgRelaxType_[1] = 3; // hybrid for presmoothing amgRelaxType_[2] = 3; // hybrid for postsmoothing amgRelaxType_[3] = 9; // direct for the coarsest level amgGridRlxType_ = 0; // smoothes all points @@ -258,8 +257,8 @@ HYPRE_LinSysCore::HYPRE_LinSysCore(MPI_Comm comm) : amgInterpType_ = 0; amgPmax_ = 0; - for (int i = 0; i < 25; i++) amgRelaxWeight_[i] = 1.0; - for (int j = 0; j < 25; j++) amgRelaxOmega_[j] = 1.0; + for (int i = 0; i < 25; i++) amgRelaxWeight_[i] = 1.0; + for (int j = 0; j < 25; j++) amgRelaxOmega_[j] = 1.0; pilutFillin_ = 0; // how many nonzeros to keep in L and U pilutDropTol_ = 0.0; @@ -289,10 +288,10 @@ HYPRE_LinSysCore::HYPRE_LinSysCore(MPI_Comm comm) : euclidargc_ = 2; // parameters information for Euclid euclidargv_ = new char*[euclidargc_*2]; for (int k = 0; k < euclidargc_*2; k++) euclidargv_[k] = new char[50]; - strcpy(euclidargv_[0], "-level"); - strcpy(euclidargv_[1], "0"); - strcpy(euclidargv_[2], "-sparseA"); - strcpy(euclidargv_[3], "0.0"); + strcpy(euclidargv_[0], "-level"); + strcpy(euclidargv_[1], "0"); + strcpy(euclidargv_[2], "-sparseA"); + strcpy(euclidargv_[3], "0.0"); superluOrdering_ = 0; // natural 
ordering in SuperLU superluScale_[0] = 'N'; // no scaling in SuperLUX @@ -306,7 +305,7 @@ HYPRE_LinSysCore::HYPRE_LinSysCore(MPI_Comm comm) : mlStrongThreshold_ = 0.08; // one suggested by Vanek/Brezina/Mandel mlCoarseSolver_ = 0; // default coarse solver = SuperLU mlCoarsenScheme_ = 1; // default coarsening scheme = uncoupled - mlNumPDEs_ = 3; // default block size + mlNumPDEs_ = 3; // default block size truncThresh_ = 0.0; rnorm_ = 0.0; @@ -375,7 +374,7 @@ HYPRE_LinSysCore::HYPRE_LinSysCore(MPI_Comm comm) : // destructor //--------------------------------------------------------------------------- -HYPRE_LinSysCore::~HYPRE_LinSysCore() +HYPRE_LinSysCore::~HYPRE_LinSysCore() { int i; //------------------------------------------------------------------- @@ -392,23 +391,23 @@ HYPRE_LinSysCore::~HYPRE_LinSysCore() if ( HYA_ != NULL ) {HYPRE_IJMatrixDestroy(HYA_); HYA_ = NULL;} if ( HYx_ != NULL ) {HYPRE_IJVectorDestroy(HYx_); HYx_ = NULL;} if ( HYr_ != NULL ) {HYPRE_IJVectorDestroy(HYr_); HYr_ = NULL;} - if ( HYbs_ != NULL ) + if ( HYbs_ != NULL ) { - for ( i = 0; i < numRHSs_; i++ ) + for ( i = 0; i < numRHSs_; i++ ) if ( HYbs_[i] != NULL ) HYPRE_IJVectorDestroy(HYbs_[i]); delete [] HYbs_; HYbs_ = NULL; } - if ( HYpbs_ != NULL ) + if ( HYpbs_ != NULL ) { - for ( i = 0; i <= projectSize_; i++ ) + for ( i = 0; i <= projectSize_; i++ ) if ( HYpbs_[i] != NULL ) HYPRE_IJVectorDestroy(HYpbs_[i]); delete [] HYpbs_; HYpbs_ = NULL; } - if ( HYpxs_ != NULL ) + if ( HYpxs_ != NULL ) { - for ( i = 0; i <= projectSize_; i++ ) + for ( i = 0; i <= projectSize_; i++ ) if ( HYpxs_[i] != NULL ) HYPRE_IJVectorDestroy(HYpxs_[i]); delete [] HYpxs_; HYpxs_ = NULL; @@ -441,7 +440,7 @@ HYPRE_LinSysCore::~HYPRE_LinSysCore() delete [] colValues_; colValues_ = NULL; } - if ( rowLengths_ != NULL ) + if ( rowLengths_ != NULL ) { delete [] rowLengths_; rowLengths_ = NULL; @@ -476,12 +475,12 @@ HYPRE_LinSysCore::~HYPRE_LinSysCore() // clean up direct matrix access variables 
//------------------------------------------------------------------- - if ( mapFromSolnList_ != NULL ) + if ( mapFromSolnList_ != NULL ) { delete [] mapFromSolnList_; mapFromSolnList_ = NULL; } - if ( mapFromSolnList2_ != NULL ) + if ( mapFromSolnList2_ != NULL ) { delete [] mapFromSolnList2_; mapFromSolnList2_ = NULL; @@ -553,7 +552,7 @@ HYPRE_LinSysCore::~HYPRE_LinSysCore() { // Destroy G and coordinate vectors // OLD WAY - if( amsG_ == NULL ) { + if( amsG_ == NULL ) { HYPRE_AMSFEIDestroy( HYPrecon_ ); } HYPRE_AMSDestroy( HYPrecon_ ); @@ -580,28 +579,28 @@ HYPRE_LinSysCore::~HYPRE_LinSysCore() // clean up variable for various reduction //------------------------------------------------------------------- - if ( constrList_ != NULL ) + if ( constrList_ != NULL ) { - delete [] constrList_; + delete [] constrList_; constrList_ = NULL; } - if (selectedList_ != NULL) + if (selectedList_ != NULL) { - delete [] selectedList_; + delete [] selectedList_; selectedList_ = NULL; } - if (selectedListAux_ != NULL) + if (selectedListAux_ != NULL) { - delete [] selectedListAux_; + delete [] selectedListAux_; selectedListAux_ = NULL; } - + //------------------------------------------------------------------- // deallocate local storage for MLI //------------------------------------------------------------------- #ifdef HAVE_MLI - if ( feData_ != NULL ) + if ( feData_ != NULL ) { if (haveFEData_ == 1) HYPRE_LSI_MLIFEDataDestroy(feData_); else if (haveFEData_ == 2) HYPRE_LSI_MLISFEIDestroy(feData_); @@ -629,9 +628,10 @@ HYPRE_LinSysCore::~HYPRE_LinSysCore() // HYPRE_ParCSRMatrixDestroy(maxwellGEN_); // maxwellGEN_ = NULL; //} - if (AMSData_.EdgeNodeList_ != NULL) delete [] AMSData_.EdgeNodeList_; - if (AMSData_.NodeNumbers_ != NULL) delete [] AMSData_.NodeNumbers_; - if (AMSData_.NodalCoord_ != NULL) delete [] AMSData_.NodalCoord_; + // This data seems to be freed by hypre_AMSFEIDestroy in ams.c + //if (AMSData_.EdgeNodeList_ != NULL) hypre_TFree(AMSData_.EdgeNodeList_, 
HYPRE_MEMORY_HOST); + if (AMSData_.NodeNumbers_ != NULL) hypre_TFree(AMSData_.NodeNumbers_, HYPRE_MEMORY_HOST); + if (AMSData_.NodalCoord_ != NULL) hypre_TFree(AMSData_.NodalCoord_, HYPRE_MEMORY_HOST); if (FEI_mixedDiag_ != NULL) delete [] FEI_mixedDiag_; //------------------------------------------------------------------- @@ -647,7 +647,7 @@ HYPRE_LinSysCore::~HYPRE_LinSysCore() //--------------------------------------------------------------------------- #ifndef NOFEI -LinearSystemCore* HYPRE_LinSysCore::clone() +LinearSystemCore* HYPRE_LinSysCore::clone() { return(new HYPRE_LinSysCore(comm_)); } @@ -692,7 +692,7 @@ int HYPRE_LinSysCore::setLookup(Lookup& lookup) //--------------------------------------------------------------------------- int HYPRE_LinSysCore::createMatricesAndVectors(int numGlobalEqns, - int firstLocalEqn, int numLocalEqns) + int firstLocalEqn, int numLocalEqns) { int i; @@ -757,7 +757,7 @@ int HYPRE_LinSysCore::createMatricesAndVectors(int numGlobalEqns, // error checking //------------------------------------------------------------------- - if ( ( firstLocalEqn <= 0 ) || + if ( ( firstLocalEqn <= 0 ) || ( firstLocalEqn+numLocalEqns-1) > numGlobalEqns) { printf("%4d : createMatricesVectors: invalid local equation nos.\n", @@ -778,14 +778,14 @@ int HYPRE_LinSysCore::createMatricesAndVectors(int numGlobalEqns, if ( HYA_ != NULL ) {HYPRE_IJMatrixDestroy(HYA_); HYA_ = NULL;} if ( HYx_ != NULL ) {HYPRE_IJVectorDestroy(HYx_); HYx_ = NULL;} if ( HYr_ != NULL ) {HYPRE_IJVectorDestroy(HYr_); HYr_ = NULL;} - if ( HYbs_ != NULL ) + if ( HYbs_ != NULL ) { - for ( i = 0; i < numRHSs_; i++ ) + for ( i = 0; i < numRHSs_; i++ ) if ( HYbs_[i] != NULL ) HYPRE_IJVectorDestroy(HYbs_[i]); delete [] HYbs_; HYbs_ = NULL; } - if (reducedA_ != NULL) HYPRE_IJMatrixDestroy(reducedA_); + if (reducedA_ != NULL) HYPRE_IJMatrixDestroy(reducedA_); if (reducedB_ != NULL) HYPRE_IJVectorDestroy(reducedB_); if (reducedX_ != NULL) HYPRE_IJVectorDestroy(reducedX_); if (reducedR_ 
!= NULL) HYPRE_IJVectorDestroy(reducedR_); @@ -805,7 +805,7 @@ int HYPRE_LinSysCore::createMatricesAndVectors(int numGlobalEqns, //------------------------------------------------------------------- // instantiate the matrix (can also handle rectangular matrix) //------------------------------------------------------------------- - + if (localStartCol_ == -1) HYPRE_IJMatrixCreate(comm_, localStartRow_-1,localEndRow_-1, localStartRow_-1,localEndRow_-1, &HYA_); @@ -846,7 +846,7 @@ int HYPRE_LinSysCore::createMatricesAndVectors(int numGlobalEqns, //------------------------------------------------------------------- #ifdef HAVE_MLI - if ( feData_ != NULL ) + if ( feData_ != NULL ) { if (haveFEData_ == 1) HYPRE_LSI_MLIFEDataDestroy(feData_); else if (haveFEData_ == 2) HYPRE_LSI_MLISFEIDestroy(feData_); @@ -868,7 +868,7 @@ int HYPRE_LinSysCore::createMatricesAndVectors(int numGlobalEqns, HYPRE_LSI_AMGeSetNNodes(numGlobalRows_); HYPRE_LSI_AMGeSetNElements(numGlobalRows_); HYPRE_LSI_AMGeSetSystemSize(1); -#endif +#endif //------------------------------------------------------------------- // instantiate the residual vector @@ -882,14 +882,14 @@ int HYPRE_LinSysCore::createMatricesAndVectors(int numGlobalEqns, schurReductionCreated_ = 0; systemAssembled_ = 0; normalEqnFlag_ &= 1; - if ( HYnormalA_ != NULL ) + if ( HYnormalA_ != NULL ) { - HYPRE_IJMatrixDestroy(HYnormalA_); + HYPRE_IJMatrixDestroy(HYnormalA_); HYnormalA_ = NULL; } - if ( HYnormalB_ != NULL ) + if ( HYnormalB_ != NULL ) { - HYPRE_IJVectorDestroy(HYnormalB_); + HYPRE_IJVectorDestroy(HYnormalB_); HYnormalB_ = NULL; } @@ -927,7 +927,7 @@ int HYPRE_LinSysCore::setGlobalOffsets(int leng, int* nodeOffsets, int firstLocalEqn = eqnOffsets[mypid_] + 1; int numLocalEqns = eqnOffsets[mypid_+1] - firstLocalEqn + 1; int numGlobalEqns = eqnOffsets[numProcs_]; - createMatricesAndVectors(numGlobalEqns, firstLocalEqn, numLocalEqns); + createMatricesAndVectors(numGlobalEqns, firstLocalEqn, numLocalEqns); 
//------------------------------------------------------------------- // diagnostic message @@ -1030,13 +1030,13 @@ int HYPRE_LinSysCore::allocateMatrix(int **colIndices, int *rowLengths) double *vals; //------------------------------------------------------------------- - // diagnoistic message and error checking + // diagnoistic message and error checking //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) { printf("%4d : HYPRE_LSC::entering allocateMatrix.\n", mypid_); - if ( localEndRow_ < localStartRow_ ) + if ( localEndRow_ < localStartRow_ ) { printf("allocateMatrix WARNING : createMatrixAndVectors should be\n"); printf(" called before allocateMatrix.\n"); @@ -1077,10 +1077,10 @@ int HYPRE_LinSysCore::allocateMatrix(int **colIndices, int *rowLengths) for ( i = 0; i < nsize; i++ ) { rowLeng = rowLengths_[i] = rowLengths[i]; - if ( rowLeng > 0 ) + if ( rowLeng > 0 ) { colIndices_[i] = new int[rowLeng]; - assert( colIndices_[i] != NULL ); + hypre_assert( colIndices_[i] != NULL ); } else colIndices_[i] = NULL; indPtr = colIndices_[i]; @@ -1092,10 +1092,10 @@ int HYPRE_LinSysCore::allocateMatrix(int **colIndices, int *rowLengths) if ( searchFlag ) hypre_qsort0( indPtr, 0, rowLeng-1); maxSize = ( rowLeng > maxSize ) ? rowLeng : maxSize; minSize = ( rowLeng < minSize ) ? 
rowLeng : minSize; - if ( rowLeng > 0 ) + if ( rowLeng > 0 ) { colValues_[i] = new double[rowLeng]; - assert( colValues_[i] != NULL ); + hypre_assert( colValues_[i] != NULL ); } vals = colValues_[i]; for ( j = 0; j < rowLeng; j++ ) vals[j] = 0.0; @@ -1103,12 +1103,12 @@ int HYPRE_LinSysCore::allocateMatrix(int **colIndices, int *rowLengths) MPI_Allreduce(&maxSize, &pilutMaxNnzPerRow_,1,MPI_INT,MPI_MAX,comm_); //------------------------------------------------------------------- - // diagnoistic message + // diagnoistic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) { - printf("%4d : allocateMatrix : max/min nnz/row = %d %d\n", mypid_, + printf("%4d : allocateMatrix : max/min nnz/row = %d %d\n", mypid_, maxSize, minSize); printf("%4d : HYPRE_LSC::leaving allocateMatrix.\n", mypid_); } @@ -1116,7 +1116,7 @@ int HYPRE_LinSysCore::allocateMatrix(int **colIndices, int *rowLengths) } //*************************************************************************** -// to establish the structures/objects associated with the linear algebra +// to establish the structures/objects associated with the linear algebra // library. i.e., do initial allocations, etc. 
//--------------------------------------------------------------------------- @@ -1127,7 +1127,7 @@ int HYPRE_LinSysCore::setMatrixStructure(int** ptColIndices, int* ptRowLengths, int i, j; //------------------------------------------------------------------- - // diagnoistic message + // diagnoistic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) @@ -1137,7 +1137,7 @@ int HYPRE_LinSysCore::setMatrixStructure(int** ptColIndices, int* ptRowLengths, { int nRows = localEndRow_ - localStartRow_ + 1; for (i = 0; i < nRows; i++) - for (j = 0; j < ptRowLengths[i]; j++) + for (j = 0; j < ptRowLengths[i]; j++) printf(" %4d : row, col = %d %d\n",mypid_, localStartRow_+i, ptColIndices[i][j]+1); } @@ -1160,7 +1160,7 @@ int HYPRE_LinSysCore::setMatrixStructure(int** ptColIndices, int* ptRowLengths, for ( j = 0; j < ptRowLengths[i]; j++ ) ptColIndices[i][j]--; //------------------------------------------------------------------- - // diagnoistic message + // diagnoistic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) @@ -1172,7 +1172,7 @@ int HYPRE_LinSysCore::setMatrixStructure(int** ptColIndices, int* ptRowLengths, // set Lagrange multiplier equations //--------------------------------------------------------------------------- -int HYPRE_LinSysCore::setMultCREqns(int multCRSetID, int numCRs, +int HYPRE_LinSysCore::setMultCREqns(int multCRSetID, int numCRs, int numNodesPerCR, int** nodeNumbers, int** eqnNumbers, int* fieldIDs, int* multiplierEqnNumbers) { @@ -1193,7 +1193,7 @@ int HYPRE_LinSysCore::setMultCREqns(int multCRSetID, int numCRs, // set penalty constraint equations //--------------------------------------------------------------------------- -int HYPRE_LinSysCore::setPenCREqns(int penCRSetID, int numCRs, +int HYPRE_LinSysCore::setPenCREqns(int penCRSetID, int numCRs, int numNodesPerCR, int** nodeNumbers, int** 
eqnNumbers, int* fieldIDs) { @@ -1220,7 +1220,7 @@ int HYPRE_LinSysCore::resetMatrixAndVector(double setValue) double *vals; //------------------------------------------------------------------- - // diagnoistic message + // diagnoistic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) @@ -1242,8 +1242,8 @@ int HYPRE_LinSysCore::resetMatrixAndVector(double setValue) { cols[i] = localStartRow_ + i - 1; vals[i] = 0.0; - } - for (i = 0; i < numRHSs_; i++) + } + for (i = 0; i < numRHSs_; i++) HYPRE_IJVectorSetValues(HYbs_[i], localNRows, (const int *) cols, (const double *) vals); @@ -1254,14 +1254,14 @@ int HYPRE_LinSysCore::resetMatrixAndVector(double setValue) schurReductionCreated_ = 0; projectCurrSize_ = 0; normalEqnFlag_ &= 1; - if ( HYnormalA_ != NULL ) + if ( HYnormalA_ != NULL ) { - HYPRE_IJMatrixDestroy(HYnormalA_); + HYPRE_IJMatrixDestroy(HYnormalA_); HYnormalA_ = NULL; } - if ( HYnormalB_ != NULL ) + if ( HYnormalB_ != NULL ) { - HYPRE_IJVectorDestroy(HYnormalB_); + HYPRE_IJVectorDestroy(HYnormalB_); HYnormalB_ = NULL; } @@ -1313,7 +1313,7 @@ int HYPRE_LinSysCore::resetMatrixAndVector(double setValue) //------------------------------------------------------------------- #ifdef HAVE_MLI - if ( feData_ != NULL ) + if ( feData_ != NULL ) { if (haveFEData_ == 1) HYPRE_LSI_MLIFEDataDestroy(feData_); else if (haveFEData_ == 2) HYPRE_LSI_MLISFEIDestroy(feData_); @@ -1339,7 +1339,7 @@ int HYPRE_LinSysCore::resetMatrixAndVector(double setValue) // new function to reset matrix independently //--------------------------------------------------------------------------- -int HYPRE_LinSysCore::resetMatrix(double setValue) +int HYPRE_LinSysCore::resetMatrix(double setValue) { int i, j, size; @@ -1411,9 +1411,9 @@ int HYPRE_LinSysCore::resetMatrix(double setValue) schurReductionCreated_ = 0; projectCurrSize_ = 0; normalEqnFlag_ &= 5; - if ( HYnormalA_ != NULL ) + if ( HYnormalA_ != NULL ) { - 
HYPRE_IJMatrixDestroy(HYnormalA_); + HYPRE_IJMatrixDestroy(HYnormalA_); HYnormalA_ = NULL; } @@ -1422,7 +1422,7 @@ int HYPRE_LinSysCore::resetMatrix(double setValue) //------------------------------------------------------------------- #ifdef HAVE_MLI - if ( feData_ != NULL ) + if ( feData_ != NULL ) { if (haveFEData_ == 1) HYPRE_LSI_MLIFEDataDestroy(feData_); else if (haveFEData_ == 2) HYPRE_LSI_MLISFEIDestroy(feData_); @@ -1448,7 +1448,7 @@ int HYPRE_LinSysCore::resetMatrix(double setValue) // new function to reset vectors independently //--------------------------------------------------------------------------- -int HYPRE_LinSysCore::resetRHSVector(double setValue) +int HYPRE_LinSysCore::resetRHSVector(double setValue) { int i, localNRows, *cols; double *vals; @@ -1473,18 +1473,18 @@ int HYPRE_LinSysCore::resetRHSVector(double setValue) { cols[i] = localStartRow_ + i - 1; vals[i] = setValue; - } - for (i = 0; i < numRHSs_; i++) - if ( HYbs_[i] != NULL ) + } + for (i = 0; i < numRHSs_; i++) + if ( HYbs_[i] != NULL ) HYPRE_IJVectorSetValues(HYbs_[i], localNRows, (const int *) cols, (const double *) vals); delete [] cols; delete [] vals; } normalEqnFlag_ &= 3; - if ( HYnormalB_ != NULL ) + if ( HYnormalB_ != NULL ) { - HYPRE_IJVectorDestroy(HYnormalB_); + HYPRE_IJVectorDestroy(HYnormalB_); HYnormalB_ = NULL; } @@ -1516,7 +1516,7 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int row, int numValues, printf("%4d : row number = %d.\n", mypid_, row); if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 6 ) for ( i = 0; i < numValues; i++ ) - printf(" %4d : row,col = %d %d, data = %e\n", mypid_, + printf(" %4d : row,col = %d %d, data = %e\n", mypid_, row+1, scatterIndices[i]+1, values[i]); } @@ -1547,16 +1547,16 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int row, int numValues, // load the local matrix //------------------------------------------------------------------- - for ( i = 0; i < numValues; i++ ) + for ( i = 0; i < numValues; i++ ) { colIndex = scatterIndices[i]; - 
index = hypre_BinarySearch(colIndices_[localRow], colIndex, + index = hypre_BinarySearch(colIndices_[localRow], colIndex, rowLengths_[localRow]); if ( index < 0 ) { printf("%4d : sumIntoSystemMatrix ERROR - loading column",mypid_); printf(" that has not been declared before - %d.\n",colIndex); - for ( j = 0; j < rowLengths_[localRow]; j++ ) + for ( j = 0; j < rowLengths_[localRow]; j++ ) printf(" available column index = %d\n", colIndices_[localRow][j]); exit(1); @@ -1569,7 +1569,7 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int row, int numValues, #endif //------------------------------------------------------------------- - // diagnostic message + // diagnostic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 5 ) @@ -1585,7 +1585,7 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, int numPtCols, const int* ptCols, const double* const* values) { - int i, j, k, index, colIndex, localRow, orderFlag=0; + int i, j, k, index, colIndex, localRow, orderFlag=0; int *indptr, rowLeng, useOld; double *valptr, *auxValues; @@ -1603,7 +1603,7 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, localRow = ptRows[i] - localStartRow_ + 1; for ( j = 0; j < numPtCols; j++ ) printf(" %4d : row,col,val = %8d %8d %e\n",mypid_, - ptRows[i]+1, ptCols[j]+1, values[i][j]); + ptRows[i]+1, ptCols[j]+1, values[i][j]); } } } @@ -1631,16 +1631,16 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, useOld = orderFlag = 0; if ( numPtCols == nStored_ && storedIndices_ != NULL ) { - for ( i = 0; i < numPtCols; i++ ) + for ( i = 0; i < numPtCols; i++ ) if ( storedIndices_[i] != ptCols[i] ) break; if ( i == numPtCols ) useOld = 1; } - if ( ! useOld ) + if ( ! 
useOld ) { - for ( i = 1; i < numPtCols; i++ ) + for ( i = 1; i < numPtCols; i++ ) if ( ptCols[i] < ptCols[i-1] ) { orderFlag = 1; break; } if ( orderFlag == 1 ) - { + { if ( numPtCols != nStored_ ) { if ( storedIndices_ != NULL ) delete [] storedIndices_; @@ -1649,10 +1649,10 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, auxStoredIndices_ = new int[numPtCols]; nStored_ = numPtCols; } - for ( i = 0; i < numPtCols; i++ ) + for ( i = 0; i < numPtCols; i++ ) { storedIndices_[i] = ptCols[i]; - auxStoredIndices_[i] = i; + auxStoredIndices_[i] = i; } HYPRE_LSI_qsort1a(storedIndices_,auxStoredIndices_,0,numPtCols-1); for ( i = 0; i < numPtCols; i++ ) storedIndices_[i] = ptCols[i]; @@ -1666,7 +1666,7 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, nStored_ = 0; } } - for ( i = 0; i < numPtRows; i++ ) + for ( i = 0; i < numPtRows; i++ ) { localRow = ptRows[i] - localStartRow_ + 1; indptr = colIndices_[localRow]; @@ -1674,7 +1674,7 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, rowLeng = rowLengths_[localRow]; auxValues = (double *) values[i]; index = 0; - for ( j = 0; j < numPtCols; j++ ) + for ( j = 0; j < numPtCols; j++ ) { if ( storedIndices_ ) colIndex = storedIndices_[auxStoredIndices_[j]] + 1; @@ -1682,15 +1682,15 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, colIndex = ptCols[j] + 1; if (FEI_mixedDiag_ && ptRows[i] == ptCols[j] && numPtRows > 1) - FEI_mixedDiag_[ptCols[numPtCols-1]-localStartRow_+1] += auxValues[j]; + FEI_mixedDiag_[ptCols[numPtCols-1]-localStartRow_+1] += auxValues[j]; - while ( index < rowLeng && indptr[index] < colIndex ) index++; + while ( index < rowLeng && indptr[index] < colIndex ) index++; if ( index >= rowLeng ) { printf("%4d : sumIntoSystemMatrix ERROR - loading column",mypid_); printf(" that has not been declared before - %d (row=%d).\n", colIndex, ptRows[i]+1); - for ( k = 0; k < rowLeng; k++ ) + for ( k = 0; k 
< rowLeng; k++ ) printf(" available column index = %d\n", indptr[k]); exit(1); } @@ -1705,7 +1705,7 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, #endif //------------------------------------------------------------------- - // diagnostic message + // diagnostic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 5 ) @@ -1714,11 +1714,11 @@ int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, } //*************************************************************************** -// add nonzero entries into the matrix data structure +// add nonzero entries into the matrix data structure //--------------------------------------------------------------------------- int HYPRE_LinSysCore::sumIntoSystemMatrix(int numPtRows, const int* ptRows, - int numPtCols, const int* ptCols, int numBlkRows, + int numPtCols, const int* ptCols, int numBlkRows, const int* blkRows, int numBlkCols, const int* blkCols, const double* const* values) { @@ -1771,13 +1771,13 @@ int HYPRE_LinSysCore::putIntoSystemMatrix(int numPtRows, const int* ptRows, if ( rowLengths_ == NULL && colIndices_ == NULL ) { localNRows = localEndRow_ - localStartRow_ + 1; - if ( localNRows > 0 ) + if ( localNRows > 0 ) { rowLengths_ = new int[localNRows]; colIndices_ = new int*[localNRows]; colValues_ = new double*[localNRows]; } - for ( i = 0; i < localNRows; i++ ) + for ( i = 0; i < localNRows; i++ ) { rowLengths_[i] = 0; colIndices_[i] = NULL; @@ -1789,7 +1789,7 @@ int HYPRE_LinSysCore::putIntoSystemMatrix(int numPtRows, const int* ptRows, // first adjust memory allocation (conservative) //------------------------------------------------------------------- - for ( i = 0; i < numPtRows; i++ ) + for ( i = 0; i < numPtRows; i++ ) { localRow = ptRows[i] - localStartRow_ + 1; if ( rowLengths_[localRow] > 0 ) @@ -1797,7 +1797,7 @@ int HYPRE_LinSysCore::putIntoSystemMatrix(int numPtRows, const int* 
ptRows, newLeng = rowLengths_[localRow] + numPtCols; tempInd = new int[newLeng]; tempVal = new double[newLeng]; - for ( j = 0; j < rowLengths_[localRow]; j++ ) + for ( j = 0; j < rowLengths_[localRow]; j++ ) { tempVal[j] = colValues_[localRow][j]; tempInd[j] = colIndices_[localRow][j]; @@ -1820,7 +1820,7 @@ int HYPRE_LinSysCore::putIntoSystemMatrix(int numPtRows, const int* ptRows, // load the local matrix //------------------------------------------------------------------- - for ( i = 0; i < numPtRows; i++ ) + for ( i = 0; i < numPtRows; i++ ) { localRow = ptRows[i] - localStartRow_ + 1; if ( rowLengths_[localRow] > 0 ) @@ -1828,7 +1828,7 @@ int HYPRE_LinSysCore::putIntoSystemMatrix(int numPtRows, const int* ptRows, newLeng = rowLengths_[localRow]; tempInd = colIndices_[localRow]; tempVal = colValues_[localRow]; - for ( j = 0; j < numPtCols; j++ ) + for ( j = 0; j < numPtCols; j++ ) { colIndex = ptCols[j] + 1; index = hypre_BinarySearch(tempInd, colIndex, newLeng); @@ -1846,14 +1846,14 @@ int HYPRE_LinSysCore::putIntoSystemMatrix(int numPtRows, const int* ptRows, { tempInd = colIndices_[localRow]; tempVal = colValues_[localRow]; - for ( j = 0; j < numPtCols; j++ ) + for ( j = 0; j < numPtCols; j++ ) { colIndex = ptCols[j] + 1; tempInd[j] = colIndex; tempVal[j] = values[i][j]; } sortFlag = 0; - for ( j = 1; j < numPtCols; j++ ) + for ( j = 1; j < numPtCols; j++ ) if ( tempInd[j] < tempInd[j-1] ) sortFlag = 1; rowLengths_[localRow] = numPtCols; if ( sortFlag == 1 ) hypre_qsort1( tempInd, tempVal, 0, numPtCols-1 ); @@ -1861,7 +1861,7 @@ int HYPRE_LinSysCore::putIntoSystemMatrix(int numPtRows, const int* ptRows, } //------------------------------------------------------------------- - // diagnostic message + // diagnostic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 5 ) @@ -1917,7 +1917,7 @@ int HYPRE_LinSysCore::getMatrixRow(int row, double* coefs, int* indices, colVal = 
colValues_[rowIndex]; minLeng = len; if ( minLeng > rowLeng ) minLeng = rowLeng; - for( i = 0; i < minLeng; i++ ) + for( i = 0; i < minLeng; i++ ) { coefs[i] = colVal[i]; indices[i] = colInd[i]; @@ -1933,7 +1933,7 @@ int HYPRE_LinSysCore::getMatrixRow(int row, double* coefs, int* indices, HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowLeng,&colInd,&colVal); minLeng = len; if ( minLeng > rowLeng ) minLeng = rowLeng; - for( i = 0; i < minLeng; i++ ) + for( i = 0; i < minLeng; i++ ) { coefs[i] = colVal[i]; indices[i] = colInd[i]; @@ -1963,7 +1963,7 @@ int HYPRE_LinSysCore::sumIntoRHSVector(int num, const double* values, if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 6 ) { for ( i = 0; i < num; i++ ) - printf("%d : sumIntoRHSVector - %d = %e.\n", mypid_, indices[i], + printf("%d : sumIntoRHSVector - %d = %e.\n", mypid_, indices[i], values[i]); } } @@ -1990,7 +1990,7 @@ int HYPRE_LinSysCore::sumIntoRHSVector(int num, const double* values, } } - HYPRE_IJVectorAddToValues(HYb_, num, (const int *) localInds, + HYPRE_IJVectorAddToValues(HYb_, num, (const int *) localInds, (const double *) values); delete [] localInds; @@ -2075,7 +2075,7 @@ int HYPRE_LinSysCore::matrixLoadComplete() printf("%4d : HYPRE_LSC::entering matrixLoadComplete.\n",mypid_); //------------------------------------------------------------------- - // Write MLI FEData information to a file + // Write MLI FEData information to a file //------------------------------------------------------------------- #ifdef HAVE_MLI @@ -2099,7 +2099,7 @@ int HYPRE_LinSysCore::matrixLoadComplete() if ( systemAssembled_ != 1 ) { //---------------------------------------------------------------- - // set system matrix initialization parameters + // set system matrix initialization parameters //---------------------------------------------------------------- HYPRE_IJMatrixSetRowSizes(HYA_, rowLengths_); @@ -2132,7 +2132,7 @@ int HYPRE_LinSysCore::matrixLoadComplete() eqnNum = localStartRow_ - 1 + i; leng = rowLengths_[i]; newLeng 
= 0; - for ( j = 0; j < leng; j++ ) + for ( j = 0; j < leng; j++ ) { if ( habs(colValues_[i][j]) >= truncThresh_ ) { @@ -2153,7 +2153,7 @@ int HYPRE_LinSysCore::matrixLoadComplete() } delete [] colValues_; colValues_ = NULL; - if ( memOptimizerFlag_ != 0 ) + if ( memOptimizerFlag_ != 0 ) { delete [] colIndices_; colIndices_ = NULL; @@ -2172,14 +2172,14 @@ int HYPRE_LinSysCore::matrixLoadComplete() currR_ = HYr_; if (slideObj_ != NULL) { - slideObj = (HYPRE_SlideReduction *) slideObj_; + slideObj = (HYPRE_SlideReduction *) slideObj_; delete slideObj; } slideObj_ = NULL; } //------------------------------------------------------------------- - // diagnostics : print the matrix and rhs to files + // diagnostics : print the matrix and rhs to files //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_PRINTMAT) && @@ -2291,7 +2291,7 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, for ( j = 0; j < fieldSize; j++ ) printf("putNodalFieldData : %4d %2d = %e\n",i,j, data[i*fieldSize+j]); - } + } if ( HYPreconID_ == HYMLI && lookup_ != NULL ) { blockIDs = (int *) lookup_->getElemBlockIDs(); @@ -2299,13 +2299,13 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, nodeFieldIDs = (int **) lookup_->getFieldIDsTable(blockID); nodeFieldID = nodeFieldIDs[0][0]; //checkFieldSize = lookup_->getFieldSize(nodeFieldID); - //assert( checkFieldSize == fieldSize ); + //hypre_assert( checkFieldSize == fieldSize ); eqnNumbers = new int[numNodes]; newData = new double[numNodes*fieldSize]; newNumNodes = 0; for ( i = 0; i < numNodes*fieldSize; i++ ) newData[i] = -99999.9; for ( i = 0; i < numNodes; i++ ) - { + { index = lookup_->getEqnNumber(nodeNumbers[i],nodeFieldID); /* ====== @@ -2314,8 +2314,8 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, if ( index >= localStartRow_-1 && index < localEndRow_) { if ( newData[newNumNodes*fieldSize] == -99999.9 ) - { - for ( j = 0; j < fieldSize; 
j++ ) + { + for ( j = 0; j < fieldSize; j++ ) newData[newNumNodes*fieldSize+j] = data[i*fieldSize+j]; eqnNumbers[newNumNodes++] = index; } @@ -2325,7 +2325,7 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, if ( MLI_NodalCoord_ == NULL ) { MLI_EqnNumbers_ = new int[nRows/fieldSize]; - for (i=0; i EdgeNodeList + // nodal FEI) ===> EdgeNodeList //------------------------------------------------------------------- if (fieldID == -4) @@ -2376,7 +2376,7 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, for (j = 0; j < fieldSize; j++) printf("putNodalFieldData : %4d %2d = %e\n",i,j, data[i*fieldSize+j]); - } + } if (lookup_ != NULL && fieldSize == 2 && numNodes > 0) { @@ -2389,28 +2389,28 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, iArray = new int[numEdges*fieldSize]; newNumEdges = 0; for (i = 0; i < numEdges; i++) - { + { index = lookup_->getEqnNumber(nodeNumbers[i],nodeFieldID); if (index >= localStartRow_-1 && index < localEndRow_) { - for (j = 0; j < fieldSize; j++) + for (j = 0; j < fieldSize; j++) iArray[newNumEdges*fieldSize+j] = (int) data[i*fieldSize+j]; eqnNumbers[newNumEdges++] = index; } } nRows = localEndRow_ - localStartRow_ + 1; - if (AMSData_.EdgeNodeList_ != NULL) delete [] AMSData_.EdgeNodeList_; + if (AMSData_.EdgeNodeList_ != NULL) hypre_TFree(AMSData_.EdgeNodeList_, HYPRE_MEMORY_HOST); AMSData_.EdgeNodeList_ = NULL; if (newNumEdges > 0) { AMSData_.numEdges_ = nRows; - AMSData_.EdgeNodeList_ = new int[nRows*fieldSize]; + AMSData_.EdgeNodeList_ = hypre_CTAlloc(HYPRE_BigInt, nRows*fieldSize, HYPRE_MEMORY_HOST); for (i = 0; i < nRows*fieldSize; i++) AMSData_.EdgeNodeList_[i] = -99999; for (i = 0; i < newNumEdges; i++) { index = eqnNumbers[i] - localStartRow_ + 1; - for (j = 0; j < fieldSize; j++ ) + for (j = 0; j < fieldSize; j++ ) AMSData_.EdgeNodeList_[index*fieldSize+j] = iArray[i*fieldSize+j]; } errCnt = 0; @@ -2438,15 +2438,15 @@ int HYPRE_LinSysCore::putNodalFieldData(int 
fieldID, int fieldSize, for (j = 0; j < fieldSize; j++) printf("putNodalFieldData : %4d %2d = %e\n",i,j, data[i*fieldSize+j]); - } + } if (lookup_ != NULL && fieldSize == 3) { blockIDs = (int *) lookup_->getElemBlockIDs(); blockID = blockIDs[0]; nodeFieldIDs = (int **) lookup_->getFieldIDsTable(blockID); nodeFieldID = nodeFieldIDs[0][0]; - if (AMSData_.NodeNumbers_ != NULL) delete [] AMSData_.NodeNumbers_; - if (AMSData_.NodalCoord_ != NULL) delete [] AMSData_.NodalCoord_; + if (AMSData_.NodeNumbers_ != NULL) hypre_TFree(AMSData_.NodeNumbers_, HYPRE_MEMORY_HOST); + if (AMSData_.NodalCoord_ != NULL) hypre_TFree(AMSData_.NodalCoord_, HYPRE_MEMORY_HOST); AMSData_.NodeNumbers_ = NULL; AMSData_.NodalCoord_ = NULL; AMSData_.numNodes_ = 0; @@ -2454,8 +2454,8 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, { AMSData_.numNodes_ = numNodes; AMSData_.numLocalNodes_ = localEndRow_ - localStartRow_ + 1; - AMSData_.NodeNumbers_ = new int[numNodes]; - AMSData_.NodalCoord_ = new double[fieldSize*numNodes]; + AMSData_.NodeNumbers_ = hypre_CTAlloc(HYPRE_BigInt, numNodes, HYPRE_MEMORY_HOST); + AMSData_.NodalCoord_ = hypre_CTAlloc(HYPRE_Real, fieldSize*numNodes, HYPRE_MEMORY_HOST); for (i = 0; i < numNodes; i++) { index = lookup_->getEqnNumber(nodeNumbers[i],nodeFieldID); @@ -2483,7 +2483,7 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, nodeFieldIDs = (int **) lookup_->getFieldIDsTable(blockID); nodeFieldID = nodeFieldIDs[0][0]; //checkFieldSize = lookup_->getFieldSize(nodeFieldID); - assert( fieldSize == 1 ); + hypre_assert( fieldSize == 1 ); aleNodeNumbers = new int[numNodes]; eqnNumbers = new int[numNodes]; for ( i = 0; i < numNodes; i++ ) @@ -2504,7 +2504,7 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, delete [] procNRows; delete [] eqnNumbers; delete [] aleNodeNumbers; - } + } } //------------------------------------------------------------------- @@ -2767,7 +2767,7 @@ int 
HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, HYPRE_ParCSRMatrixPrint(D1_csr, "D1.parmatrix"); } } - + if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) printf("%4d : HYPRE_LSC::leaving putNodalFieldData.\n",mypid_); return (0); @@ -2809,7 +2809,7 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, for ( int j = 0; j < fieldSize; j++ ) printf("putNodalFieldData : %4d %2d = %e\n",i,j, data[i*fieldSize+j]); - } + } if ( HYPreconID_ == HYMLI && lookup_ != NULL ) { blockIDs = (int *) lookup_->getElemBlockIDs(); @@ -2817,15 +2817,15 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, nodeFieldIDs = (int **) lookup_->getFieldIDsTable(blockID); nodeFieldID = nodeFieldIDs[0][0]; checkFieldSize = lookup_->getFieldSize(nodeFieldID); - //assert( checkFieldSize == fieldSize ); + //hypre_assert( checkFieldSize == fieldSize ); eqnNumbers = new int[numNodes]; for ( i = 0; i < numNodes; i++ ) eqnNumbers[i] = lookup_->getEqnNumber(nodeNumbers[i],nodeFieldID); HYPRE_LSI_MLILoadNodalCoordinates(HYPrecon_, numNodes, checkFieldSize, eqnNumbers, fieldSize, (double *) data); delete [] eqnNumbers; - } - } + } + } //------------------------------------------------------------------- // this is needed to set up the correct node equation map @@ -2843,7 +2843,7 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, nodeFieldIDs = (int **) lookup_->getFieldIDsTable(blockID); nodeFieldID = nodeFieldIDs[0][0]; checkFieldSize = lookup_->getFieldSize(nodeFieldID); - assert( fieldSize == 1 ); + hypre_assert( fieldSize == 1 ); aleNodeNumbers = new int[numNodes]; eqnNumbers = new int[numNodes]; for ( i = 0; i < numNodes; i++ ) @@ -2865,8 +2865,8 @@ int HYPRE_LinSysCore::putNodalFieldData(int fieldID, int fieldSize, delete [] procNRows; delete [] eqnNumbers; delete [] aleNodeNumbers; - } - } + } + } if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 2 ) printf("%4d : HYPRE_LSC::leaving putNodalFieldData.\n",mypid_); return (0); @@ 
-2917,13 +2917,13 @@ int HYPRE_LinSysCore::enforceEssentialBC(int* globalEqn, double* alpha, numLocalRows = localEndRow_ - localStartRow_ + 1; if ( matrixPartition_ == 1 && HYPreconID_ == HYMLI ) { - HYPRE_LSI_PartitionMatrix(numLocalRows,localStartRow_,rowLengths_, - colIndices_,colValues_, &numLabels, &labels); + HYPRE_LSI_PartitionMatrix(numLocalRows,localStartRow_,rowLengths_, + colIndices_,colValues_, &numLabels, &labels); HYPRE_LSI_MLILoadMaterialLabels(HYPrecon_, numLabels, labels); free( labels ); matrixPartition_ = 2; } - + //------------------------------------------------------------------- // examine each row individually //------------------------------------------------------------------- @@ -3004,7 +3004,7 @@ int HYPRE_LinSysCore::enforceEssentialBC(int* globalEqn, double* alpha, } } //**/============================================================= - for( i = 0; i < leng; i++ ) + for( i = 0; i < leng; i++ ) { localEqnNum = globalEqn[i] + 1 - localStartRow_; if ( localEqnNum >= 0 && localEqnNum < numLocalRows ) @@ -3018,19 +3018,19 @@ int HYPRE_LinSysCore::enforceEssentialBC(int* globalEqn, double* alpha, if (mRHSFlag_ == 1) { count = 0; - for ( j = 0; j < rowSize; j++ ) + for ( j = 0; j < rowSize; j++ ) { colIndex = colInd[j]; - if (colIndex >= localStartRow_ && colIndex <= localEndRow_) + if (colIndex >= localStartRow_ && colIndex <= localEndRow_) { - if ( (colIndex-1) != globalEqn[i]) + if ( (colIndex-1) != globalEqn[i]) { rowSize2 = rowLengths_[colIndex-localStartRow_]; colInd2 = colIndices_[colIndex-localStartRow_]; colVal2 = colValues_ [colIndex-localStartRow_]; - for( k = 0; k < rowSize2; k++ ) + for( k = 0; k < rowSize2; k++ ) { - if (colInd2[k]-1 == globalEqn[i]) + if (colInd2[k]-1 == globalEqn[i]) count++; break; } @@ -3046,19 +3046,19 @@ int HYPRE_LinSysCore::enforceEssentialBC(int* globalEqn, double* alpha, mRHSRowVals_[mRHSNumGEqns_] = new double[count]; } count = 0; - for ( j = 0; j < rowSize; j++ ) + for ( j = 0; j < rowSize; j++ ) { 
colIndex = colInd[j]; - if (colIndex >= localStartRow_ && colIndex <= localEndRow_) + if (colIndex >= localStartRow_ && colIndex <= localEndRow_) { - if ( (colIndex-1) != globalEqn[i]) + if ( (colIndex-1) != globalEqn[i]) { rowSize2 = rowLengths_[colIndex-localStartRow_]; colInd2 = colIndices_[colIndex-localStartRow_]; colVal2 = colValues_ [colIndex-localStartRow_]; - for( k = 0; k < rowSize2; k++ ) + for( k = 0; k < rowSize2; k++ ) { - if ( colInd2[k]-1 == globalEqn[i] ) + if ( colInd2[k]-1 == globalEqn[i] ) { mRHSRowVals_[mRHSNumGEqns_][count] = colVal2[k]; mRHSRowInds_[mRHSNumGEqns_][count] = colIndex; @@ -3073,22 +3073,22 @@ int HYPRE_LinSysCore::enforceEssentialBC(int* globalEqn, double* alpha, } //=================================================== - for ( j = 0; j < rowSize; j++ ) + for ( j = 0; j < rowSize; j++ ) { colIndex = colInd[j]; if ( colIndex-1 == globalEqn[i] ) colVal[j] = 1.0; else colVal[j] = 0.0; - if ( colIndex >= localStartRow_ && colIndex <= localEndRow_) + if ( colIndex >= localStartRow_ && colIndex <= localEndRow_) { - if ( (colIndex-1) != globalEqn[i]) + if ( (colIndex-1) != globalEqn[i]) { rowSize2 = rowLengths_[colIndex-localStartRow_]; colInd2 = colIndices_[colIndex-localStartRow_]; colVal2 = colValues_ [colIndex-localStartRow_]; - for( k = 0; k < rowSize2; k++ ) + for( k = 0; k < rowSize2; k++ ) { - if ( colInd2[k]-1 == globalEqn[i] ) + if ( colInd2[k]-1 == globalEqn[i] ) { rhs_term = gamma1[i] / alpha[i] * colVal2[k]; eqnNum = colIndex - 1; @@ -3126,7 +3126,7 @@ int HYPRE_LinSysCore::enforceEssentialBC(int* globalEqn, double* alpha, #endif //------------------------------------------------------------------- - // diagnostic message + // diagnostic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 5 ) @@ -3145,7 +3145,7 @@ int HYPRE_LinSysCore::enforceEssentialBC(int* globalEqn, double* alpha, int HYPRE_LinSysCore::enforceRemoteEssBCs(int numEqns, int* globalEqns, 
int** colIndices, int* colIndLen, - double** coefs) + double** coefs) { int i, j, k, numLocalRows, localEqnNum, rowLen, *colInd, eqnNum; int *iarray, **i2array, count; @@ -3195,11 +3195,11 @@ int HYPRE_LinSysCore::enforceRemoteEssBCs(int numEqns, int* globalEqns, rowLen = mRHSNEntries_[k]; colInd = mRHSRowInds_[k]; colVal = mRHSRowVals_[k]; - for ( j = 0; j < colIndLen[i]; j++) + for ( j = 0; j < colIndLen[i]; j++) { - for ( k = 0; k < rowLen; k++ ) + for ( k = 0; k < rowLen; k++ ) { - if (colInd[k]-1 == colIndices[i][j]) + if (colInd[k]-1 == colIndices[i][j]) { rhs_term = colVal[k] * coefs[i][j]; HYPRE_IJVectorGetValues(HYb_,1,&eqnNum,&bval); @@ -3254,7 +3254,7 @@ int HYPRE_LinSysCore::enforceRemoteEssBCs(int numEqns, int* globalEqns, } //**/============================================================= - for( i = 0; i < numEqns; i++ ) + for( i = 0; i < numEqns; i++ ) { localEqnNum = globalEqns[i] + 1 - localStartRow_; if ( localEqnNum < 0 || localEqnNum >= numLocalRows ) @@ -3272,9 +3272,9 @@ int HYPRE_LinSysCore::enforceRemoteEssBCs(int numEqns, int* globalEqns, if (mRHSFlag_ == 1) { count = 0; - for ( j = 0; j < colIndLen[i]; j++) + for ( j = 0; j < colIndLen[i]; j++) { - for ( k = 0; k < rowLen; k++ ) + for ( k = 0; k < rowLen; k++ ) if (colInd[k]-1 == colIndices[i][j]) count++; } if (count > 0) @@ -3286,11 +3286,11 @@ int HYPRE_LinSysCore::enforceRemoteEssBCs(int numEqns, int* globalEqns, mRHSRowVals_[mRHSNumGEqns_] = new double[count]; } count = 0; - for ( j = 0; j < colIndLen[i]; j++) + for ( j = 0; j < colIndLen[i]; j++) { - for ( k = 0; k < rowLen; k++ ) + for ( k = 0; k < rowLen; k++ ) { - if (colInd[k]-1 == colIndices[i][j]) + if (colInd[k]-1 == colIndices[i][j]) { mRHSRowVals_[k][count] = colVal[k]; mRHSRowInds_[k][count] = colInd[k]; @@ -3300,11 +3300,11 @@ int HYPRE_LinSysCore::enforceRemoteEssBCs(int numEqns, int* globalEqns, } //=================================================== - for ( j = 0; j < colIndLen[i]; j++) + for ( j = 0; j < colIndLen[i]; 
j++) { - for ( k = 0; k < rowLen; k++ ) + for ( k = 0; k < rowLen; k++ ) { - if (colInd[k]-1 == colIndices[i][j]) + if (colInd[k]-1 == colIndices[i][j]) { rhs_term = colVal[k] * coefs[i][j]; HYPRE_IJVectorGetValues(HYb_,1,&eqnNum,&bval); @@ -3316,10 +3316,10 @@ int HYPRE_LinSysCore::enforceRemoteEssBCs(int numEqns, int* globalEqns, } } } - } + } //------------------------------------------------------------------- - // diagnostic message + // diagnostic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 5 ) @@ -3336,7 +3336,7 @@ int HYPRE_LinSysCore::enforceRemoteEssBCs(int numEqns, int* globalEqns, //b[globalEqn] += gamma/beta; //--------------------------------------------------------------------------- -int HYPRE_LinSysCore::enforceOtherBC(int* globalEqn, double* alpha, +int HYPRE_LinSysCore::enforceOtherBC(int* globalEqn, double* alpha, double* beta, double* gamma1, int leng) { int i, j, numLocalRows, localEqnNum, *colInd, rowSize, eqnNum; @@ -3366,7 +3366,7 @@ int HYPRE_LinSysCore::enforceOtherBC(int* globalEqn, double* alpha, //**/ The following is for multiple right hand side (Mar 2009) if (mRHSFlag_ == 1 && currentRHS_ != 0) { - for( i = 0; i < leng; i++ ) + for( i = 0; i < leng; i++ ) { localEqnNum = globalEqn[i] + 1 - localStartRow_; if ( localEqnNum < 0 || localEqnNum >= numLocalRows ) @@ -3384,7 +3384,7 @@ int HYPRE_LinSysCore::enforceOtherBC(int* globalEqn, double* alpha, //============================================================ else { - for( i = 0; i < leng; i++ ) + for( i = 0; i < leng; i++ ) { localEqnNum = globalEqn[i] + 1 - localStartRow_; if ( localEqnNum < 0 || localEqnNum >= numLocalRows ) @@ -3396,9 +3396,9 @@ int HYPRE_LinSysCore::enforceOtherBC(int* globalEqn, double* alpha, colVal = colValues_[localEqnNum]; colInd = colIndices_[localEqnNum]; - for ( j = 0; j < rowSize; j++) + for ( j = 0; j < rowSize; j++) { - if ((colInd[j]-1) == globalEqn[i]) + if ((colInd[j]-1) 
== globalEqn[i]) { colVal[j] += alpha[i]/beta[i]; break; @@ -3431,7 +3431,7 @@ int HYPRE_LinSysCore::enforceOtherBC(int* globalEqn, double* alpha, //--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::getMatrixPtr(Data& data) +int HYPRE_LinSysCore::getMatrixPtr(Data& data) { (void) data; printf("%4d : HYPRE_LSC::getMatrixPtr ERROR - not implemented.\n",mypid_); @@ -3446,7 +3446,7 @@ int HYPRE_LinSysCore::getMatrixPtr(Data& data) //--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::copyInMatrix(double scalar, const Data& data) +int HYPRE_LinSysCore::copyInMatrix(double scalar, const Data& data) { int i; char *name; @@ -3470,16 +3470,16 @@ int HYPRE_LinSysCore::copyInMatrix(double scalar, const Data& data) else if (!strcmp(name, "AMSData")) { auxAMSData = (HYPRE_FEI_AMSData *) data.getDataPtr(); - if (AMSData_.NodeNumbers_ != NULL) delete [] AMSData_.NodeNumbers_; - if (AMSData_.NodalCoord_ != NULL) delete [] AMSData_.NodalCoord_; + if (AMSData_.NodeNumbers_ != NULL) hypre_TFree(AMSData_.NodeNumbers_, HYPRE_MEMORY_HOST); + if (AMSData_.NodalCoord_ != NULL) hypre_TFree(AMSData_.NodalCoord_, HYPRE_MEMORY_HOST); AMSData_.NodeNumbers_ = NULL; AMSData_.NodalCoord_ = NULL; AMSData_.numNodes_ = auxAMSData->numNodes_; AMSData_.numLocalNodes_ = auxAMSData->numLocalNodes_; if (AMSData_.numNodes_ > 0) { - AMSData_.NodeNumbers_ = new int[AMSData_.numNodes_]; - AMSData_.NodalCoord_ = new double[AMSData_.numNodes_*mlNumPDEs_]; + AMSData_.NodeNumbers_ = hypre_CTAlloc(HYPRE_BigInt, AMSData_.numNodes_, HYPRE_MEMORY_HOST); + AMSData_.NodalCoord_ = hypre_CTAlloc(HYPRE_Real, AMSData_.numNodes_*mlNumPDEs_, HYPRE_MEMORY_HOST); for (i = 0; i < AMSData_.numNodes_; i++) AMSData_.NodeNumbers_[i] = auxAMSData->NodeNumbers_[i]; for (i = 0; i < AMSData_.numNodes_*mlNumPDEs_; i++) @@ -3500,7 +3500,7 @@ int HYPRE_LinSysCore::copyInMatrix(double scalar, const Data& data) 
//--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::copyOutMatrix(double scalar, Data& data) +int HYPRE_LinSysCore::copyOutMatrix(double scalar, Data& data) { char *name; @@ -3530,7 +3530,7 @@ int HYPRE_LinSysCore::copyOutMatrix(double scalar, Data& data) //--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::sumInMatrix(double scalar, const Data& data) +int HYPRE_LinSysCore::sumInMatrix(double scalar, const Data& data) { (void) scalar; (void) data; @@ -3545,7 +3545,7 @@ int HYPRE_LinSysCore::sumInMatrix(double scalar, const Data& data) //--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::getRHSVectorPtr(Data& data) +int HYPRE_LinSysCore::getRHSVectorPtr(Data& data) { //------------------------------------------------------------------- // diagnostic message @@ -3576,7 +3576,7 @@ int HYPRE_LinSysCore::getRHSVectorPtr(Data& data) //--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::copyInRHSVector(double scalar, const Data& data) +int HYPRE_LinSysCore::copyInRHSVector(double scalar, const Data& data) { //------------------------------------------------------------------- // diagnostic message @@ -3603,9 +3603,9 @@ int HYPRE_LinSysCore::copyInRHSVector(double scalar, const Data& data) HYPRE_IJVectorGetObject(HYb_, (void **) &destVec); else HYPRE_IJVectorGetObject(HYx_, (void **) &destVec); - + HYPRE_ParVectorCopy(srcVec, destVec); - + if ( scalar != 1.0 ) HYPRE_ParVectorScale( scalar, destVec); // do not destroy the incoming vector //HYPRE_IJVectorDestroy(inVec); @@ -3625,7 +3625,7 @@ int HYPRE_LinSysCore::copyInRHSVector(double scalar, const Data& data) //--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::copyOutRHSVector(double scalar, Data& data) +int 
HYPRE_LinSysCore::copyOutRHSVector(double scalar, Data& data) { //------------------------------------------------------------------- // diagnostic message @@ -3639,7 +3639,7 @@ int HYPRE_LinSysCore::copyOutRHSVector(double scalar, Data& data) //------------------------------------------------------------------- HYPRE_IJVector newVector; - HYPRE_IJVectorCreate(comm_, localStartRow_-1, localEndRow_-1, + HYPRE_IJVectorCreate(comm_, localStartRow_-1, localEndRow_-1, &newVector); HYPRE_IJVectorSetObjectType(newVector, HYPRE_PARCSR); HYPRE_IJVectorInitialize(newVector); @@ -3664,14 +3664,14 @@ int HYPRE_LinSysCore::copyOutRHSVector(double scalar, Data& data) printf("%4d : HYPRE_LSC::leaving copyOutRHSVector.\n",mypid_); return (0); } -#endif +#endif //*************************************************************************** // add the incoming ParCSR vector to the current right hand side (scaled) //--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::sumInRHSVector(double scalar, const Data& data) +int HYPRE_LinSysCore::sumInRHSVector(double scalar, const Data& data) { //------------------------------------------------------------------- // diagnostic message @@ -3704,14 +3704,14 @@ int HYPRE_LinSysCore::sumInRHSVector(double scalar, const Data& data) printf("%4d : HYPRE_LSC::leaving sumInRHSVector.\n",mypid_); return (0); } -#endif +#endif //*************************************************************************** // deallocate an incoming IJ matrix //--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::destroyMatrixData(Data& data) +int HYPRE_LinSysCore::destroyMatrixData(Data& data) { //------------------------------------------------------------------- // diagnostic message @@ -3740,14 +3740,14 @@ int HYPRE_LinSysCore::destroyMatrixData(Data& data) printf("%4d : HYPRE_LSC::leaving destroyMatrixData.\n",mypid_); return (0); } -#endif +#endif 
//*************************************************************************** // deallocate an incoming IJ vector //--------------------------------------------------------------------------- #ifndef NOFEI -int HYPRE_LinSysCore::destroyVectorData(Data& data) +int HYPRE_LinSysCore::destroyVectorData(Data& data) { //------------------------------------------------------------------- // diagnostic message @@ -3776,13 +3776,13 @@ int HYPRE_LinSysCore::destroyVectorData(Data& data) printf("%4d : HYPRE_LSC::leaving destroyVectorData.\n",mypid_); return (0); } -#endif +#endif //*************************************************************************** // set number of right hand side vectors //--------------------------------------------------------------------------- -int HYPRE_LinSysCore::setNumRHSVectors(int numRHSs, const int* rhsIDs) +int HYPRE_LinSysCore::setNumRHSVectors(int numRHSs, const int* rhsIDs) { //------------------------------------------------------------------- // diagnostic message @@ -3792,7 +3792,7 @@ int HYPRE_LinSysCore::setNumRHSVectors(int numRHSs, const int* rhsIDs) { printf("%4d : HYPRE_LSC::entering setNumRHSVectors.\n",mypid_); printf("%4d : HYPRE_LSC::incoming numRHSs = %d\n",mypid_,numRHSs); - for ( int i = 0; i < numRHSs_; i++ ) + for ( int i = 0; i < numRHSs_; i++ ) printf("%4d : HYPRE_LSC::incoming RHSIDs = %d\n",mypid_,rhsIDs[i]); } if (numRHSs < 0) @@ -3807,9 +3807,9 @@ int HYPRE_LinSysCore::setNumRHSVectors(int numRHSs, const int* rhsIDs) if ( matrixVectorsCreated_ ) { - if ( HYbs_ != NULL ) + if ( HYbs_ != NULL ) { - for ( int i = 0; i < numRHSs_; i++ ) + for ( int i = 0; i < numRHSs_; i++ ) if ( HYbs_[i] != NULL ) HYPRE_IJVectorDestroy(HYbs_[i]); delete [] HYbs_; HYbs_ = NULL; @@ -3842,7 +3842,7 @@ int HYPRE_LinSysCore::setNumRHSVectors(int numRHSs, const int* rhsIDs) delete [] rhsIDs_; numRHSs_ = numRHSs; rhsIDs_ = new int[numRHSs_]; - + for ( int i = 0; i < numRHSs; i++ ) rhsIDs_[i] = rhsIDs[i]; 
//------------------------------------------------------------------- @@ -3858,7 +3858,7 @@ int HYPRE_LinSysCore::setNumRHSVectors(int numRHSs, const int* rhsIDs) // select a right hand side vector //--------------------------------------------------------------------------- -int HYPRE_LinSysCore::setRHSID(int rhsID) +int HYPRE_LinSysCore::setRHSID(int rhsID) { //------------------------------------------------------------------- // diagnostic message @@ -3891,7 +3891,7 @@ int HYPRE_LinSysCore::setRHSID(int rhsID) //--------------------------------------------------------------------------- int HYPRE_LinSysCore::putInitialGuess(const int* eqnNumbers, - const double* values, int leng) + const double* values, int leng) { int i, *localInds, *iarray, *iarray2; @@ -3915,7 +3915,7 @@ int HYPRE_LinSysCore::putInitialGuess(const int* eqnNumbers, mapFromSolnLengMax_ = mapFromSolnLengMax_ + 2 * leng; mapFromSolnList_ = new int[mapFromSolnLengMax_]; mapFromSolnList2_ = new int[mapFromSolnLengMax_]; - for ( i = 0; i < mapFromSolnLeng_; i++ ) + for ( i = 0; i < mapFromSolnLeng_; i++ ) { mapFromSolnList_[i] = iarray[i]; mapFromSolnList2_[i] = iarray2[i]; @@ -3928,7 +3928,7 @@ int HYPRE_LinSysCore::putInitialGuess(const int* eqnNumbers, localInds = new int[leng]; for ( i = 0; i < leng; i++ ) // change to 0-based { - if ((eqnNumbers[i]+1) >= localStartRow_ && + if ((eqnNumbers[i]+1) >= localStartRow_ && (eqnNumbers[i]+1) <= localEndRow_) localInds[i] = eqnNumbers[i]; else { @@ -3966,7 +3966,7 @@ int HYPRE_LinSysCore::putInitialGuess(const int* eqnNumbers, // used for getting the solution out of the solver, and into the application //--------------------------------------------------------------------------- -int HYPRE_LinSysCore::getSolution(double* answers,int leng) +int HYPRE_LinSysCore::getSolution(double* answers,int leng) { int i, *equations; @@ -4010,7 +4010,7 @@ int HYPRE_LinSysCore::getSolution(double* answers,int leng) // used for getting the solution out of the solver, 
and into the application //--------------------------------------------------------------------------- -int HYPRE_LinSysCore::getSolnEntry(int eqnNumber, double& answer) +int HYPRE_LinSysCore::getSolnEntry(int eqnNumber, double& answer) { double val; int equation; @@ -4028,10 +4028,10 @@ int HYPRE_LinSysCore::getSolnEntry(int eqnNumber, double& answer) equation = eqnNumber; // incoming 0-based index - if (localStartCol_ == -1 && equation < localStartRow_-1 && + if (localStartCol_ == -1 && equation < localStartRow_-1 && equation > localEndRow_ ) { - printf("%d : getSolnEntry ERROR - index out of range = %d.\n", mypid_, + printf("%d : getSolnEntry ERROR - index out of range = %d.\n", mypid_, eqnNumber); exit(1); } @@ -4052,7 +4052,7 @@ int HYPRE_LinSysCore::getSolnEntry(int eqnNumber, double& answer) // select which Krylov solver to use //--------------------------------------------------------------------------- -void HYPRE_LinSysCore::selectSolver(char* name) +void HYPRE_LinSysCore::selectSolver(char* name) { //------------------------------------------------------------------- // diagnostic message @@ -4089,72 +4089,72 @@ void HYPRE_LinSysCore::selectSolver(char* name) if ( !strcmp(name, "cg" ) ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYPCG; } else if ( !strcmp(name, "lsicg" ) ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYLSICG; } else if ( !strcmp(name, "hybrid") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYHYBRID; } else if ( !strcmp(name, "gmres") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYGMRES; } else if ( !strcmp(name, "fgmres") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYFGMRES; } else if ( !strcmp(name, "bicgstab") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYCGSTAB; } else 
if ( !strcmp(name, "bicgstabl") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYCGSTABL; } else if ( !strcmp(name, "tfqmr") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYTFQMR; } else if ( !strcmp(name, "bicgs") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYBICGS; } else if ( !strcmp(name, "symqmr") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYSYMQMR; } else if ( !strcmp(name, "boomeramg") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYAMG; } else if ( !strcmp(name, "superlu") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYSUPERLU; } else if ( !strcmp(name, "superlux") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYSUPERLUX; } else if ( !strcmp(name, "dsuperlu") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); #ifdef HYPRE_USING_DSUPERLU HYSolverID_ = HYDSUPERLU; #else @@ -4165,12 +4165,12 @@ void HYPRE_LinSysCore::selectSolver(char* name) } else if ( !strcmp(name, "y12m") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYY12M; } else if ( !strcmp(name, "amge") ) { - strcpy( HYSolverName_, name ); + hypre_strcpy( HYSolverName_, name ); HYSolverID_ = HYAMGE; } else @@ -4292,7 +4292,7 @@ void HYPRE_LinSysCore::selectPreconditioner(char *name) else if (HYPreconID_ == HYMLI) HYPRE_LSI_MLIDestroy(HYPrecon_); #endif - else if (HYPreconID_ == HYUZAWA) + else if (HYPreconID_ == HYUZAWA) HYPRE_LSI_UzawaDestroy(HYPrecon_); #ifdef HAVE_SYSPDE else if (HYPreconID_ == HYSYSPDE) @@ -4310,63 +4310,63 @@ void HYPRE_LinSysCore::selectPreconditioner(char *name) if (!strcmp(name, "identity")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYIDENTITY; } else if 
(!strcmp(name, "diagonal")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYDIAGONAL; } else if (!strcmp(name, "pilut")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYPILUT; } else if (!strcmp(name, "parasails")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYPARASAILS; } else if (!strcmp(name, "boomeramg")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYBOOMERAMG; } else if (!strcmp(name, "ddilut")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYDDILUT; } else if (!strcmp(name, "schwarz")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYSCHWARZ; } else if (!strcmp(name, "ddict")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYDDICT; } else if (!strcmp(name, "poly")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYPOLY; } else if (!strcmp(name, "euclid")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYEUCLID; } else if (!strcmp(name, "blockP")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYBLOCK; } else if (!strcmp(name, "ml")) { #ifdef HAVE_ML - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYML; #else if ((HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3) @@ -4381,7 +4381,7 @@ void HYPRE_LinSysCore::selectPreconditioner(char *name) else if (!strcmp(name, "mlmaxwell")) { #ifdef HAVE_MLMAXWELL - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYMLMAXWELL; #else if ((HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3) @@ -4396,7 +4396,7 @@ void HYPRE_LinSysCore::selectPreconditioner(char *name) else if (!strcmp(name, "mli")) { #ifdef HAVE_MLI - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYMLI; #else 
if ((HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3) @@ -4410,25 +4410,25 @@ void HYPRE_LinSysCore::selectPreconditioner(char *name) } else if (!strcmp(name, "ams")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYAMS; } else if (!strcmp(name, "uzawa")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYUZAWA; } #ifdef HAVE_SYSPDE else if (!strcmp(name, "syspde")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYSYSPDE; } #endif #ifdef HYPRE_USING_DSUPERLU else if (!strcmp(name, "dsuperlu")) { - strcpy(HYPreconName_, name); + hypre_strcpy(HYPreconName_, name); HYPreconID_ = HYDSLU; } #endif @@ -4610,7 +4610,7 @@ int HYPRE_LinSysCore::formResidual(double* values, int leng) } //------------------------------------------------------------------- - // diagnostic message + // diagnostic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) @@ -4629,13 +4629,13 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) int slideCheck[2]; #ifdef HAVE_MLI int *constrMap, *constrEqns, ncount, *iArray; - double *tempNodalCoord; + double *tempNodalCoord; #endif int *numSweeps, *relaxType, reduceAFlag; int *matSizes, *rowInd, retFlag, tempIter, nTrials; double rnorm=0.0, ddata, *colVal, *relaxWt, *diagVals; double stime, etime, ptime, rtime1, rtime2, newnorm; - double rnorm0, rnorm1, convRate, rateThresh; + double rnorm0, rnorm1, convRate, rateThresh; char fname[40], paramString[100]; FILE *fp; HYPRE_IJMatrix TempA, IJI; @@ -4653,7 +4653,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) HYPRE_ClearAllErrors(); //------------------------------------------------------------------- - // diagnostic message + // diagnostic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) @@ -4673,13 +4673,13 
@@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) } else if ( schurReduction_ == 1 ) buildSchurReducedRHS(); - if ( schurReduction_ == 0 && slideReduction_ != 0 ) + if ( schurReduction_ == 0 && slideReduction_ != 0 ) { if ( constrList_ != NULL ) delete [] constrList_; constrList_ = NULL; if ( slideReduction_ == 1 ) buildSlideReducedSystem(); else if ( slideReduction_ == 2 ) buildSlideReducedSystem2(); - else if ( slideReduction_ == 3 || slideReduction_ == 4 ) + else if ( slideReduction_ == 3 || slideReduction_ == 4 ) { if (slideObj == NULL) { @@ -4757,7 +4757,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) MPI_Barrier(comm_); rtime2 = LSC_Wtime(); - + //------------------------------------------------------------------- // if normal equation requested //------------------------------------------------------------------- @@ -4776,7 +4776,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) rowInd = new int[localNRows]; colInd = new int[localNRows]; colVal = new double[localNRows]; - for ( i = 0; i < localNRows; i++ ) + for ( i = 0; i < localNRows; i++ ) { matSizes[i] = 1; rowInd[i] = localStartRow_ - 1 + i; @@ -4794,7 +4794,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) HYPRE_IJMatrixAssemble(IJI); HYPRE_IJMatrixGetObject(IJI, (void **) &I_csr); hypre_BoomerAMGBuildCoarseOperator((hypre_ParCSRMatrix*) A_csr, - (hypre_ParCSRMatrix*) I_csr, (hypre_ParCSRMatrix*) A_csr, + (hypre_ParCSRMatrix*) I_csr, (hypre_ParCSRMatrix*) A_csr, (hypre_ParCSRMatrix**) &normalA_csr); HYPRE_IJMatrixDestroy( IJI ); HYPRE_IJMatrixCreate(comm_, localStartRow_-1, @@ -4851,7 +4851,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) printf(" Did you forget to call matrixLoadComplete?\n"); exit(1); } - if ( (normalEqnFlag_ & 7) == 7 ) + if ( (normalEqnFlag_ & 7) == 7 ) { HYPRE_IJMatrixGetObject(HYnormalA_, (void **) &A_csr); HYPRE_IJVectorGetObject(HYnormalB_, (void **) 
&b_csr); @@ -4917,7 +4917,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) fp = fopen("rbm","w"); for (i = 0; i < MLI_NumNodes_; i++) for (j = 0; j < MLI_FieldSize_; j++) - fprintf(fp,"%8d %25.16e\n", MLI_EqnNumbers_[i]+j+1, + fprintf(fp,"%8d %25.16e\n", MLI_EqnNumbers_[i]+j+1, MLI_NodalCoord_[i*3+j]); fclose(fp); } @@ -4943,12 +4943,12 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) if ( projectionScheme_ == 1 ) { computeAConjProjection(A_csr, x_csr, b_csr); - } + } else if ( projectionScheme_ == 2 ) { computeMinResProjection(A_csr, x_csr, b_csr); - } - + } + #ifdef HAVE_MLI if ( HYPreconID_ == HYMLI && feData_ != NULL ) { @@ -4960,37 +4960,37 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) iArray = new int[MLI_NumNodes_]; for (i = 0; i < MLI_NumNodes_; i++) iArray[i] = i; HYPRE_LSI_qsort1a(MLI_EqnNumbers_, iArray, 0, MLI_NumNodes_-1); - tempNodalCoord = MLI_NodalCoord_; + tempNodalCoord = MLI_NodalCoord_; ncount = 1; - for (i = 1; i < MLI_NumNodes_; i++) + for (i = 1; i < MLI_NumNodes_; i++) if (MLI_EqnNumbers_[i] != MLI_EqnNumbers_[ncount-1]) ncount++; MLI_NodalCoord_ = new double[ncount*MLI_FieldSize_]; - for (j = 0; j < MLI_FieldSize_; j++) + for (j = 0; j < MLI_FieldSize_; j++) MLI_NodalCoord_[j] = tempNodalCoord[iArray[0]*MLI_FieldSize_+j]; ncount = 1; - for (i = 1; i < MLI_NumNodes_; i++) + for (i = 1; i < MLI_NumNodes_; i++) { - if (MLI_EqnNumbers_[i] != MLI_EqnNumbers_[ncount-1]) + if (MLI_EqnNumbers_[i] != MLI_EqnNumbers_[ncount-1]) { MLI_EqnNumbers_[ncount] = MLI_EqnNumbers_[i]; - for (j = 0; j < MLI_FieldSize_; j++) + for (j = 0; j < MLI_FieldSize_; j++) MLI_NodalCoord_[ncount*MLI_FieldSize_+j] = tempNodalCoord[iArray[i]*MLI_FieldSize_+j]; ncount++; } } MLI_NumNodes_ = ncount; - //assert((MLI_NumNodes_*MLI_FieldSize_)==(localEndRow_-localStartRow_+1)); + //hypre_assert((MLI_NumNodes_*MLI_FieldSize_)==(localEndRow_-localStartRow_+1)); delete [] tempNodalCoord; delete [] iArray; 
- for (i = 0; i < MLI_NumNodes_; i++) + for (i = 0; i < MLI_NumNodes_; i++) { - if (MLI_NodalCoord_[i] == -99999.0) + if (MLI_NodalCoord_[i] == -99999.0) printf("%d : HYPRE launchSolver ERROR - coord %d not filled.\n", mypid_, i); } - HYPRE_LSI_MLILoadNodalCoordinates(HYPrecon_, MLI_NumNodes_, - MLI_FieldSize_, MLI_EqnNumbers_, MLI_FieldSize_, + HYPRE_LSI_MLILoadNodalCoordinates(HYPrecon_, MLI_NumNodes_, + MLI_FieldSize_, MLI_EqnNumbers_, MLI_FieldSize_, MLI_NodalCoord_, localEndRow_-localStartRow_+1); } #endif @@ -5005,7 +5005,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) switch ( HYSolverID_ ) { //---------------------------------------------------------------- - // choose PCG + // choose PCG //---------------------------------------------------------------- case HYPCG : @@ -5091,7 +5091,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) if ( convRate > rateThresh ) { if ( MLI_Hybrid_NSIncr_ > 1 ) - sprintf(paramString, "MLI incrNullSpaceDim %d", + sprintf(paramString, "MLI incrNullSpaceDim %d", MLI_Hybrid_NSIncr_); else sprintf(paramString, "MLI incrNullSpaceDim 2"); @@ -5132,7 +5132,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose LSICG + // choose LSICG //---------------------------------------------------------------- case HYLSICG : @@ -5185,7 +5185,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose hybrid method : CG with diagonal/BoomerAMG preconditioner + // choose hybrid method : CG with diagonal/BoomerAMG preconditioner //---------------------------------------------------------------- case HYHYBRID : @@ -5248,7 +5248,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose GMRES + 
// choose GMRES //---------------------------------------------------------------- case HYGMRES : @@ -5334,7 +5334,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) if ( convRate > rateThresh ) { if ( MLI_Hybrid_NSIncr_ > 1 ) - sprintf(paramString, "MLI incrNullSpaceDim %d", + sprintf(paramString, "MLI incrNullSpaceDim %d", MLI_Hybrid_NSIncr_); else sprintf(paramString, "MLI incrNullSpaceDim 2"); @@ -5376,7 +5376,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose flexible GMRES + // choose flexible GMRES //---------------------------------------------------------------- case HYFGMRES : @@ -5410,7 +5410,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) } if ( fgmresUpdateTol_ && HYPreconID_ == HYBLOCK ) - HYPRE_ParCSRFGMRESUpdatePrecondTolerance(HYSolver_, + HYPRE_ParCSRFGMRESUpdatePrecondTolerance(HYSolver_, HYPRE_LSI_BlockPrecondSetA11Tolerance); MPI_Barrier( comm_ ); @@ -5435,7 +5435,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose BiCGSTAB + // choose BiCGSTAB //---------------------------------------------------------------- case HYCGSTAB : @@ -5488,7 +5488,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose BiCGSTABL + // choose BiCGSTABL //---------------------------------------------------------------- case HYCGSTABL : @@ -5539,7 +5539,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose TFQMR + // choose TFQMR //---------------------------------------------------------------- case HYTFQMR : @@ -5590,7 +5590,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int 
&iterations) break; //---------------------------------------------------------------- - // choose BiCGS + // choose BiCGS //---------------------------------------------------------------- case HYBICGS : @@ -5641,7 +5641,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose Symmetric QMR + // choose Symmetric QMR //---------------------------------------------------------------- case HYSYMQMR : @@ -5692,7 +5692,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose Boomeramg + // choose Boomeramg //---------------------------------------------------------------- case HYAMG : @@ -5708,7 +5708,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose SuperLU (single processor) + // choose SuperLU (single processor) //---------------------------------------------------------------- case HYSUPERLU : @@ -5720,15 +5720,15 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) } rnorm = solveUsingSuperLU(status); #ifndef NOFEI - if ( status == 1 ) status = 0; -#endif + if ( status == 1 ) status = 0; +#endif numIterations = 1; ptime = stime; //printf("SuperLU solver - return status = %d\n",status); break; //---------------------------------------------------------------- - // choose SuperLU (single processor) + // choose SuperLU (single processor) //---------------------------------------------------------------- case HYSUPERLUX : @@ -5740,14 +5740,14 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) } rnorm = solveUsingSuperLUX(status); #ifndef NOFEI - if ( status == 1 ) status = 0; -#endif + if ( status == 1 ) status = 0; +#endif numIterations = 1; //printf("SuperLUX solver - return status = %d\n",status); break; 
//---------------------------------------------------------------- - // choose distributed SuperLU + // choose distributed SuperLU //---------------------------------------------------------------- case HYDSUPERLU : @@ -5760,8 +5760,8 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) } rnorm = solveUsingDSuperLU(status); #ifndef NOFEI - if ( status == 1 ) status = 0; -#endif + if ( status == 1 ) status = 0; +#endif numIterations = 1; #else printf("distributed SuperLU not available.\n"); @@ -5770,7 +5770,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) break; //---------------------------------------------------------------- - // choose Y12M (single processor) + // choose Y12M (single processor) //---------------------------------------------------------------- case HYY12M : @@ -5783,8 +5783,8 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) } solveUsingY12M(status); #ifndef NOFEI - if ( status == 1 ) status = 0; -#endif + if ( status == 1 ) status = 0; +#endif numIterations = 1; ptime = stime; //printf("Y12M solver - return status = %d\n",status); @@ -5792,11 +5792,11 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) #else printf("HYPRE_LSC : Y12M not available. 
\n"); exit(1); - break; + break; #endif //---------------------------------------------------------------- - // choose AMGE (single processor) + // choose AMGE (single processor) //---------------------------------------------------------------- case HYAMGE : @@ -5914,7 +5914,7 @@ int HYPRE_LinSysCore::launchSolver(int& solveStatus, int &iterations) } //------------------------------------------------------------------- - // diagnostic message + // diagnostic message //------------------------------------------------------------------- if ( (HYOutputLevel_ & HYFEI_SPECIALMASK) >= 3 ) @@ -5983,7 +5983,7 @@ void *HYPRE_LinSysCore::HYPRE_LSC_GetSolVector() } //*************************************************************************** -// this function fetches the matrix +// this function fetches the matrix //--------------------------------------------------------------------------- void *HYPRE_LinSysCore::HYPRE_LSC_GetMatrix() diff --git a/src/FEI_mv/fei-hypre/HYPRE_LinSysCore.h b/src/FEI_mv/fei-hypre/HYPRE_LinSysCore.h index 494de7759..c8fa799c2 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_LinSysCore.h +++ b/src/FEI_mv/fei-hypre/HYPRE_LinSysCore.h @@ -21,7 +21,6 @@ #include #include #include -#include #include #ifdef NOFEI @@ -69,12 +68,12 @@ enum HYpreconID {HYIDENTITY,HYDIAGONAL,HYPILUT,HYPARASAILS,HYBOOMERAMG,HYML, typedef struct { - int *EdgeNodeList_; - int *NodeNumbers_; - int numEdges_; - int numLocalNodes_; - int numNodes_; - double *NodalCoord_; + HYPRE_BigInt *EdgeNodeList_; + HYPRE_BigInt *NodeNumbers_; + HYPRE_Int numEdges_; + HYPRE_Int numLocalNodes_; + HYPRE_Int numNodes_; + HYPRE_Real *NodalCoord_; } HYPRE_FEI_AMSData; // ************************************************************************* diff --git a/src/FEI_mv/fei-hypre/HYPRE_SlideReduction.cxx b/src/FEI_mv/fei-hypre/HYPRE_SlideReduction.cxx index 1611f1674..acf8258a2 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_SlideReduction.cxx +++ b/src/FEI_mv/fei-hypre/HYPRE_SlideReduction.cxx @@ -15,7 +15,6 @@ 
#include #include #include -#include #define HYPRE_SLIDEMAX 100 #define HYPRE_BITMASK2 3 @@ -38,7 +37,7 @@ #define habs(x) (((x) > 0.0) ? x : -(x)) -extern "C" +extern "C" { // int hypre_BoomerAMGBuildCoarseOperator(hypre_ParCSRMatrix*, // hypre_ParCSRMatrix*, hypre_ParCSRMatrix*, hypre_ParCSRMatrix**); @@ -159,7 +158,7 @@ int HYPRE_SlideReduction::setBlockMinNorm(double norm) } //*************************************************************************** -// get matrix number of rows +// get matrix number of rows //--------------------------------------------------------------------------- int HYPRE_SlideReduction::getMatrixNumRows() @@ -178,7 +177,7 @@ int HYPRE_SlideReduction::getMatrixNumRows() } //*************************************************************************** -// get matrix diagonal +// get matrix diagonal //--------------------------------------------------------------------------- double *HYPRE_SlideReduction::getMatrixDiagonal() @@ -190,7 +189,7 @@ double *HYPRE_SlideReduction::getMatrixDiagonal() // get reduced matrix //--------------------------------------------------------------------------- -int HYPRE_SlideReduction::getReducedMatrix(HYPRE_IJMatrix *mat) +int HYPRE_SlideReduction::getReducedMatrix(HYPRE_IJMatrix *mat) { (*mat) = reducedAmat_; return 0; @@ -200,7 +199,7 @@ int HYPRE_SlideReduction::getReducedMatrix(HYPRE_IJMatrix *mat) // get reduced rhs //--------------------------------------------------------------------------- -int HYPRE_SlideReduction::getReducedRHSVector(HYPRE_IJVector *rhs) +int HYPRE_SlideReduction::getReducedRHSVector(HYPRE_IJVector *rhs) { (*rhs) = reducedBvec_; return 0; @@ -210,7 +209,7 @@ int HYPRE_SlideReduction::getReducedRHSVector(HYPRE_IJVector *rhs) // get reduced solution vector //--------------------------------------------------------------------------- -int HYPRE_SlideReduction::getReducedSolnVector(HYPRE_IJVector *sol) +int HYPRE_SlideReduction::getReducedSolnVector(HYPRE_IJVector *sol) { (*sol) = 
reducedXvec_; return 0; @@ -220,7 +219,7 @@ int HYPRE_SlideReduction::getReducedSolnVector(HYPRE_IJVector *sol) // get auxiliary (temporary) vector //--------------------------------------------------------------------------- -int HYPRE_SlideReduction::getReducedAuxVector(HYPRE_IJVector *auxV ) +int HYPRE_SlideReduction::getReducedAuxVector(HYPRE_IJVector *auxV ) { (*auxV) = reducedRvec_; return 0; @@ -230,7 +229,7 @@ int HYPRE_SlideReduction::getReducedAuxVector(HYPRE_IJVector *auxV ) // get processor to constraint map //--------------------------------------------------------------------------- -int HYPRE_SlideReduction::getProcConstraintMap(int **map) +int HYPRE_SlideReduction::getProcConstraintMap(int **map) { (*map) = procNConstr_; return 0; @@ -240,7 +239,7 @@ int HYPRE_SlideReduction::getProcConstraintMap(int **map) // get slave equation list //--------------------------------------------------------------------------- -int HYPRE_SlideReduction::getSlaveEqnList(int **slist) +int HYPRE_SlideReduction::getSlaveEqnList(int **slist) { (*slist) = slaveEqnList_; return 0; @@ -251,7 +250,7 @@ int HYPRE_SlideReduction::getSlaveEqnList(int **slist) // (for oorrecting the null space) //--------------------------------------------------------------------------- -int HYPRE_SlideReduction::getPerturbationMatrix(HYPRE_ParCSRMatrix *matrix) +int HYPRE_SlideReduction::getPerturbationMatrix(HYPRE_ParCSRMatrix *matrix) { (*matrix) = hypreRAP_; hypreRAP_ = NULL; @@ -272,7 +271,7 @@ int HYPRE_SlideReduction::getPerturbationMatrix(HYPRE_ParCSRMatrix *matrix) // i+1 has equation numbers higher than those of processor i //--------------------------------------------------------------------------- -int HYPRE_SlideReduction::setup(HYPRE_IJMatrix A, HYPRE_IJVector x, +int HYPRE_SlideReduction::setup(HYPRE_IJMatrix A, HYPRE_IJVector x, HYPRE_IJVector b) { int mypid, nprocs, ierr, maxBSize=HYPRE_SLIDEMAX, bSize=2; @@ -281,7 +280,7 @@ int HYPRE_SlideReduction::setup(HYPRE_IJMatrix A, 
HYPRE_IJVector x, HYPRE_ParVector b_csr; //------------------------------------------------------------------ - // initial set up + // initial set up //------------------------------------------------------------------ MPI_Comm_rank( mpiComm_, &mypid ); @@ -378,7 +377,7 @@ int HYPRE_SlideReduction::setup(HYPRE_IJMatrix A, HYPRE_IJVector x, //------------------------------------------------------------------ if (reduceAFlag == 1) - { + { if ( useSimpleScheme_ == 0 ) { ierr = findSlaveEqns1(); @@ -392,7 +391,7 @@ int HYPRE_SlideReduction::setup(HYPRE_IJMatrix A, HYPRE_IJVector x, composeGlobalList(); } } - + //------------------------------------------------------------------ // build the reduced matrix //------------------------------------------------------------------ @@ -424,7 +423,7 @@ int HYPRE_SlideReduction::setup(HYPRE_IJMatrix A, HYPRE_IJVector x, exit(1); } } - + //------------------------------------------------------------------ // clean up and return //------------------------------------------------------------------ @@ -435,14 +434,14 @@ int HYPRE_SlideReduction::setup(HYPRE_IJMatrix A, HYPRE_IJVector x, } //*************************************************************************** -// search for local constraints (end of the matrix block) +// search for local constraints (end of the matrix block) //--------------------------------------------------------------------------- int HYPRE_SlideReduction::findConstraints() { int mypid, nprocs, *procNRows, startRow, endRow; int nConstraints, irow, ncnt, isAConstr, jcol, rowSize, *colInd; - int *iTempList, ip, globalNConstr; + int *iTempList, ip, globalNConstr; double *colVal; HYPRE_ParCSRMatrix A_csr; @@ -467,27 +466,27 @@ int HYPRE_SlideReduction::findConstraints() #ifdef PRINTC int localNRows = endRow - startRow + 1; char filename[100]; - FILE *fp; + FILE *fp; sprintf( filename, "Constr.%d", localNRows); fp = fopen( filename, "w" ); #endif nConstraints = 0; - for ( irow = endRow; irow >= startRow; irow-- 
) + for ( irow = endRow; irow >= startRow; irow-- ) { HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); isAConstr = 1; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { - if ( colInd[jcol] == irow && colVal[jcol] != 0.0 ) + if ( colInd[jcol] == irow && colVal[jcol] != 0.0 ) { - isAConstr = 0; + isAConstr = 0; break; } } #ifdef PRINTC - if ( isAConstr ) + if ( isAConstr ) { - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) fprintf(fp,"%8d %8d %e\n",nConstraints+1,colInd[jcol]+1, colVal[jcol]); } @@ -517,7 +516,7 @@ int HYPRE_SlideReduction::findConstraints() delete [] iTempList; globalNConstr = 0; ncnt = 0; - for ( ip = 0; ip < nprocs; ip++ ) + for ( ip = 0; ip < nprocs; ip++ ) { ncnt = procNConstr_[ip]; procNConstr_[ip] = globalNConstr; @@ -536,10 +535,10 @@ int HYPRE_SlideReduction::findConstraints() if ( nConstraints > 0 ) constrBlkSizes_ = new int[nConstraints]; else constrBlkSizes_ = NULL; for ( irow = 0; irow < nConstraints; irow++ ) constrBlkSizes_[irow] = 0; - if ( nConstraints > 0 ) + if ( nConstraints > 0 ) { eqnStatuses_ = new int[endRow-nConstraints-startRow+1]; - for (irow = 0; irow < endRow-nConstraints-startRow+1; irow++ ) + for (irow = 0; irow < endRow-nConstraints-startRow+1; irow++ ) eqnStatuses_[irow] = 0; } else eqnStatuses_ = NULL; @@ -554,7 +553,7 @@ int HYPRE_SlideReduction::findSlaveEqns1() { int mypid, nprocs, *procNRows, startRow, endRow; int nConstraints, irow, jcol, rowSize, ncnt, *colInd, index; - int nCandidates, *candidateList; + int nCandidates, *candidateList; int *constrListAux, colIndex, searchIndex, procIndex, uBound; int nSum, newEndRow; double *colVal, searchValue; @@ -574,7 +573,7 @@ int HYPRE_SlideReduction::findSlaveEqns1() newEndRow = endRow - nConstraints; //------------------------------------------------------------------ - // compose candidate slave list (slaves in candidateList, corresponding + // compose candidate slave list (slaves 
in candidateList, corresponding // constraint equation in constrListAux) //------------------------------------------------------------------ @@ -590,19 +589,19 @@ int HYPRE_SlideReduction::findSlaveEqns1() // candidates are those with 1 link to the constraint list //------------------------------------------------------------------ - for ( irow = startRow; irow <= endRow-nConstraints; irow++ ) + for ( irow = startRow; irow <= endRow-nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); ncnt = 0; constrListAux[irow-startRow] = -1; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; for ( procIndex = 1; procIndex <= nprocs; procIndex++ ) if ( colIndex < procNRows[procIndex] ) break; - uBound = procNRows[procIndex] - (procNConstr_[procIndex] - - procNConstr_[procIndex-1]); - if ( colIndex >= uBound && procIndex == (mypid+1) ) + uBound = procNRows[procIndex] - (procNConstr_[procIndex] - + procNConstr_[procIndex-1]); + if ( colIndex >= uBound && procIndex == (mypid+1) ) { ncnt++; searchIndex = colIndex; @@ -611,12 +610,12 @@ int HYPRE_SlideReduction::findSlaveEqns1() if ( ncnt > 1 ) break; } HYPRE_ParCSRMatrixRestoreRow(A_csr,irow,&rowSize,&colInd,&colVal); - if (ncnt == 1 && searchIndex > newEndRow && searchIndex <= endRow) + if (ncnt == 1 && searchIndex > newEndRow && searchIndex <= endRow) { constrListAux[nCandidates] = searchIndex; candidateList[nCandidates++] = irow; if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 3 ) - printf("%4d : findSlaveEqns1 - candidate %d = %d(%d)\n", + printf("%4d : findSlaveEqns1 - candidate %d = %d(%d)\n", mypid, nCandidates-1, irow, searchIndex); } } @@ -630,18 +629,18 @@ int HYPRE_SlideReduction::findSlaveEqns1() // (search for candidates column index with maximum magnitude) // ==> slaveEqnList_ //--------------------------------------------------------------------- - + searchIndex = 0; - for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ 
) + for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); searchIndex = -1; searchValue = 1.0E-6; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { - if (colVal[jcol] != 0.0 && colInd[jcol] >= startRow && - colInd[jcol] <= (endRow-nConstraints) && - eqnStatuses_[colInd[jcol]-startRow] == 0) + if (colVal[jcol] != 0.0 && colInd[jcol] >= startRow && + colInd[jcol] <= (endRow-nConstraints) && + eqnStatuses_[colInd[jcol]-startRow] == 0) { colIndex = hypre_BinarySearch(candidateList, colInd[jcol], nCandidates); @@ -652,7 +651,7 @@ int HYPRE_SlideReduction::findSlaveEqns1() searchIndex = colInd[jcol]; } } - } + } HYPRE_ParCSRMatrixRestoreRow(A_csr,irow,&rowSize,&colInd,&colVal); if ( searchIndex >= 0 ) { @@ -660,12 +659,12 @@ int HYPRE_SlideReduction::findSlaveEqns1() slaveEqnList_[index] = searchIndex; constrBlkInfo_[index] = index; constrBlkSizes_[index] = 1; - eqnStatuses_[searchIndex-startRow] = 1; + eqnStatuses_[searchIndex-startRow] = 1; if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 2 ) printf("%4d : findSlaveEqns1 - constr %7d <=> slave %d\n", mypid, irow, searchIndex); - } - else + } + else { slaveEqnList_[irow-endRow+nConstraints-1] = -1; if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 2 ) @@ -675,7 +674,7 @@ int HYPRE_SlideReduction::findSlaveEqns1() } } } - if ( nConstraints > 0 ) + if ( nConstraints > 0 ) { delete [] constrListAux; delete [] candidateList; @@ -690,7 +689,7 @@ int HYPRE_SlideReduction::findSlaveEqns1() for ( irow = 0; irow < nConstraints; irow++ ) if ( slaveEqnList_[irow] == -1 ) ncnt++; MPI_Allreduce(&ncnt, &nSum, 1, MPI_INT, MPI_SUM, mpiComm_); - if ( nSum > 0 ) + if ( nSum > 0 ) { if ( mypid == 0 && ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { @@ -701,7 +700,7 @@ int HYPRE_SlideReduction::findSlaveEqns1() if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { for ( irow = 0; irow < nConstraints; irow++ ) - if ( slaveEqnList_[irow] == -1 ) + if ( 
slaveEqnList_[irow] == -1 ) { printf("%4d : findSlaveEqns1 - unsatisfied constraint",mypid); printf(" equation = %d\n", irow+endRow-nConstraints+1); @@ -751,7 +750,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) printf("%4d : findSlaveEqnsBlock - size = %d\n", mypid, blkSize); //------------------------------------------------------------------ - // compose candidate slave list (slaves in candidateList, corresponding + // compose candidate slave list (slaves in candidateList, corresponding // constraint equation in constrListAuxs) //------------------------------------------------------------------ @@ -760,7 +759,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) { candidateList = new int[localNRows-nConstraints]; constrListAuxs = new int*[localNRows-nConstraints]; - for ( ic = 0; ic < localNRows-nConstraints; ic++ ) + for ( ic = 0; ic < localNRows-nConstraints; ic++ ) { constrListAuxs[ic] = new int[blkSize]; for (jcol = 0; jcol < blkSize; jcol++) constrListAuxs[ic][jcol] = -1; @@ -770,25 +769,25 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) // candidates are those with links to the constraint list //--------------------------------------------------------------- - for ( irow = startRow; irow <= endRow-nConstraints; irow++ ) + for ( irow = startRow; irow <= endRow-nConstraints; irow++ ) { if ( eqnStatuses_[irow-startRow] == 1 ) continue; HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); ncnt = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - for ( ip = 0; ip < nprocs; ip++ ) + for ( ip = 0; ip < nprocs; ip++ ) { uBound = procNRows[ip+1]; lBound = uBound - (procNConstr_[ip+1] - procNConstr_[ip]); - if ( colIndex >= lBound && colIndex < uBound && ip == mypid ) + if ( colIndex >= lBound && colIndex < uBound && ip == mypid ) { ncnt++; if ( ncnt <= blkSize ) constrListAuxs[nCandidates][ncnt-1] = colIndex; } - else if (colIndex >= lBound && colIndex 
< uBound && ip != mypid) + else if (colIndex >= lBound && colIndex < uBound && ip != mypid) { ncnt = blkSize + 1; break; @@ -797,13 +796,13 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) if ( ncnt > blkSize ) break; } HYPRE_ParCSRMatrixRestoreRow(A_csr,irow,&rowSize,&colInd,&colVal); - if ( ncnt >= 1 && ncnt <= blkSize ) + if ( ncnt >= 1 && ncnt <= blkSize ) { isACandidate = 1; - for ( ic = 0; ic < ncnt; ic++ ) + for ( ic = 0; ic < ncnt; ic++ ) { if ( constrListAuxs[nCandidates][ic] <= newEndRow || - constrListAuxs[nCandidates][ic] > endRow ) + constrListAuxs[nCandidates][ic] > endRow ) { isACandidate = 0; break; @@ -813,7 +812,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) { candidateList[nCandidates++] = irow; if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 3 ) - printf("%4d : findSlaveEqnsBlock - candidate %d = %d\n", + printf("%4d : findSlaveEqnsBlock - candidate %d = %d\n", mypid, nCandidates-1, irow); } } @@ -830,7 +829,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) int *tempSlaveList, *tempSlaveListAux; if ( nConstraints > 0 ) tempSlaveList = new int[nConstraints]; if ( nConstraints > 0 ) tempSlaveListAux = new int[nConstraints]; - for (irow = 0; irow < nConstraints; irow++) + for (irow = 0; irow < nConstraints; irow++) { tempSlaveList[irow] = slaveEqnList_[irow]; tempSlaveListAux[irow] = irow; @@ -839,14 +838,14 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) /* for each of the candidates, examine all associated constraints dof */ - for ( irow = 0; irow < nCandidates; irow++ ) + for ( irow = 0; irow < nCandidates; irow++ ) { - for ( ic = 0; ic < blkSize; ic++ ) + for ( ic = 0; ic < blkSize; ic++ ) { constrIndex = constrListAuxs[irow][ic]; /* if valid constraint number */ if ( constrIndex >= 0 ) - { + { /* get the constraint row */ HYPRE_ParCSRMatrixGetRow(A_csr,constrIndex,&rowSize,&colInd,NULL); @@ -854,15 +853,15 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) /* - see if the column number 
is an already selected slave */ /* - if so, find the corresponding constraint no. of that slave */ /* - add that constraint to my list */ - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; searchIndex = hypre_BinarySearch(tempSlaveList,colIndex, nConstraints); if ( searchIndex >= 0 ) - { + { searchInd2 = tempSlaveListAux[searchIndex] + newEndRow + 1; - for ( ip = 0; ip < blkSize; ip++ ) + for ( ip = 0; ip < blkSize; ip++ ) if ( constrListAuxs[irow][ip] == searchInd2 || constrListAuxs[irow][ip] == -1 ) break; if ( ip == blkSize ) constrListAuxs[irow][0] = -5; @@ -884,7 +883,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) /* delete candidates that gives larger than expected blocksize */ ncnt = 0; - for ( irow = 0; irow < nCandidates; irow++ ) + for ( irow = 0; irow < nCandidates; irow++ ) { if ( constrListAuxs[irow][0] != -5 ) { @@ -907,11 +906,11 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) // (search for candidates column index with maximum magnitude) // ==> slaveEqnList_ //--------------------------------------------------------------------- - + searchIndex = 0; blkInfo = new int[blkSize+HYPRE_SLIDEMAX]; - for ( irow = newEndRow+1; irow <= endRow; irow++ ) + for ( irow = newEndRow+1; irow <= endRow; irow++ ) { /* -- if slave variable has not been picked for constraint irow -- */ @@ -925,7 +924,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) rowSize = rowSize2; colInd = new int[rowSize]; colVal = new double[rowSize]; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colInd[jcol] = colInd2[jcol]; colVal[jcol] = colVal2[jcol]; @@ -933,10 +932,10 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) HYPRE_ParCSRMatrixRestoreRow(A_csr,irow,&rowSize2,&colInd2,&colVal2); searchIndex = -1; searchValue = blockMinNorm_; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = 
colInd[jcol]; - if (colVal[jcol] != 0.0 && colIndex >= startRow + if (colVal[jcol] != 0.0 && colIndex >= startRow && colIndex <= newEndRow) { /* -- see if the nonzero entry is a potential candidate -- */ @@ -951,14 +950,14 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) if (searchInd2 >= 0 && eqnStatuses_[colIndex-startRow] != 1) { newBlkSize = 1; - blkInfoCnt = 0; - for ( ic = 0; ic < blkSize; ic++ ) + blkInfoCnt = 0; + for ( ic = 0; ic < blkSize; ic++ ) { constrIndex = constrListAuxs[searchInd2][ic]; if ( constrIndex != -1 ) { constrIndex2 = constrIndex - endRow + nConstraints - 1; - if ( constrIndex != irow && + if ( constrIndex != irow && slaveEqnList_[constrIndex2] != -1) { for ( ip = 0; ip < blkInfoCnt; ip++ ) @@ -980,7 +979,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) { printf("%4d : constraint %d - candidate %d (%d) ", mypid, irow, searchInd2, candidateList[searchInd2]); - printf("gives blksize = %d\n", newBlkSize); + printf("gives blksize = %d\n", newBlkSize); } /* if (newBlkSize > 1 && newBlkSize <= blkSize) @@ -989,7 +988,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) { retVal = matrixCondEst(irow,colIndex,blkInfo,blkInfoCnt); if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 2 ) - printf("%4d : pivot = %e (%e) : %d\n", mypid, retVal, + printf("%4d : pivot = %e (%e) : %d\n", mypid, retVal, searchValue,newBlkSize); if ( retVal > searchValue ) { @@ -1000,7 +999,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) } } } - } + } delete [] colInd; delete [] colVal; if ( searchIndex >= 0 && searchValue > blockMinNorm_ ) @@ -1008,7 +1007,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) searchInd2 = hypre_BinarySearch(candidateList,searchIndex, nCandidates); newIndex = -9; - for ( ic = 0; ic < blkSize; ic++ ) + for ( ic = 0; ic < blkSize; ic++ ) { constrIndex = constrListAuxs[searchInd2][ic]; if ( constrIndex != -1 ) @@ -1018,7 +1017,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) { if 
(newIndex == -9) newIndex=constrBlkInfo_[constrIndex2]; oldIndex = constrBlkInfo_[constrIndex2]; - for ( ii = 0; ii < nConstraints; ii++ ) + for ( ii = 0; ii < nConstraints; ii++ ) { if ( constrBlkInfo_[ii] == oldIndex ) { @@ -1039,25 +1038,25 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) /* update the constrListAux - first get selected slave row */ - for ( ii = 0; ii < blkSize; ii++ ) + for ( ii = 0; ii < blkSize; ii++ ) { constrIndex2 = constrListAuxs[searchInd2][ii]; if ( constrIndex2 != -1 ) { HYPRE_ParCSRMatrixGetRow(A_csr,constrIndex2,&rowSize2, &colInd2,&colVal2); - for ( jj = 0; jj < rowSize2; jj++ ) + for ( jj = 0; jj < rowSize2; jj++ ) { - searchInd3 = hypre_BinarySearch(candidateList, + searchInd3 = hypre_BinarySearch(candidateList, colInd2[jj],nCandidates); if ( searchInd3 >= 0 ) { - for ( ip = 0; ip < blkSize; ip++ ) + for ( ip = 0; ip < blkSize; ip++ ) { if ( constrListAuxs[searchInd3][ip] == irow || constrListAuxs[searchInd3][ip] == -1 ) break; } - if ( ip == blkSize ) + if ( ip == blkSize ) { constrListAuxs[searchInd3][0] = -5; eqnStatuses_[colInd2[jj]-startRow] = 1; @@ -1071,9 +1070,9 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 3 ) printf("*Slave candidate %d adds new constr %d\n", candidateList[searchInd3], irow); - } - } - } + } + } + } HYPRE_ParCSRMatrixRestoreRow(A_csr,constrIndex2,&rowSize2, &colInd2,&colVal2); } @@ -1082,7 +1081,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) printf("%4d : findSlaveEqnsBlock - constr %d <=> slave %d (%d)\n", mypid, irow, searchIndex, newIndex); } - else + else { if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 2 ) { @@ -1098,7 +1097,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) mypid, irow); printf(" to find a slave.\n"); } - else + else { printf("%4d : findSlaveEqnsBlock - constraint %4d fails (2)", mypid, irow); @@ -1112,18 +1111,18 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) for ( ii = 0; ii 
< rowSize; ii++ ) colTmp[ii] = colInd[ii]; HYPRE_ParCSRMatrixRestoreRow(A_csr,irow,&rowSize,&colInd, &colVal); - for ( jcol = 0; jcol < rowSize2; jcol++ ) + for ( jcol = 0; jcol < rowSize2; jcol++ ) { colIndex = colTmp[jcol]; printf("%4d : row %d has col %d (%d,%d) (%d,%d)\n",mypid, irow,colIndex,jcol,rowSize,procNRows[mypid], - procNRows[mypid+1]); - if ( colIndex >= procNRows[mypid] && + procNRows[mypid+1]); + if ( colIndex >= procNRows[mypid] && colIndex < procNRows[mypid+1]) { HYPRE_ParCSRMatrixGetRow(A_csr,colIndex,&rowSize, &colInd,NULL); - for ( ii = 0; ii < rowSize; ii++ ) + for ( ii = 0; ii < rowSize; ii++ ) printf("%4d : col %d has col %d (%d,%d)\n",mypid, colIndex,colInd[ii],ii,rowSize); HYPRE_ParCSRMatrixRestoreRow(A_csr,colIndex,&rowSize, @@ -1139,7 +1138,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) delete [] blkInfo; if ( nConstraints > 0 ) { - for ( ic = 0; ic < localNRows-nConstraints; ic++ ) + for ( ic = 0; ic < localNRows-nConstraints; ic++ ) if ( constrListAuxs[ic] != NULL ) delete [] constrListAuxs[ic]; delete [] constrListAuxs; delete [] candidateList; @@ -1155,7 +1154,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) for ( is = 0; is < nConstraints; is++ ) { iArray1[is] = constrBlkInfo_[is]; - iArray2[is] = constrBlkSizes_[is]; + iArray2[is] = constrBlkSizes_[is]; } HYPRE_LSI_qsort1a(iArray1, iArray2, 0, nConstraints-1); ip = -1; ncnt = 0; @@ -1176,12 +1175,12 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) if ( iArray2[is] == iArray2[is-1] ) ip++; else { - printf("%4d : number of blocks with blksize %6d = %d\n", + printf("%4d : number of blocks with blksize %6d = %d\n", mypid, iArray2[is-1], ip); ip = 1; } } - printf("%4d : number of blocks with blksize %6d = %d\n", + printf("%4d : number of blocks with blksize %6d = %d\n", mypid, iArray2[ncnt-1], ip); delete [] iArray1; delete [] iArray2; @@ -1196,7 +1195,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) for ( irow = 0; irow < 
nConstraints; irow++ ) if ( slaveEqnList_[irow] == -1 ) ncnt++; MPI_Allreduce(&ncnt, &nSum, 1, MPI_INT, MPI_SUM, mpiComm_); - if ( nSum > 0 ) + if ( nSum > 0 ) { if ( mypid == 0 && ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { @@ -1207,7 +1206,7 @@ int HYPRE_SlideReduction::findSlaveEqnsBlock(int blkSize) if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { for ( irow = 0; irow < nConstraints; irow++ ) - if ( slaveEqnList_[irow] == -1 ) + if ( slaveEqnList_[irow] == -1 ) { printf("%4d : findSlaveEqnsBlock - unsatisfied constraint",mypid); printf(" equation = %d\n", irow+endRow-nConstraints+1); @@ -1247,13 +1246,13 @@ int HYPRE_SlideReduction::composeGlobalList() if ( gSlaveEqnList_ != NULL ) delete [] gSlaveEqnList_; if ( gSlaveEqnListAux_ != NULL ) delete [] gSlaveEqnListAux_; slaveEqnListAux_ = NULL; - if ( nConstraints > 0 ) + if ( nConstraints > 0 ) { slaveEqnListAux_ = new int[nConstraints]; - for ( is = 0; is < nConstraints; is++ ) slaveEqnListAux_[is] = is; + for ( is = 0; is < nConstraints; is++ ) slaveEqnListAux_[is] = is; HYPRE_LSI_qsort1a(slaveEqnList_, slaveEqnListAux_, 0, nConstraints-1); ierr = 0; - for ( is = 1; is < nConstraints; is++ ) + for ( is = 1; is < nConstraints; is++ ) { if ( slaveEqnList_[is] == slaveEqnList_[is-1] ) { @@ -1265,7 +1264,7 @@ int HYPRE_SlideReduction::composeGlobalList() } if ( ierr ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { printf("%4d : HYPRE_SlideReduction slave %d = %d \n",mypid,is, slaveEqnList_[is]); @@ -1284,16 +1283,16 @@ int HYPRE_SlideReduction::composeGlobalList() displArray = new int[nprocs]; MPI_Allgather(&nConstraints,1,MPI_INT,recvCntArray,1,MPI_INT,mpiComm_); displArray[0] = 0; - for ( ip = 1; ip < nprocs; ip++ ) + for ( ip = 1; ip < nprocs; ip++ ) displArray[ip] = displArray[ip-1] + recvCntArray[ip-1]; - for ( ip = 0; ip < nConstraints; ip++ ) - slaveEqnListAux_[ip] += displArray[mypid]; + for ( ip = 0; ip < nConstraints; ip++ ) + slaveEqnListAux_[ip] += 
displArray[mypid]; MPI_Allgatherv(slaveEqnList_, nConstraints, MPI_INT, gSlaveEqnList_, recvCntArray, displArray, MPI_INT, mpiComm_); MPI_Allgatherv(slaveEqnListAux_, nConstraints, MPI_INT, gSlaveEqnListAux_, recvCntArray, displArray, MPI_INT, mpiComm_); - for ( is = 0; is < nConstraints; is++ ) - slaveEqnListAux_[is] -= displArray[mypid]; + for ( is = 0; is < nConstraints; is++ ) + slaveEqnListAux_[is] -= displArray[mypid]; delete [] recvCntArray; delete [] displArray; @@ -1304,7 +1303,7 @@ int HYPRE_SlideReduction::composeGlobalList() for ( is = 0; is < nConstraints; is++ ) { iArray1[is] = constrBlkInfo_[is]; - iArray2[is] = constrBlkSizes_[is]; + iArray2[is] = constrBlkSizes_[is]; } HYPRE_LSI_qsort1a(iArray1, iArray2, 0, nConstraints-1); ip = -1; ncnt = 0; @@ -1325,12 +1324,12 @@ int HYPRE_SlideReduction::composeGlobalList() if ( iArray2[is] == iArray2[is-1] ) ip++; else { - printf("%4d : number of blocks with blksize %6d = %d\n", + printf("%4d : number of blocks with blksize %6d = %d\n", mypid, iArray2[is-1], ip); ip = 1; } } - printf("%4d : number of blocks with blksize %6d = %d\n", + printf("%4d : number of blocks with blksize %6d = %d\n", mypid, iArray2[ncnt-1], ip); delete [] iArray1; delete [] iArray2; @@ -1343,7 +1342,7 @@ int HYPRE_SlideReduction::composeGlobalList() return 0; } - + //**************************************************************************** // build the submatrix matrix //---------------------------------------------------------------------------- @@ -1406,7 +1405,7 @@ int HYPRE_SlideReduction::buildSubMatrices() ierr = HYPRE_IJMatrixCreate(mpiComm_,A21StartRow,A21StartRow+A21NRows-1, A21StartCol,A21StartCol+A21NCols-1,&A21mat_); ierr += HYPRE_IJMatrixSetObjectType(A21mat_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros in the nConstraint row of A21 @@ -1423,7 +1422,7 @@ int HYPRE_SlideReduction::buildSubMatrices() for ( jcol 
= 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - if (colVal[jcol] != 0.0 && + if (colVal[jcol] != 0.0 && (colIndex <= newEndRow || colIndex > endRow)) newRowSize++; } A21MatSize[irow-newEndRow-1] = newRowSize; @@ -1442,7 +1441,7 @@ int HYPRE_SlideReduction::buildSubMatrices() ierr = HYPRE_IJMatrixSetRowSizes(A21mat_, A21MatSize); ierr += HYPRE_IJMatrixInitialize(A21mat_); - assert(!ierr); + hypre_assert(!ierr); if ( A21NRows > 0 ) delete [] A21MatSize; //------------------------------------------------------------------ @@ -1511,7 +1510,7 @@ int HYPRE_SlideReduction::buildSubMatrices() reducedAStartRow+reducedANRows-1, reducedAStartCol, reducedAStartCol+reducedANCols-1,&reducedAmat_); ierr += HYPRE_IJMatrixSetObjectType(reducedAmat_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute row sizes for reducedA @@ -1541,7 +1540,7 @@ int HYPRE_SlideReduction::buildSubMatrices() } ierr = HYPRE_IJMatrixSetRowSizes(reducedAmat_, reducedAMatSize); ierr += HYPRE_IJMatrixInitialize(reducedAmat_); - assert(!ierr); + hypre_assert(!ierr); delete [] reducedAMatSize; //------------------------------------------------------------------ @@ -1574,7 +1573,7 @@ int HYPRE_SlideReduction::buildSubMatrices() ierr = HYPRE_IJMatrixSetValues(reducedAmat_, 1, &newRowSize, (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); rowCount++; } delete [] newColInd; @@ -1588,7 +1587,7 @@ int HYPRE_SlideReduction::buildSubMatrices() HYPRE_IJMatrixAssemble(reducedAmat_); HYPRE_IJMatrixGetObject(reducedAmat_, (void **) &reducedA_csr); - + return 0; } @@ -1650,7 +1649,7 @@ int HYPRE_SlideReduction::buildModifiedRHSVector(HYPRE_IJVector x, ierr += HYPRE_IJVectorSetObjectType(reducedBvec_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(reducedBvec_); ierr += HYPRE_IJVectorAssemble(reducedBvec_); - assert( !ierr ); + hypre_assert( !ierr ); 
HYPRE_IJVectorGetObject(reducedBvec_, (void **) &rb_csr); HYPRE_IJVectorGetObject(b, (void **) &b_csr); b_local = hypre_ParVectorLocalVector((hypre_ParVector *) b_csr); @@ -1670,7 +1669,7 @@ int HYPRE_SlideReduction::buildModifiedRHSVector(HYPRE_IJVector x, HYPRE_IJVectorSetObjectType(x2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(x2); ierr += HYPRE_IJVectorAssemble(x2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(x2, (void **) &x2_csr); HYPRE_IJVectorGetObject(x, (void **) &x_csr); x_local = hypre_ParVectorLocalVector((hypre_ParVector *) x_csr); @@ -1700,14 +1699,14 @@ int HYPRE_SlideReduction::buildModifiedRHSVector(HYPRE_IJVector x, ierr += HYPRE_IJVectorSetObjectType(reducedXvec_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(reducedXvec_); ierr += HYPRE_IJVectorAssemble(reducedXvec_); - assert( !ierr ); + hypre_assert( !ierr ); ierr = HYPRE_IJVectorCreate(mpiComm_, redBStart, redBStart+redBNRows-1, &reducedRvec_); ierr += HYPRE_IJVectorSetObjectType(reducedRvec_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(reducedRvec_); ierr += HYPRE_IJVectorAssemble(reducedRvec_); - assert( !ierr ); + hypre_assert( !ierr ); free( procNRows ); return 0; @@ -1717,7 +1716,7 @@ int HYPRE_SlideReduction::buildModifiedRHSVector(HYPRE_IJVector x, // given the solution vector, copy the actual solution //----------------------------------------------------------------------------- -int HYPRE_SlideReduction::buildModifiedSolnVector(HYPRE_IJVector x) +int HYPRE_SlideReduction::buildModifiedSolnVector(HYPRE_IJVector x) { int mypid, nprocs, *procNRows, startRow, endRow, localNRows; int nConstraints, irow; @@ -1725,7 +1724,7 @@ int HYPRE_SlideReduction::buildModifiedSolnVector(HYPRE_IJVector x) HYPRE_ParCSRMatrix A_csr; HYPRE_ParVector x_csr, rx_csr; hypre_Vector *x_local, *rx_local; - + //------------------------------------------------------------------ // get machine and matrix information 
//------------------------------------------------------------------ @@ -1740,7 +1739,7 @@ int HYPRE_SlideReduction::buildModifiedSolnVector(HYPRE_IJVector x) localNRows = endRow - startRow + 1; nConstraints = procNConstr_[mypid+1] - procNConstr_[mypid]; free( procNRows ); - if (( outputLevel_ & HYPRE_BITMASK2 ) >= 1 && + if (( outputLevel_ & HYPRE_BITMASK2 ) >= 1 && (procNConstr_==NULL || procNConstr_[nprocs]==0)) { printf("%4d : buildModifiedSolnVector WARNING - no local entry.\n", @@ -1823,7 +1822,7 @@ int HYPRE_SlideReduction::buildReducedMatrix() hypre_BoomerAMGBuildCoarseOperator((hypre_ParCSRMatrix *) A21_csr, (hypre_ParCSRMatrix *) invA22_csr, - (hypre_ParCSRMatrix *) A21_csr, + (hypre_ParCSRMatrix *) A21_csr, (hypre_ParCSRMatrix **) &RAP_csr); if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) @@ -1841,7 +1840,7 @@ int HYPRE_SlideReduction::buildReducedMatrix() printf("%4d : Printing RAP matrix... \n", mypid); fflush(stdout); } - for (irow=reducedAStartRow; irow= 1 ) { - printf("%4d : buildReducedMatrix - reduceAGlobalDim = %d %d\n", mypid, + printf("%4d : buildReducedMatrix - reduceAGlobalDim = %d %d\n", mypid, reducedAGlobalNRows, reducedAGlobalNCols); - printf("%4d : buildReducedMatrix - reducedALocalDim = %d %d\n", mypid, + printf("%4d : buildReducedMatrix - reducedALocalDim = %d %d\n", mypid, reducedANRows, reducedANCols); } @@ -1881,16 +1880,16 @@ int HYPRE_SlideReduction::buildReducedMatrix() reducedAStartRow+reducedANRows-1, reducedAStartCol, reducedAStartCol+reducedANCols-1,&reducedAmat_); ierr += HYPRE_IJMatrixSetObjectType(reducedAmat_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute row sizes for reducedA //------------------------------------------------------------------ rowCount = maxRowSize = totalNNZ = 0; - for ( irow = startRow; irow <= newEndRow; irow++ ) + for ( irow = startRow; irow <= newEndRow; irow++ ) { - searchIndex = 
hypre_BinarySearch(slaveEqnList_, irow, nConstraints); + searchIndex = hypre_BinarySearch(slaveEqnList_, irow, nConstraints); if ( searchIndex >= 0 ) reducedAMatSize[rowCount++] = 1; else { @@ -1898,18 +1897,18 @@ int HYPRE_SlideReduction::buildReducedMatrix() rowIndex = reducedAStartRow + rowCount; ierr = HYPRE_ParCSRMatrixGetRow(RAP_csr,rowIndex,&rowSize2, &colInd2, &colVal2); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize = rowSize + rowSize2; maxRowSize = ( newRowSize > maxRowSize ) ? newRowSize : maxRowSize; newColInd = new int[newRowSize]; - for (jcol = 0; jcol < rowSize; jcol++) + for (jcol = 0; jcol < rowSize; jcol++) newColInd[jcol] = colInd[jcol]; - for (jcol = 0; jcol < rowSize2; jcol++) + for (jcol = 0; jcol < rowSize2; jcol++) newColInd[rowSize+jcol] = colInd2[jcol]; hypre_qsort0(newColInd, 0, newRowSize-1); ncnt = 0; - for ( jcol = 1; jcol < newRowSize; jcol++ ) - if (newColInd[jcol] != newColInd[ncnt]) + for ( jcol = 1; jcol < newRowSize; jcol++ ) + if (newColInd[jcol] != newColInd[ncnt]) newColInd[++ncnt] = newColInd[jcol]; if ( newRowSize > 0 ) ncnt++; reducedAMatSize[rowCount++] = ncnt; @@ -1918,16 +1917,16 @@ int HYPRE_SlideReduction::buildReducedMatrix() ierr = HYPRE_ParCSRMatrixRestoreRow(RAP_csr,rowIndex,&rowSize2, &colInd2,&colVal2); delete [] newColInd; - assert( !ierr ); + hypre_assert( !ierr ); } } ierr = HYPRE_IJMatrixSetRowSizes(reducedAmat_, reducedAMatSize); ierr += HYPRE_IJMatrixInitialize(reducedAmat_); - assert(!ierr); + hypre_assert(!ierr); delete [] reducedAMatSize; int totalNNZA = 0; - for ( irow = startRow; irow <= endRow; irow++ ) + for ( irow = startRow; irow <= endRow; irow++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,NULL,NULL); totalNNZA += rowSize; @@ -1935,20 +1934,20 @@ int HYPRE_SlideReduction::buildReducedMatrix() } if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { - printf("%4d : buildReducedMatrix - NNZ of reducedA = %d %d %e\n", mypid, + printf("%4d : buildReducedMatrix - NNZ of reducedA = %d %d %e\n", 
mypid, totalNNZ, totalNNZA, 1.0*totalNNZ/totalNNZA); } //------------------------------------------------------------------ - // load the reducedA matrix + // load the reducedA matrix //------------------------------------------------------------------ rowCount = 0; newColInd = new int[maxRowSize+1]; newColVal = new double[maxRowSize+1]; - for ( irow = startRow; irow <= newEndRow; irow++ ) + for ( irow = startRow; irow <= newEndRow; irow++ ) { - searchIndex = hypre_BinarySearch(slaveEqnList_, irow, nConstraints); + searchIndex = hypre_BinarySearch(slaveEqnList_, irow, nConstraints); rowIndex = reducedAStartRow + rowCount; if ( searchIndex >= 0 ) { @@ -1963,50 +1962,50 @@ int HYPRE_SlideReduction::buildReducedMatrix() &colVal2); newRowSize = rowSize + rowSize2; ncnt = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; for ( procIndex = 0; procIndex < nprocs; procIndex++ ) if ( procNRows[procIndex] > colIndex ) break; - uBound = procNRows[procIndex] - + uBound = procNRows[procIndex] - (procNConstr_[procIndex]-procNConstr_[procIndex-1]); procIndex--; - if ( colIndex < uBound ) + if ( colIndex < uBound ) { searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, globalNConstr); - if ( searchIndex < 0 ) + if ( searchIndex < 0 ) { newColInd[ncnt] = colIndex - procNConstr_[procIndex]; - newColVal[ncnt++] = colVal[jcol]; + newColVal[ncnt++] = colVal[jcol]; } } } - for ( jcol = 0; jcol < rowSize2; jcol++ ) + for ( jcol = 0; jcol < rowSize2; jcol++ ) { - newColInd[ncnt+jcol] = colInd2[jcol]; - newColVal[ncnt+jcol] = - colVal2[jcol]; + newColInd[ncnt+jcol] = colInd2[jcol]; + newColVal[ncnt+jcol] = - colVal2[jcol]; } newRowSize = ncnt + rowSize2; hypre_qsort1(newColInd, newColVal, 0, newRowSize-1); ncnt = 0; - for ( jcol = 0; jcol < newRowSize; jcol++ ) + for ( jcol = 0; jcol < newRowSize; jcol++ ) { - if ( jcol != ncnt && newColInd[jcol] == newColInd[ncnt] ) + if ( jcol != ncnt && newColInd[jcol] == 
newColInd[ncnt] ) newColVal[ncnt] += newColVal[jcol]; - else if ( newColInd[jcol] != newColInd[ncnt] ) + else if ( newColInd[jcol] != newColInd[ncnt] ) { ncnt++; newColVal[ncnt] = newColVal[jcol]; newColInd[ncnt] = newColInd[jcol]; - } - } + } + } newRowSize = ncnt + 1; ncnt = 0; - for ( jcol = 0; jcol < newRowSize; jcol++ ) + for ( jcol = 0; jcol < newRowSize; jcol++ ) { if ( habs(newColVal[jcol]) >= truncTol_ ) - { + { newColInd[ncnt] = newColInd[jcol]; newColVal[ncnt++] = newColVal[jcol]; } @@ -2016,10 +2015,10 @@ int HYPRE_SlideReduction::buildReducedMatrix() HYPRE_ParCSRMatrixRestoreRow(RAP_csr,rowIndex,&rowSize2,&colInd2, &colVal2); } - ierr = HYPRE_IJMatrixSetValues(reducedAmat_, 1, &newRowSize, - (const int *) &rowIndex, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(reducedAmat_, 1, &newRowSize, + (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); rowCount++; } delete [] newColInd; @@ -2045,7 +2044,7 @@ int HYPRE_SlideReduction::buildReducedMatrix() printf("%4d : Printing reducedA matrix... 
\n", mypid); fflush(stdout); } - for ( irow = reducedAStartRow; + for ( irow = reducedAStartRow; irow < reducedAStartRow+localNRows-nConstraints; irow++ ) { //printf("%d : reducedA ROW %d\n", mypid, irow); @@ -2093,7 +2092,7 @@ int HYPRE_SlideReduction::buildReducedRHSVector(HYPRE_IJVector b) startRow = procNRows[mypid]; endRow = procNRows[mypid+1] - 1; localNRows = endRow - startRow + 1; - if ( procNConstr_ == NULL || procNConstr_[nprocs] == 0 ) + if ( procNConstr_ == NULL || procNConstr_[nprocs] == 0 ) { printf("%4d : buildReducedRHSVector WARNING - no local entries.\n",mypid); free(procNRows); @@ -2113,14 +2112,14 @@ int HYPRE_SlideReduction::buildReducedRHSVector(HYPRE_IJVector b) HYPRE_IJVectorSetObjectType(f2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(f2); ierr += HYPRE_IJVectorAssemble(f2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(f2, (void **) &f2_csr); HYPRE_IJVectorCreate(mpiComm_, f2Start, f2Start+f2LocalLength-1, &f2hat); HYPRE_IJVectorSetObjectType(f2hat, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(f2hat); ierr += HYPRE_IJVectorAssemble(f2hat); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(f2hat, (void **) &f2hat_csr); HYPRE_IJVectorGetObject(b, (void **) &b_csr); @@ -2129,22 +2128,22 @@ int HYPRE_SlideReduction::buildReducedRHSVector(HYPRE_IJVector b) f2_local = hypre_ParVectorLocalVector((hypre_ParVector *) f2_csr); f2_data = (double *) hypre_VectorData(f2_local); - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { vecIndex = -1; - for ( jcol = 0; jcol < nConstraints; jcol++ ) + for ( jcol = 0; jcol < nConstraints; jcol++ ) { - if ( slaveEqnListAux_[jcol] == irow ) + if ( slaveEqnListAux_[jcol] == irow ) { vecIndex = slaveEqnList_[jcol]; break; } } - assert( vecIndex >= startRow ); - assert( vecIndex <= endRow ); + hypre_assert( vecIndex >= startRow ); + hypre_assert( vecIndex <= endRow ); f2_data[irow] = b_data[vecIndex-startRow]; } - for ( irow = 0; irow 
< nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { vecIndex = localNRows - nConstraints + irow; f2_data[irow+nConstraints] = b_data[vecIndex]; @@ -2152,7 +2151,7 @@ int HYPRE_SlideReduction::buildReducedRHSVector(HYPRE_IJVector b) HYPRE_IJMatrixGetObject(invA22mat_, (void **) &invA22_csr); HYPRE_ParCSRMatrixMatvec( 1.0, invA22_csr, f2_csr, 0.0, f2hat_csr ); - HYPRE_IJVectorDestroy(f2); + HYPRE_IJVectorDestroy(f2); //------------------------------------------------------------------ // form reducedB = A21^T * f2hat @@ -2161,35 +2160,35 @@ int HYPRE_SlideReduction::buildReducedRHSVector(HYPRE_IJVector b) redBLocalLength = localNRows - nConstraints; redBStart = procNRows[mypid] - procNConstr_[mypid]; - ierr = HYPRE_IJVectorCreate(mpiComm_, redBStart, + ierr = HYPRE_IJVectorCreate(mpiComm_, redBStart, redBStart+redBLocalLength-1, &reducedBvec_); ierr += HYPRE_IJVectorSetObjectType(reducedBvec_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(reducedBvec_); ierr += HYPRE_IJVectorAssemble(reducedBvec_); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJVectorGetObject(reducedBvec_, (void **) &rb_csr); HYPRE_IJMatrixGetObject(A21mat_, (void **) &A21_csr); HYPRE_ParCSRMatrixMatvecT(-1.0, A21_csr, f2hat_csr, 0.0, rb_csr); - HYPRE_IJVectorDestroy(f2hat); + HYPRE_IJVectorDestroy(f2hat); //------------------------------------------------------------------ // finally form reducedB = f1 - f2til //------------------------------------------------------------------ rowIndex = redBStart; - for ( irow = startRow; irow <= newEndRow; irow++ ) + for ( irow = startRow; irow <= newEndRow; irow++ ) { - if ( hypre_BinarySearch(slaveEqnList_, irow, nConstraints) < 0 ) + if ( hypre_BinarySearch(slaveEqnList_, irow, nConstraints) < 0 ) { ddata = b_data[irow-startRow]; - HYPRE_IJVectorAddToValues(reducedBvec_, 1, (const int *) &rowIndex, + HYPRE_IJVectorAddToValues(reducedBvec_, 1, (const int *) &rowIndex, (const double *) &ddata); } else { ddata = 0.0; - 
HYPRE_IJVectorSetValues(reducedBvec_, 1, (const int *) &rowIndex, + HYPRE_IJVectorSetValues(reducedBvec_, 1, (const int *) &rowIndex, (const double *) &ddata); } rowIndex++; @@ -2197,22 +2196,22 @@ int HYPRE_SlideReduction::buildReducedRHSVector(HYPRE_IJVector b) HYPRE_IJVectorGetObject(reducedBvec_, (void **) &rb_csr); //------------------------------------------------------------------ - // create a few more vectors + // create a few more vectors //------------------------------------------------------------------ - ierr = HYPRE_IJVectorCreate(mpiComm_, redBStart, + ierr = HYPRE_IJVectorCreate(mpiComm_, redBStart, redBStart+redBLocalLength-1, &reducedXvec_); ierr += HYPRE_IJVectorSetObjectType(reducedXvec_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(reducedXvec_); ierr += HYPRE_IJVectorAssemble(reducedXvec_); - assert( !ierr ); + hypre_assert( !ierr ); - ierr = HYPRE_IJVectorCreate(mpiComm_, redBStart, + ierr = HYPRE_IJVectorCreate(mpiComm_, redBStart, redBStart+redBLocalLength-1, &reducedRvec_); ierr += HYPRE_IJVectorSetObjectType(reducedRvec_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(reducedRvec_); ierr += HYPRE_IJVectorAssemble(reducedRvec_); - assert( !ierr ); + hypre_assert( !ierr ); free( procNRows ); return 0; @@ -2224,7 +2223,7 @@ int HYPRE_SlideReduction::buildReducedRHSVector(HYPRE_IJVector b) // x_2 = invA22 * ( b2 - A21 x_1 ) //----------------------------------------------------------------------------- -int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, +int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, HYPRE_IJVector b) { int mypid, nprocs, *procNRows, startRow, endRow, localNRows; @@ -2233,9 +2232,9 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, double *b_data, *v1_data, *rx_data, *x_data, *x2_data; HYPRE_ParCSRMatrix A_csr, A21_csr, invA22_csr; HYPRE_ParVector x_csr, x2_csr, v1_csr, b_csr, rx_csr; - HYPRE_IJVector v1, x2; + HYPRE_IJVector v1, x2; hypre_Vector *b_local, *v1_local, 
*rx_local, *x_local, *x2_local; - + //------------------------------------------------------------------ // get machine and matrix information //------------------------------------------------------------------ @@ -2250,7 +2249,7 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, localNRows = endRow - startRow + 1; nConstraints = procNConstr_[mypid+1] - procNConstr_[mypid]; newEndRow = endRow - nConstraints; - if (( outputLevel_ & HYPRE_BITMASK2 ) >= 1 && + if (( outputLevel_ & HYPRE_BITMASK2 ) >= 1 && (procNConstr_==NULL || procNConstr_[nprocs]==0)) { printf("%4d : buildReducedSolnVector WARNING - no local entry.\n",mypid); @@ -2263,12 +2262,12 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, vecStart = 2 * procNConstr_[mypid]; vecLocalLength = 2 * nConstraints; - ierr = HYPRE_IJVectorCreate(mpiComm_, vecStart, + ierr = HYPRE_IJVectorCreate(mpiComm_, vecStart, vecStart+vecLocalLength-1, &v1); ierr += HYPRE_IJVectorSetObjectType(v1, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(v1); ierr += HYPRE_IJVectorAssemble(v1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(v1, (void **) &v1_csr); HYPRE_IJMatrixGetObject(A21mat_, (void **) &A21_csr); HYPRE_IJVectorGetObject(reducedXvec_, (void **) &rx_csr); @@ -2296,16 +2295,16 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, for ( irow = 0; irow < nConstraints; irow++ ) { searchIndex = -1; - for ( jcol = 0; jcol < nConstraints; jcol++ ) + for ( jcol = 0; jcol < nConstraints; jcol++ ) { - if ( slaveEqnListAux_[jcol] == irow ) + if ( slaveEqnListAux_[jcol] == irow ) { - searchIndex = slaveEqnList_[jcol]; + searchIndex = slaveEqnList_[jcol]; break; } } - assert( searchIndex >= startRow ); - assert( searchIndex <= newEndRow ); + hypre_assert( searchIndex >= startRow ); + hypre_assert( searchIndex <= newEndRow ); v1_data[rowIndex++] += b_data[searchIndex-startRow]; } for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) @@ -2315,12 +2314,12 
@@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, // compute inv(A22) * (f2 - A21 * sol) --> x2 = invA22 * v1 //------------------------------------------------------------- - ierr = HYPRE_IJVectorCreate(mpiComm_, vecStart, + ierr = HYPRE_IJVectorCreate(mpiComm_, vecStart, vecStart+vecLocalLength-1, &x2); ierr += HYPRE_IJVectorSetObjectType(x2, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(x2); ierr += HYPRE_IJVectorAssemble(x2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(x2, (void **) &x2_csr ); HYPRE_IJMatrixGetObject(invA22mat_, (void **) &invA22_csr ); HYPRE_ParCSRMatrixMatvec(1.0, invA22_csr, v1_csr, 0.0, x2_csr); @@ -2333,7 +2332,7 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, #endif //------------------------------------------------------------- - // inject final solution to the solution vector x + // inject final solution to the solution vector x //------------------------------------------------------------- HYPRE_IJVectorGetObject(x, (void **) &x_csr ); @@ -2349,11 +2348,11 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, for ( irow = 0; irow < nConstraints; irow++ ) { - for ( jcol = 0; jcol < nConstraints; jcol++ ) + for ( jcol = 0; jcol < nConstraints; jcol++ ) { - if ( slaveEqnListAux_[jcol] == irow ) + if ( slaveEqnListAux_[jcol] == irow ) { - searchIndex = slaveEqnList_[jcol]; + searchIndex = slaveEqnList_[jcol]; break; } } @@ -2371,12 +2370,12 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, HYPRE_IJVector R; HYPRE_ParVector R_csr; - ierr = HYPRE_IJVectorCreate(mpiComm_, procNRows[mypid], + ierr = HYPRE_IJVectorCreate(mpiComm_, procNRows[mypid], procNRows[mypid+1]-1, &R); ierr += HYPRE_IJVectorSetObjectType(R, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(R); ierr += HYPRE_IJVectorAssemble(R); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorGetObject(R, (void **) &R_csr); HYPRE_ParVectorCopy( b_csr, R_csr ); HYPRE_ParCSRMatrixMatvec( 
-1.0, A_csr, x_csr, 1.0, R_csr ); @@ -2388,11 +2387,11 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, for ( irow = 0; irow < nConstraints; irow++ ) { searchIndex = -1; - for ( jcol = 0; jcol < nConstraints; jcol++ ) + for ( jcol = 0; jcol < nConstraints; jcol++ ) { - if ( slaveEqnListAux_[jcol] == irow ) + if ( slaveEqnListAux_[jcol] == irow ) { - searchIndex = slaveEqnList_[jcol]; + searchIndex = slaveEqnList_[jcol]; break; } } @@ -2400,7 +2399,7 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, } for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) rnorm2 += (R_data[irow-startRow] * R_data[irow-startRow]); - HYPRE_IJVectorDestroy(R); + HYPRE_IJVectorDestroy(R); if ( mypid == 0 ) printf("HYPRE_SlideRedction norm check = %e %e %e\n", sqrt(rnorm), sqrt(rnorm-rnorm2), sqrt(rnorm2)); @@ -2410,8 +2409,8 @@ int HYPRE_SlideReduction::buildReducedSolnVector(HYPRE_IJVector x, // clean up //---------------------------------------------------------------- - HYPRE_IJVectorDestroy(v1); - HYPRE_IJVectorDestroy(x2); + HYPRE_IJVectorDestroy(v1); + HYPRE_IJVectorDestroy(x2); free( procNRows ); return 0; } @@ -2446,7 +2445,7 @@ int HYPRE_SlideReduction::buildA21Mat() globalNConstr = procNConstr_[nprocs]; globalNRows = procNRows[nprocs]; nConstraints = procNConstr_[mypid+1] - procNConstr_[mypid]; - + //****************************************************************** // extract A21 from A //------------------------------------------------------------------ @@ -2463,7 +2462,7 @@ int HYPRE_SlideReduction::buildA21Mat() if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { printf("%4d : buildA21Mat - A21StartRow = %d\n", mypid, A21StartRow); - printf("%4d : buildA21Mat - A21GlobalDim = %d %d\n", mypid, + printf("%4d : buildA21Mat - A21GlobalDim = %d %d\n", mypid, A21GlobalNRows, A21GlobalNCols); printf("%4d : buildA21Mat - A21LocalDim = %d %d\n",mypid, A21NRows, A21NCols); @@ -2476,7 +2475,7 @@ int HYPRE_SlideReduction::buildA21Mat() ierr = 
HYPRE_IJMatrixCreate(mpiComm_,A21StartRow,A21StartRow+A21NRows-1, A21StartCol,A21StartCol+A21NCols-1,&A21mat_); ierr += HYPRE_IJMatrixSetObjectType(A21mat_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros in the first nConstraint row of A21 @@ -2488,26 +2487,26 @@ int HYPRE_SlideReduction::buildA21Mat() newEndRow = endRow - nConstraints; if ( A21NRows > 0 ) A21MatSize = new int[A21NRows]; else A21MatSize = NULL; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - if ( colVal[jcol] != 0.0 ) + if ( colVal[jcol] != 0.0 ) { - if ( colIndex <= newEndRow || colIndex > endRow ) + if ( colIndex <= newEndRow || colIndex > endRow ) { - searchIndex = hypre_BinarySearch(gSlaveEqnList_,colIndex, + searchIndex = hypre_BinarySearch(gSlaveEqnList_,colIndex, globalNConstr); if ( searchIndex < 0 ) newRowSize++; } @@ -2524,11 +2523,11 @@ int HYPRE_SlideReduction::buildA21Mat() //------------------------------------------------------------------ rowCount = nConstraints; - for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) + for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { if ( colVal[jcol] != 0.0 ) { @@ -2536,7 +2535,7 @@ int HYPRE_SlideReduction::buildA21Mat() if ( colIndex <= newEndRow || colIndex > endRow ) { 
searchIndex = hypre_BinarySearch(gSlaveEqnList_,colIndex, - globalNConstr); + globalNConstr); if ( searchIndex < 0 ) newRowSize++; } } @@ -2558,7 +2557,7 @@ int HYPRE_SlideReduction::buildA21Mat() ierr = HYPRE_IJMatrixSetRowSizes(A21mat_, A21MatSize); ierr += HYPRE_IJMatrixInitialize(A21mat_); - assert(!ierr); + hypre_assert(!ierr); if ( A21NRows > 0 ) delete [] A21MatSize; //------------------------------------------------------------------ @@ -2572,26 +2571,26 @@ int HYPRE_SlideReduction::buildA21Mat() for ( irow = 0; irow < nConstraints; irow++ ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { if ( colVal[jcol] != 0.0 ) { colIndex = colInd[jcol]; - if ( colIndex <= newEndRow || colIndex > endRow ) + if ( colIndex <= newEndRow || colIndex > endRow ) { - searchIndex = HYPRE_LSI_Search(gSlaveEqnList_,colIndex, - globalNConstr); - if ( searchIndex < 0 ) + searchIndex = HYPRE_LSI_Search(gSlaveEqnList_,colIndex, + globalNConstr); + if ( searchIndex < 0 ) { for ( procIndex = 0; procIndex < nprocs; procIndex++ ) if ( procNRows[procIndex] > colIndex ) break; @@ -2602,14 +2601,14 @@ int HYPRE_SlideReduction::buildA21Mat() if ( newColIndex < 0 || newColIndex >= A21GlobalNCols ) { printf("%4d : buildA21Mat ERROR - ",mypid); - printf(" out of range (%d,%d (%d))\n", rowCount, + printf(" out of range (%d,%d (%d))\n", rowCount, colIndex, A21GlobalNCols); for ( is = 0; is < rowSize; is++ ) printf("%4d : row %7d has col = %7d\n",mypid,rowIndex, colInd[is]); exit(1); - } - if ( newRowSize > maxRowSize+1 ) + } + if ( newRowSize > maxRowSize+1 ) { if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 2 ) { @@ -2619,7 +2618,7 @@ int 
HYPRE_SlideReduction::buildA21Mat() } } } - } + } } HYPRE_IJMatrixSetValues(A21mat_, 1, &newRowSize, (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); @@ -2631,14 +2630,14 @@ int HYPRE_SlideReduction::buildA21Mat() // next load the second nConstraint rows to A21 extracted from A //------------------------------------------------------------------ - for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) + for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - if (colVal[jcol] != 0.0 && + if (colVal[jcol] != 0.0 && (colIndex <= newEndRow || colIndex > endRow)) { searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, @@ -2652,7 +2651,7 @@ int HYPRE_SlideReduction::buildA21Mat() newColInd[newRowSize] = newColIndex; newColVal[newRowSize++] = colVal[jcol]; } - } + } } HYPRE_IJMatrixSetValues(A21mat_, 1, &newRowSize, (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); @@ -2676,24 +2675,24 @@ int HYPRE_SlideReduction::buildA21Mat() sprintf(fname, "A21.%d", mypid); FILE *fp = fopen(fname, "w"); - if ( mypid == 0 ) + if ( mypid == 0 ) { printf("====================================================\n"); printf("%4d : Printing A21 matrix... 
\n", mypid); fflush(stdout); } - for (irow = A21StartRow;irow < A21StartRow+2*nConstraints;irow++) + for (irow = A21StartRow;irow < A21StartRow+2*nConstraints;irow++) { HYPRE_ParCSRMatrixGetRow(A21_csr,irow,&rowSize,&colInd,&colVal); for ( jcol = 0; jcol < rowSize; jcol++ ) if ( colVal[jcol] != 0.0 ) fprintf(fp, "%6d %6d %25.16e \n",irow+1,colInd[jcol]+1, colVal[jcol]); - HYPRE_ParCSRMatrixRestoreRow(A21_csr, irow, &rowSize, &colInd, + HYPRE_ParCSRMatrixRestoreRow(A21_csr, irow, &rowSize, &colInd, &colVal); } fclose(fp); - if ( mypid == 0 ) + if ( mypid == 0 ) printf("====================================================\n"); } return 0; @@ -2730,7 +2729,7 @@ int HYPRE_SlideReduction::buildInvA22Mat() globalNConstr = procNConstr_[nprocs]; nConstraints = procNConstr_[mypid+1] - procNConstr_[mypid]; newEndRow = endRow - nConstraints; - + //------------------------------------------------------------------ // construct the group information //------------------------------------------------------------------ @@ -2739,20 +2738,20 @@ int HYPRE_SlideReduction::buildInvA22Mat() if ( nConstraints > 0 ) { iTempList = new int[nConstraints]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) iTempList[irow] = constrBlkInfo_[irow]; hypre_qsort0( iTempList, 0, nConstraints-1 ); nGroups = 1; - for ( irow = 1; irow < nConstraints; irow++ ) + for ( irow = 1; irow < nConstraints; irow++ ) if ( iTempList[irow] != iTempList[irow-1] ) nGroups++; groupIDs = new int[nGroups]; groupSizes = new int[nGroups]; groupIDs[0] = iTempList[0]; groupSizes[0] = 1; nGroups = 1; - for ( irow = 1; irow < nConstraints; irow++ ) + for ( irow = 1; irow < nConstraints; irow++ ) { - if ( iTempList[irow] != iTempList[irow-1] ) + if ( iTempList[irow] != iTempList[irow-1] ) { groupSizes[nGroups] = 1; groupIDs[nGroups++] = iTempList[irow]; @@ -2764,9 +2763,9 @@ int HYPRE_SlideReduction::buildInvA22Mat() { if ( groupSizes[ig] > maxBlkSize ) { - printf("%4d : 
buildInvA22 ERROR - block Size %d >= %d\n", mypid, + printf("%4d : buildInvA22 ERROR - block Size %d >= %d\n", mypid, groupSizes[ig], maxBlkSize); - printf("%4d : buildInvA22 ERROR - group ID = %d\n", mypid, + printf("%4d : buildInvA22 ERROR - group ID = %d\n", mypid, groupIDs[ig]); exit(1); } @@ -2776,7 +2775,7 @@ int HYPRE_SlideReduction::buildInvA22Mat() groupRowNums[ig] = new int[groupSizes[ig]]; groupSizes[ig] = 0; } - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { index = constrBlkInfo_[irow]; searchIndex = hypre_BinarySearch(groupIDs, index, nGroups); @@ -2786,7 +2785,7 @@ int HYPRE_SlideReduction::buildInvA22Mat() } //------------------------------------------------------------------ - // first extract the (2,1) block of A22 + // first extract the (2,1) block of A22 // ( constraints-to-local slaves ) //------------------------------------------------------------------ @@ -2801,7 +2800,7 @@ int HYPRE_SlideReduction::buildInvA22Mat() #if 0 FILE *fp = fopen("CT.m","w"); #endif - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = newEndRow + 1 + irow; HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); @@ -2811,16 +2810,16 @@ FILE *fp = fopen("CT.m","w"); { colIndex = colInd[jcol]; searchIndex = hypre_BinarySearch(slaveEqnList_,colIndex,nConstraints); - if ( searchIndex >= 0 ) + if ( searchIndex >= 0 ) { CT_JA[CTOffset+CTRowSize] = slaveEqnListAux_[searchIndex]; - CT_AA[CTOffset+CTRowSize] = colVal[jcol]; + CT_AA[CTOffset+CTRowSize] = colVal[jcol]; CTRowSize++; #if 0 fprintf(fp,"%d %d %25.16e\n",irow+1,CT_JA[CTOffset+CTRowSize-1]+1,colVal[jcol]); #endif } - } + } HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); } #if 0 @@ -2828,7 +2827,7 @@ fclose(fp); #endif //------------------------------------------------------------------ - // invert the (2,1) block of A22 + // invert the (2,1) block of A22 
//------------------------------------------------------------------ #if 0 @@ -2840,13 +2839,13 @@ FILE *fp2 = fopen("invCT.m","w"); for ( ig = 0; ig < nGroups; ig++ ) { - for ( ir = 0; ir < groupSizes[ig]; ir++ ) + for ( ir = 0; ir < groupSizes[ig]; ir++ ) for ( ic = 0; ic < groupSizes[ig]; ic++ ) Imat[ir][ic] = 0.0; for ( ir = 0; ir < groupSizes[ig]; ir++ ) - { + { rowIndex = groupRowNums[ig][ir]; offset = rowIndex * maxBlkSize; - for ( ic = 0; ic < maxBlkSize; ic++ ) + for ( ic = 0; ic < maxBlkSize; ic++ ) { colIndex = CT_JA[offset+ic]; if ( colIndex != -1 ) @@ -2879,10 +2878,10 @@ FILE *fp2 = fopen("invCT.m","w"); } printf("\n"); for ( ir = 0; ir < groupSizes[ig]; ir++ ) - { + { rowIndex = groupRowNums[ig][ir]; offset = rowIndex * maxBlkSize; - for ( ic = 0; ic < maxBlkSize; ic++ ) + for ( ic = 0; ic < maxBlkSize; ic++ ) { colIndex = CT_JA[offset+ic]; if ( colIndex != -1 ) @@ -2893,13 +2892,13 @@ FILE *fp2 = fopen("invCT.m","w"); } } } - assert( !ierr ); + hypre_assert( !ierr ); for ( ir = 0; ir < groupSizes[ig]; ir++ ) - { + { rowIndex = groupRowNums[ig][ir]; offset = rowIndex * maxBlkSize; - for (ic = 0; ic < maxBlkSize; ic++) CT_JA[offset+ic] = -1; - for ( ic = 0; ic < groupSizes[ig]; ic++ ) + for (ic = 0; ic < maxBlkSize; ic++) CT_JA[offset+ic] = -1; + for ( ic = 0; ic < groupSizes[ig]; ic++ ) { if ( Imat2[ir][ic] != 0.0 ) { @@ -2921,53 +2920,53 @@ fclose(fp2); free( Imat ); //------------------------------------------------------------------ - // form ParCSRMatrix of the (2,1) block of A22 + // form ParCSRMatrix of the (2,1) block of A22 //------------------------------------------------------------------ int *hypreCTMatSize, maxRowSize; hypre_ParCSRMatrix *hypreCT; HYPRE_IJMatrix IJCT; - ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], - procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], + ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], + procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], 
procNConstr_[mypid]+nConstraints-1, &IJCT); ierr += HYPRE_IJMatrixSetObjectType(IJCT, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) hypreCTMatSize = new int[nConstraints]; else hypreCTMatSize = NULL; maxRowSize = 0; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { newRowSize = 0; offset = irow * maxBlkSize; - for ( ic = 0; ic < maxBlkSize; ic++ ) + for ( ic = 0; ic < maxBlkSize; ic++ ) if ( CT_JA[offset+ic] != -1 ) newRowSize++; hypreCTMatSize[irow] = newRowSize; maxRowSize = (newRowSize > maxRowSize) ? newRowSize : maxRowSize; } ierr = HYPRE_IJMatrixSetRowSizes(IJCT, hypreCTMatSize); ierr = HYPRE_IJMatrixInitialize(IJCT); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) delete [] hypreCTMatSize; newColInd = new int[maxRowSize+1]; newColVal = new double[maxRowSize+1]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; offset = irow * maxBlkSize; - newRowSize = 0; - for ( ic = 0; ic < maxBlkSize; ic++ ) + newRowSize = 0; + for ( ic = 0; ic < maxBlkSize; ic++ ) { - if ( CT_JA[offset+ic] != -1 ) + if ( CT_JA[offset+ic] != -1 ) { newColInd[newRowSize] = CT_JA[offset+ic] + procNConstr_[mypid]; newColVal[newRowSize++] = CT_AA[offset+ic]; } } ierr = HYPRE_IJMatrixSetValues(IJCT, 1, &newRowSize, - (const int *) &rowIndex, (const int *) newColInd, + (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert( !ierr ); + hypre_assert( !ierr ); } delete [] newColInd; delete [] newColVal; @@ -2981,7 +2980,7 @@ fclose(fp2); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreCT); //------------------------------------------------------------------ - // next extract the (1,2) block of A22 + // next extract the (1,2) block of A22 // ( local slaves-to-constraints ) //------------------------------------------------------------------ @@ -2993,13 +2992,13 @@ fclose(fp2); 
C_AA = new double[nConstraints*maxBlkSize]; for ( irow = 0; irow < nConstraints*maxBlkSize; irow++ ) C_JA[irow] = -1; } - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } @@ -3012,15 +3011,15 @@ fclose(fp2); if ( colIndex > newEndRow && colIndex <= endRow ) { C_JA[COffset+CRowSize] = colIndex - newEndRow - 1; - C_AA[COffset+CRowSize] = colVal[jcol]; + C_AA[COffset+CRowSize] = colVal[jcol]; CRowSize++; } - } + } HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); } //------------------------------------------------------------------ - // invert the (2,1) block of A22 + // invert the (2,1) block of A22 //------------------------------------------------------------------ Imat = hypre_TAlloc(double*, maxBlkSize , HYPRE_MEMORY_HOST); @@ -3029,13 +3028,13 @@ fclose(fp2); for ( ig = 0; ig < nGroups; ig++ ) { - for ( ir = 0; ir < groupSizes[ig]; ir++ ) + for ( ir = 0; ir < groupSizes[ig]; ir++ ) for ( ic = 0; ic < groupSizes[ig]; ic++ ) Imat[ir][ic] = 0.0; for ( ir = 0; ir < groupSizes[ig]; ir++ ) - { + { rowIndex = groupRowNums[ig][ir]; offset = rowIndex * maxBlkSize; - for ( ic = 0; ic < maxBlkSize; ic++ ) + for ( ic = 0; ic < maxBlkSize; ic++ ) { colIndex = C_JA[offset+ic]; if ( colIndex != -1 ) @@ -3047,13 +3046,13 @@ fclose(fp2); } } ierr = HYPRE_LSI_MatrixInverse((double**) Imat, groupSizes[ig], &Imat2); - assert( !ierr ); + hypre_assert( !ierr ); for ( ir = 0; ir < groupSizes[ig]; ir++ ) - { + { rowIndex = groupRowNums[ig][ir]; offset = rowIndex * maxBlkSize; - for (ic = 0; ic < maxBlkSize; ic++) C_JA[offset+ic] = -1; - for ( ic = 0; ic < groupSizes[ig]; ic++ ) + for (ic = 0; ic < maxBlkSize; ic++) C_JA[offset+ic] = -1; + for ( ic = 0; ic < groupSizes[ig]; ic++ ) { 
if ( Imat2[ir][ic] != 0.0 ) { @@ -3069,53 +3068,53 @@ fclose(fp2); free( Imat ); //------------------------------------------------------------------ - // form ParCSRMatrix of the (1,2) block of A22 + // form ParCSRMatrix of the (1,2) block of A22 //------------------------------------------------------------------ int *hypreCMatSize; hypre_ParCSRMatrix *hypreC; HYPRE_IJMatrix IJC; - ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], - procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], + ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], + procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], procNConstr_[mypid]+nConstraints-1, &IJC); ierr += HYPRE_IJMatrixSetObjectType(IJC, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) hypreCMatSize = new int[nConstraints]; else hypreCMatSize = NULL; maxRowSize = 0; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { newRowSize = 0; offset = irow * maxBlkSize; - for ( ic = 0; ic < maxBlkSize; ic++ ) + for ( ic = 0; ic < maxBlkSize; ic++ ) if ( C_JA[offset+ic] != -1 ) newRowSize++; hypreCMatSize[irow] = newRowSize; maxRowSize = (newRowSize > maxRowSize) ? 
newRowSize : maxRowSize; } ierr = HYPRE_IJMatrixSetRowSizes(IJC, hypreCMatSize); ierr = HYPRE_IJMatrixInitialize(IJC); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) delete [] hypreCMatSize; newColInd = new int[maxRowSize+1]; newColVal = new double[maxRowSize+1]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; offset = irow * maxBlkSize; - newRowSize = 0; - for ( ic = 0; ic < maxBlkSize; ic++ ) + newRowSize = 0; + for ( ic = 0; ic < maxBlkSize; ic++ ) { - if ( C_JA[offset+ic] != -1 ) + if ( C_JA[offset+ic] != -1 ) { newColInd[newRowSize] = C_JA[offset+ic] + procNConstr_[mypid]; newColVal[newRowSize++] = C_AA[offset+ic]; } } ierr = HYPRE_IJMatrixSetValues(IJC, 1, &newRowSize, - (const int *) &rowIndex, (const int *) newColInd, + (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert( !ierr ); + hypre_assert( !ierr ); } delete [] newColInd; delete [] newColVal; @@ -3144,29 +3143,29 @@ fclose(fp2); HYPRE_IJMatrix IJB; ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], - procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], + procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], procNConstr_[mypid]+nConstraints-1, &IJB); ierr = HYPRE_IJMatrixSetObjectType(IJB, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) hypreBMatSize = new int[nConstraints]; maxRowSize = 0; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - 
searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, - globalNConstr); + searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, + globalNConstr); if ( searchIndex >= 0 ) newRowSize++; } HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); @@ -3175,41 +3174,41 @@ fclose(fp2); } ierr = HYPRE_IJMatrixSetRowSizes(IJB, hypreBMatSize); ierr = HYPRE_IJMatrixInitialize(IJB); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) delete [] hypreBMatSize; newColInd = new int[maxRowSize+1]; newColVal = new double[maxRowSize+1]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, - globalNConstr); - if ( searchIndex >= 0 ) + searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, + globalNConstr); + if ( searchIndex >= 0 ) { newColInd[newRowSize] = gSlaveEqnListAux_[searchIndex]; newColVal[newRowSize++] = - colVal[jcol]; } } HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); - rowIndex = procNConstr_[mypid] + irow; + rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_IJMatrixSetValues(IJB, 1, &newRowSize, - (const int *) &rowIndex, (const int *) newColInd, + (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert( !ierr ); + hypre_assert( !ierr ); } HYPRE_IJMatrixAssemble(IJB); HYPRE_IJMatrixGetObject(IJB, (void **) &hypreB); @@ -3230,10 +3229,10 @@ fclose(fp2); HYPRE_ParCSRMatrixPrint((HYPRE_ParCSRMatrix) hypreB, fname); #endif - 
hypre_BoomerAMGBuildCoarseOperator(hypreCT, hypreB, hypreCT, + hypre_BoomerAMGBuildCoarseOperator(hypreCT, hypreB, hypreCT, (hypre_ParCSRMatrix **) &hypreCBC); #if 0 - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCT,irow,&rowSize, &colInd,&colVal); @@ -3242,7 +3241,7 @@ fclose(fp2); HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCT,irow,&rowSize, &colInd,&colVal); } - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreB,irow,&rowSize, &colInd,&colVal); @@ -3251,7 +3250,7 @@ fclose(fp2); HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreB,irow,&rowSize, &colInd,&colVal); } - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCBC,irow,&rowSize, &colInd,&colVal); @@ -3278,11 +3277,11 @@ fclose(fp2); // create a matrix context for A22 //------------------------------------------------------------------ - ierr = HYPRE_IJMatrixCreate(mpiComm_, invA22StartRow, - invA22StartRow+invA22NRows-1, invA22StartCol, + ierr = HYPRE_IJMatrixCreate(mpiComm_, invA22StartRow, + invA22StartRow+invA22NRows-1, invA22StartCol, invA22StartCol+invA22NCols-1, &invA22mat_); ierr += HYPRE_IJMatrixSetObjectType(invA22mat_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the no. 
of nonzeros in the first nConstraint row of invA22 @@ -3290,12 +3289,12 @@ fclose(fp2); maxRowSize = 0; if ( invA22NRows > 0 ) invA22MatSize = new int[invA22NRows]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCT,rowIndex, &rowSize,NULL,NULL); - assert( !ierr ); + hypre_assert( !ierr ); invA22MatSize[irow] = rowSize; HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCT,rowIndex, &rowSize,NULL,NULL); @@ -3303,12 +3302,12 @@ fclose(fp2); } //------------------------------------------------------------------ - // compute the number of nonzeros in the second nConstraints row of + // compute the number of nonzeros in the second nConstraints row of // invA22 (consisting of [D and A22 block]) //------------------------------------------------------------------ #if 0 - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCBC,irow,&rowSize, &colInd,&colVal); @@ -3318,18 +3317,18 @@ fclose(fp2); &colInd,&colVal); } #endif - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreC,rowIndex, &rowSize,&colInd,&colVal); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize = rowSize; HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreC,rowIndex, &rowSize,&colInd,&colVal); ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCBC,rowIndex, &rowSize,&colInd,&colVal); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize += rowSize; HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCBC,rowIndex, &rowSize,&colInd,&colVal); @@ -3342,7 +3341,7 @@ fclose(fp2); //------------------------------------------------------------------ #if 0 - for ( irow = 0; irow < nConstraints; irow++ ) + for ( 
irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCBC,irow,&rowSize, &colInd,&colVal); @@ -3354,7 +3353,7 @@ fclose(fp2); #endif ierr = HYPRE_IJMatrixSetRowSizes(invA22mat_, invA22MatSize); ierr += HYPRE_IJMatrixInitialize(invA22mat_); - assert(!ierr); + hypre_assert(!ierr); if ( invA22NRows > 0 ) delete [] invA22MatSize; //------------------------------------------------------------------ @@ -3364,12 +3363,12 @@ fclose(fp2); newColInd = new int[maxRowSize+1]; newColVal = new double[maxRowSize+1]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCT,rowIndex, &rowSize,&colInd,&colVal); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize = 0; for ( jcol = 0; jcol < rowSize; jcol++ ) { @@ -3380,22 +3379,22 @@ fclose(fp2); HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCT,rowIndex, &rowSize,&colInd,&colVal); rowCount = invA22StartRow + irow; - ierr = HYPRE_IJMatrixSetValues(invA22mat_, 1, &rowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(invA22mat_, 1, &rowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } //------------------------------------------------------------------ // next load the second nConstraints rows to A22 extracted from A //------------------------------------------------------------------ - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreC,rowIndex, &rowSize,&colInd,&colVal); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize = 0; for ( jcol = 0; jcol < rowSize; jcol++ ) { @@ -3406,7 +3405,7 @@ fclose(fp2); &rowSize,&colInd,&colVal); ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) 
hypreCBC,rowIndex, &rowSize2,&colInd2,&colVal2); - assert( !ierr ); + hypre_assert( !ierr ); for ( jcol = 0; jcol < rowSize2; jcol++ ) { colIndex = colInd2[jcol]; @@ -3418,10 +3417,10 @@ fclose(fp2); HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCBC,rowIndex, &rowSize2,&colInd2,&colVal2); rowCount = invA22StartRow + nConstraints + irow; - ierr = HYPRE_IJMatrixSetValues(invA22mat_, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(invA22mat_, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } delete [] newColInd; delete [] newColVal; @@ -3443,13 +3442,13 @@ fclose(fp2); sprintf( fname, "invA.%d", mypid ); FILE *fp = fopen( fname, "w"); - if ( mypid == 0 ) + if ( mypid == 0 ) { printf("====================================================\n"); printf("%4d : Printing invA22 matrix... \n", mypid); fflush(stdout); } - for (irow=invA22StartRow; irow < invA22StartRow+invA22NRows;irow++) + for (irow=invA22StartRow; irow < invA22StartRow+invA22NRows;irow++) { HYPRE_ParCSRMatrixGetRow(invA22_csr,irow,&rowSize,&colInd,&colVal); for ( jcol = 0; jcol < rowSize; jcol++ ) @@ -3460,7 +3459,7 @@ fclose(fp2); &colVal); } fclose(fp); - if ( mypid == 0 ) + if ( mypid == 0 ) printf("====================================================\n"); } return 0; @@ -3487,9 +3486,9 @@ int HYPRE_SlideReduction::scaleMatrixVector() hypre_ParVector *b_csr, *b2_csr; hypre_ParCSRCommPkg *commPkg; hypre_ParCSRCommHandle *commHandle; - + //----------------------------------------------------------------------- - // fetch matrix and parameters + // fetch matrix and parameters //----------------------------------------------------------------------- MPI_Comm_rank( mpiComm_, &mypid ); @@ -3532,7 +3531,7 @@ int HYPRE_SlideReduction::scaleMatrixVector() for ( irow = 0; irow < localNRows; irow++ ) { scaleVec[irow] = 0.0; - rowLengs[irow] = ADiagI[irow+1] - 
ADiagI[irow] + + rowLengs[irow] = ADiagI[irow+1] - ADiagI[irow] + AOffdI[irow+1] - AOffdI[irow]; if ( rowLengs[irow] > maxRowLeng ) maxRowLeng = rowLengs[irow]; for ( jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++ ) @@ -3593,13 +3592,13 @@ int HYPRE_SlideReduction::scaleMatrixVector() for ( jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++ ) { index = ADiagJ[jcol]; - colInd[rowSize] = index + startRow; + colInd[rowSize] = index + startRow; colVal[rowSize++] = scaleVec[irow]*scaleVec[index]*ADiagA[jcol]; } for ( jcol = AOffdI[irow]; jcol < AOffdI[irow+1]; jcol++ ) { index = AOffdJ[jcol]; - colInd[rowSize] = offdMap[index]; + colInd[rowSize] = offdMap[index]; colVal[rowSize++] = scaleVec[irow]*extScaleVec[index]*AOffdA[jcol]; } rowInd = irow + startRow; @@ -3621,7 +3620,7 @@ int HYPRE_SlideReduction::scaleMatrixVector() ierr += HYPRE_IJVectorAssemble(newB); ierr += HYPRE_IJVectorGetObject(newB, (void **) &b2_csr); b2Data = hypre_VectorData(hypre_ParVectorLocalVector(b2_csr)); - assert( !ierr ); + hypre_assert( !ierr ); for ( irow = 0; irow < localNRows; irow++ ) b2Data[irow] = bData[irow] * scaleVec[irow]; @@ -3680,32 +3679,32 @@ double HYPRE_SlideReduction::matrixCondEst(int globalRowID, int globalColID, { searchIndex = hypre_BinarySearch(localBlkInfo, constrBlkInfo_[irow], localBlkCnt); - if ( searchIndex >= 0 ) + if ( searchIndex >= 0 ) rowIndices[matDim++] = endRow - nConstraints + irow + 1; } hypre_qsort0(rowIndices, 0, matDim-1); matrix = hypre_TAlloc(double*, matDim , HYPRE_MEMORY_HOST); localSlaveEqns = new int[nConstraints]; localSlaveAuxs = new int[nConstraints]; - for ( irow = 0; irow < nConstraints; irow++ ) - localSlaveEqns[irow] = slaveEqnList_[irow]; - localSlaveEqns[globalRowID-(endRow+1-nConstraints)] = globalColID; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) + localSlaveEqns[irow] = slaveEqnList_[irow]; + localSlaveEqns[globalRowID-(endRow+1-nConstraints)] = globalColID; + for ( irow = 0; irow 
< nConstraints; irow++ ) localSlaveAuxs[irow] = irow; HYPRE_LSI_qsort1a(localSlaveEqns, localSlaveAuxs, 0, nConstraints-1); - for ( irow = 0; irow < matDim; irow++ ) + for ( irow = 0; irow < matDim; irow++ ) { matrix[irow] = hypre_TAlloc(double, matDim , HYPRE_MEMORY_HOST); for ( jcol = 0; jcol < matDim; jcol++ ) matrix[irow][jcol] = 0.0; } - for ( irow = 0; irow < matDim; irow++ ) + for ( irow = 0; irow < matDim; irow++ ) { rowIndex = rowIndices[irow]; HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { - searchIndex = hypre_BinarySearch(localSlaveEqns,colInd[jcol], + searchIndex = hypre_BinarySearch(localSlaveEqns,colInd[jcol], nConstraints); if ( searchIndex >= 0 ) { @@ -3718,9 +3717,9 @@ double HYPRE_SlideReduction::matrixCondEst(int globalRowID, int globalColID, } #if 0 if ( matDim <= 4 ) - for ( irow = 0; irow < matDim; irow++ ) + for ( irow = 0; irow < matDim; irow++ ) { - for ( jcol = 0; jcol < matDim; jcol++ ) + for ( jcol = 0; jcol < matDim; jcol++ ) printf(" %e ", matrix[irow][jcol]); printf("\n"); } @@ -3730,9 +3729,9 @@ double HYPRE_SlideReduction::matrixCondEst(int globalRowID, int globalColID, else { retVal = 0.0; - for ( irow = 0; irow < matDim; irow++ ) + for ( irow = 0; irow < matDim; irow++ ) { - for ( jcol = 0; jcol < matDim; jcol++ ) + for ( jcol = 0; jcol < matDim; jcol++ ) { value = habs(matrix2[irow][jcol]); retVal = ( value > retVal ) ? 
value : retVal; @@ -3762,7 +3761,7 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) { int mypid, nprocs, *procNRows, startRow, endRow; int nConstraints, irow, jcol, rowSize, ncnt, *colInd; - int nCandidates, *candidateList; + int nCandidates, *candidateList; int *constrListAux, colIndex, searchIndex, newEndRow; int *constrListAux2; int constrIndex, uBound, lBound, nSum, nPairs, index; @@ -3788,7 +3787,7 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) (*couplings)[0] = nPairs; //------------------------------------------------------------------ - // compose candidate slave list (slaves in candidateList, corresponding + // compose candidate slave list (slaves in candidateList, corresponding // constraint equation in constrListAux and constrListAux2) //------------------------------------------------------------------ @@ -3809,16 +3808,16 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) uBound = procNRows[mypid+1]; lBound = uBound - nConstraints; - for ( irow = startRow; irow <= endRow-nConstraints; irow++ ) + for ( irow = startRow; irow <= endRow-nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); ncnt = 0; constrListAux[nCandidates] = -1; constrListAux2[nCandidates] = -1; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - if ( colIndex >= lBound && colIndex < uBound ) + if ( colIndex >= lBound && colIndex < uBound ) { ncnt++; @@ -3830,7 +3829,7 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) if ( ncnt > 2 ) break; } HYPRE_ParCSRMatrixRestoreRow(A_csr,irow,&rowSize,&colInd,&colVal); - if ( ncnt == 2 ) + if ( ncnt == 2 ) { if ( constrListAux[nCandidates] > newEndRow && constrListAux[nCandidates] <= endRow && @@ -3839,7 +3838,7 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) { candidateList[nCandidates++] = irow; if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) - printf("%4d : findSlaveEqns2 - candidate %d = 
%d\n", + printf("%4d : findSlaveEqns2 - candidate %d = %d\n", mypid, nCandidates-1, irow); } } @@ -3854,21 +3853,21 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) // (search for candidates column index with maximum magnitude) // ==> slaveEqnList_ //--------------------------------------------------------------------- - + nPairs = 0; searchIndex = 0; - for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) + for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) { if ( slaveEqnList_[irow-endRow+nConstraints-1] == -1 ) { HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); searchIndex = -1; searchValue = -1.0E10; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { - if (colVal[jcol] != 0.0 && colInd[jcol] >= startRow - && colInd[jcol] <= (endRow-nConstraints)) + if (colVal[jcol] != 0.0 && colInd[jcol] >= startRow + && colInd[jcol] <= (endRow-nConstraints)) { colIndex = hypre_BinarySearch(candidateList, colInd[jcol], nCandidates); @@ -3880,10 +3879,10 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) if ( colIndex >= 0 ) { constrIndex = constrListAux[colIndex]; - if ( constrIndex == irow ) + if ( constrIndex == irow ) constrIndex = constrListAux2[colIndex]; if (slaveEqnList_[constrIndex-endRow+nConstraints-1] != -1) - { + { if ( habs(colVal[jcol]) > searchValue ) { searchValue = habs(colVal[jcol]); @@ -3892,7 +3891,7 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) } } } - } + } HYPRE_ParCSRMatrixRestoreRow(A_csr,irow,&rowSize,&colInd,&colVal); if ( searchIndex >= 0 ) { @@ -3904,8 +3903,8 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) printf("%4d : findSlaveEqns2 - constr %d <=> slave %d\n", mypid, irow, searchIndex); - } - else + } + else { if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { @@ -3933,7 +3932,7 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) for ( irow = 0; irow < nConstraints; irow++ ) if 
( slaveEqnList_[irow] == -1 ) ncnt++; MPI_Allreduce(&ncnt, &nSum, 1, MPI_INT, MPI_SUM, mpiComm_); - if ( nSum > 0 ) + if ( nSum > 0 ) { if ( mypid == 0 && ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { @@ -3944,7 +3943,7 @@ int HYPRE_SlideReduction::findSlaveEqns2(int **couplings) if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { for ( irow = 0; irow < nConstraints; irow++ ) - if ( slaveEqnList_[irow] == -1 ) + if ( slaveEqnList_[irow] == -1 ) { printf("%4d : findSlaveEqns2 - unsatisfied constraint",mypid); printf(" equation = %d\n", irow+endRow-nConstraints+1); @@ -3986,7 +3985,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() globalNConstr = procNConstr_[nprocs]; globalNRows = procNRows[nprocs]; nConstraints = procNConstr_[mypid+1] - procNConstr_[mypid]; - + //****************************************************************** // extract A21 from A //------------------------------------------------------------------ @@ -4004,7 +4003,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() { printf("%4d : buildReducedMatrix - A21StartRow = %d\n", mypid, A21StartRow); - printf("%4d : buildReducedMatrix - A21GlobalDim = %d %d\n", mypid, + printf("%4d : buildReducedMatrix - A21GlobalDim = %d %d\n", mypid, A21GlobalNRows, A21GlobalNCols); printf("%4d : buildReducedMatrix - A21LocalDim = %d %d\n",mypid, A21NRows, A21NCols); @@ -4017,7 +4016,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() ierr = HYPRE_IJMatrixCreate(mpiComm_,A21StartRow,A21StartRow+A21NRows-1, A21StartCol,A21StartCol+A21NCols-1,&A21mat_); ierr += HYPRE_IJMatrixSetObjectType(A21mat_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros in the first nConstraint row of A21 @@ -4028,26 +4027,26 @@ int HYPRE_SlideReduction::buildReducedMatrix2() rowCount = maxRowSize = 0; newEndRow = endRow - nConstraints; A21MatSize = new int[A21NRows]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow 
< nConstraints; irow++ ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - if ( colVal[jcol] != 0.0 ) + if ( colVal[jcol] != 0.0 ) { - if ( colIndex <= newEndRow || colIndex > endRow ) + if ( colIndex <= newEndRow || colIndex > endRow ) { - searchIndex = hypre_BinarySearch(gSlaveEqnList_,colIndex, + searchIndex = hypre_BinarySearch(gSlaveEqnList_,colIndex, globalNConstr); if ( searchIndex < 0 ) newRowSize++; } @@ -4064,11 +4063,11 @@ int HYPRE_SlideReduction::buildReducedMatrix2() //------------------------------------------------------------------ rowCount = nConstraints; - for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) + for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { if ( colVal[jcol] != 0.0 ) { @@ -4076,7 +4075,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() if ( colIndex <= newEndRow || colIndex > endRow ) { searchIndex = hypre_BinarySearch(gSlaveEqnList_,colIndex, - globalNConstr); + globalNConstr); if ( searchIndex < 0 ) newRowSize++; } } @@ -4098,7 +4097,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() ierr = HYPRE_IJMatrixSetRowSizes(A21mat_, A21MatSize); ierr += HYPRE_IJMatrixInitialize(A21mat_); - assert(!ierr); + hypre_assert(!ierr); delete [] A21MatSize; //------------------------------------------------------------------ @@ -4112,26 +4111,26 @@ int HYPRE_SlideReduction::buildReducedMatrix2() for ( irow = 0; irow < nConstraints; irow++ ) { - for ( is = 0; is < 
nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { if ( colVal[jcol] != 0.0 ) { colIndex = colInd[jcol]; - if ( colIndex <= newEndRow || colIndex > endRow ) + if ( colIndex <= newEndRow || colIndex > endRow ) { - searchIndex = HYPRE_LSI_Search(gSlaveEqnList_,colIndex, - globalNConstr); - if ( searchIndex < 0 ) + searchIndex = HYPRE_LSI_Search(gSlaveEqnList_,colIndex, + globalNConstr); + if ( searchIndex < 0 ) { for ( procIndex = 0; procIndex < nprocs; procIndex++ ) if ( procNRows[procIndex] > colIndex ) break; @@ -4142,14 +4141,14 @@ int HYPRE_SlideReduction::buildReducedMatrix2() if ( newColIndex < 0 || newColIndex >= A21GlobalNCols ) { printf("%4d : buildReducedMatrix ERROR - A21",mypid); - printf(" out of range (%d,%d (%d))\n", rowCount, + printf(" out of range (%d,%d (%d))\n", rowCount, colIndex, A21GlobalNCols); for ( is = 0; is < rowSize; is++ ) printf("%4d : row %7d has col = %7d\n",mypid,rowIndex, colInd[is]); exit(1); - } - if ( newRowSize > maxRowSize+1 ) + } + if ( newRowSize > maxRowSize+1 ) { if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 2 ) { @@ -4159,7 +4158,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() } } } - } + } } HYPRE_IJMatrixSetValues(A21mat_,1,&newRowSize,(const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); @@ -4171,14 +4170,14 @@ int HYPRE_SlideReduction::buildReducedMatrix2() // next load the second nConstraint rows to A21 extracted from A //------------------------------------------------------------------ - for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) + for ( irow = endRow-nConstraints+1; irow <= endRow; irow++ ) { 
HYPRE_ParCSRMatrixGetRow(A_csr,irow,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - if (colVal[jcol] != 0.0 && + if (colVal[jcol] != 0.0 && (colIndex <= newEndRow || colIndex > endRow)) { searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, @@ -4192,7 +4191,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() newColInd[newRowSize] = newColIndex; newColVal[newRowSize++] = colVal[jcol]; } - } + } } HYPRE_IJMatrixSetValues(A21mat_,1,&newRowSize,(const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); @@ -4215,13 +4214,13 @@ int HYPRE_SlideReduction::buildReducedMatrix2() sprintf(fname, "A21.%d", mypid); FILE *fp = fopen(fname, "w"); - if ( mypid == 0 ) + if ( mypid == 0 ) { printf("====================================================\n"); printf("%4d : Printing A21 matrix... \n", mypid); fflush(stdout); } - for (irow = A21StartRow;irow < A21StartRow+2*nConstraints;irow++) + for (irow = A21StartRow;irow < A21StartRow+2*nConstraints;irow++) { HYPRE_ParCSRMatrixGetRow(A21_csr,irow,&rowSize,&colInd,&colVal); for ( jcol = 0; jcol < rowSize; jcol++ ) @@ -4231,7 +4230,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() HYPRE_ParCSRMatrixRestoreRow(A21_csr, irow, &rowSize, &colInd, &colVal); } - if ( mypid == 0 ) + if ( mypid == 0 ) printf("====================================================\n"); fclose(fp); } @@ -4243,7 +4242,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() //------------------------------------------------------------------ //------------------------------------------------------------------ - // first extract the (2,1) block of A22 + // first extract the (2,1) block of A22 // ( constraints-to-local slaves ) //------------------------------------------------------------------ @@ -4254,41 +4253,41 @@ int HYPRE_SlideReduction::buildReducedMatrix2() CT_JA = new int[nConstraints*2]; CT_AA = new double[nConstraints*2]; } 
- for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = newEndRow + 1 + irow; HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); - CT_JA[irow*2] = CT_JA[irow*2+1] = -1; - CT_AA[irow*2] = CT_AA[irow*2+1] = 0.0; + CT_JA[irow*2] = CT_JA[irow*2+1] = -1; + CT_AA[irow*2] = CT_AA[irow*2+1] = 0.0; for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; searchIndex = hypre_BinarySearch(slaveEqnList_,colIndex,nConstraints); - if ( searchIndex >= 0 ) + if ( searchIndex >= 0 ) { if ( CT_JA[irow*2] == -1 ) { CT_JA[irow*2] = slaveEqnListAux_[searchIndex]; - CT_AA[irow*2] = colVal[jcol]; + CT_AA[irow*2] = colVal[jcol]; } else { CT_JA[irow*2+1] = slaveEqnListAux_[searchIndex]; - CT_AA[irow*2+1] = colVal[jcol]; + CT_AA[irow*2+1] = colVal[jcol]; } } - } + } HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); } //------------------------------------------------------------------ - // invert the (2,1) block of A22 + // invert the (2,1) block of A22 //------------------------------------------------------------------ if ( nConstraints > 0 ) rowTags = new int[nConstraints]; - for ( irow = 0; irow < nConstraints; irow++ ) rowTags[irow] = -1; + for ( irow = 0; irow < nConstraints; irow++ ) rowTags[irow] = -1; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { if ( rowTags[irow] == -1 ) { @@ -4321,7 +4320,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() mat2X2[1] = CT_AA[2*rowIndex]; } rowTags[rowIndex] = 0; - denom = mat2X2[0] * mat2X2[3] - mat2X2[1] * mat2X2[2]; + denom = mat2X2[0] * mat2X2[3] - mat2X2[1] * mat2X2[2]; denom = 1.0 / denom; CT_JA[irow*2] = irow; CT_AA[irow*2] = mat2X2[3] * denom; @@ -4331,55 +4330,55 @@ int HYPRE_SlideReduction::buildReducedMatrix2() CT_AA[rowIndex*2] = mat2X2[0] * denom; CT_JA[rowIndex*2+1] = irow; CT_AA[rowIndex*2+1] = - mat2X2[1] * denom; - } + } rowTags[irow] = 0; - } - } + } + } if ( nConstraints > 0 ) 
delete [] rowTags; //------------------------------------------------------------------ - // form ParCSRMatrix of the (2,1) block of A22 + // form ParCSRMatrix of the (2,1) block of A22 //------------------------------------------------------------------ int *hypreCTMatSize; hypre_ParCSRMatrix *hypreCT; HYPRE_IJMatrix IJCT; - ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], - procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], + ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], + procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], procNConstr_[mypid]+nConstraints-1, &IJCT); ierr += HYPRE_IJMatrixSetObjectType(IJCT, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) hypreCTMatSize = new int[nConstraints]; maxRowSize = 0; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { hypreCTMatSize[irow] = 1; - if ( CT_JA[irow*2+1] != -1 && CT_AA[irow*2+1] != 0.0 ) + if ( CT_JA[irow*2+1] != -1 && CT_AA[irow*2+1] != 0.0 ) hypreCTMatSize[irow]++; } ierr = HYPRE_IJMatrixSetRowSizes(IJCT, hypreCTMatSize); ierr = HYPRE_IJMatrixInitialize(IJCT); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) delete [] hypreCTMatSize; newColInd = new int[2]; newColVal = new double[2]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; newColInd[0] = CT_JA[irow*2] + procNConstr_[mypid]; newColVal[0] = CT_AA[irow*2]; newRowSize = 1; - if ( CT_JA[irow*2+1] != -1 && CT_AA[irow*2+1] != 0.0 ) + if ( CT_JA[irow*2+1] != -1 && CT_AA[irow*2+1] != 0.0 ) { newColInd[1] = CT_JA[irow*2+1] + procNConstr_[mypid]; newColVal[1] = CT_AA[irow*2+1]; newRowSize++; } ierr = HYPRE_IJMatrixSetValues(IJCT, 1, &newRowSize, - (const int *) &rowIndex, (const int *) newColInd, + (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert( !ierr ); + hypre_assert( !ierr ); } delete [] newColInd; delete 
[] newColVal; @@ -4393,7 +4392,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreCT); //------------------------------------------------------------------ - // next extract the (1,2) block of A22 + // next extract the (1,2) block of A22 // ( local slaves-to-constraints ) //------------------------------------------------------------------ @@ -4404,19 +4403,19 @@ int HYPRE_SlideReduction::buildReducedMatrix2() C_JA = new int[nConstraints*2]; C_AA = new double[nConstraints*2]; } - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); - C_JA[irow*2] = C_JA[irow*2+1] = -1; - C_AA[irow*2] = C_AA[irow*2+1] = 0.0; + C_JA[irow*2] = C_JA[irow*2+1] = -1; + C_AA[irow*2] = C_AA[irow*2+1] = 0.0; for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; @@ -4425,26 +4424,26 @@ int HYPRE_SlideReduction::buildReducedMatrix2() if ( C_JA[irow*2] == -1 ) { C_JA[irow*2] = colIndex - newEndRow - 1; - C_AA[irow*2] = colVal[jcol]; + C_AA[irow*2] = colVal[jcol]; } else { C_JA[irow*2+1] = colIndex - newEndRow - 1; - C_AA[irow*2+1] = colVal[jcol]; + C_AA[irow*2+1] = colVal[jcol]; } } - } + } HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); } //------------------------------------------------------------------ - // invert the (1,2) block of A22 + // invert the (1,2) block of A22 //------------------------------------------------------------------ if ( nConstraints > 0 ) rowTags = new int[nConstraints]; - for ( irow = 0; irow < nConstraints; irow++ ) rowTags[irow] = -1; + for ( irow = 0; irow < nConstraints; irow++ ) rowTags[irow] = -1; - for ( irow = 0; irow < 
nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { if ( rowTags[irow] == -1 ) { @@ -4477,7 +4476,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() mat2X2[1] = C_AA[2*rowIndex]; } rowTags[rowIndex] = 0; - denom = mat2X2[0] * mat2X2[3] - mat2X2[1] * mat2X2[3]; + denom = mat2X2[0] * mat2X2[3] - mat2X2[1] * mat2X2[3]; denom = 1.0 / denom; C_JA[irow*2] = irow; C_AA[irow*2] = mat2X2[3] * denom; @@ -4487,55 +4486,55 @@ int HYPRE_SlideReduction::buildReducedMatrix2() C_AA[rowIndex*2] = mat2X2[0] * denom; C_JA[rowIndex*2+1] = irow; C_AA[rowIndex*2+1] = - mat2X2[1] * denom; - } + } rowTags[irow] = 0; - } - } + } + } if ( nConstraints > 0 ) delete [] rowTags; //------------------------------------------------------------------ - // form ParCSRMatrix of the (1,2) block of A22 + // form ParCSRMatrix of the (1,2) block of A22 //------------------------------------------------------------------ int *hypreCMatSize; hypre_ParCSRMatrix *hypreC; HYPRE_IJMatrix IJC; - ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], - procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], + ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], + procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], procNConstr_[mypid]+nConstraints-1, &IJC); ierr += HYPRE_IJMatrixSetObjectType(IJC, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) hypreCMatSize = new int[nConstraints]; maxRowSize = 0; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { hypreCMatSize[irow] = 1; - if ( C_JA[irow*2+1] != -1 && C_AA[irow*2+1] != 0.0 ) + if ( C_JA[irow*2+1] != -1 && C_AA[irow*2+1] != 0.0 ) hypreCMatSize[irow]++; } ierr = HYPRE_IJMatrixSetRowSizes(IJC, hypreCMatSize); ierr = HYPRE_IJMatrixInitialize(IJC); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) delete [] hypreCMatSize; newColInd = new int[2]; newColVal = new double[2]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; 
irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; newColInd[0] = C_JA[irow*2] + procNConstr_[mypid]; newColVal[0] = C_AA[irow*2]; newRowSize = 1; - if ( C_JA[irow*2+1] != -1 && C_AA[irow*2+1] != 0.0 ) + if ( C_JA[irow*2+1] != -1 && C_AA[irow*2+1] != 0.0 ) { newColInd[1] = C_JA[irow*2+1] + procNConstr_[mypid]; newColVal[1] = C_AA[irow*2+1]; newRowSize++; } ierr = HYPRE_IJMatrixSetValues(IJC, 1, &newRowSize, - (const int *) &rowIndex, (const int *) newColInd, + (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert( !ierr ); + hypre_assert( !ierr ); } delete [] newColInd; delete [] newColVal; @@ -4557,29 +4556,29 @@ int HYPRE_SlideReduction::buildReducedMatrix2() HYPRE_IJMatrix IJB; ierr = HYPRE_IJMatrixCreate(mpiComm_, procNConstr_[mypid], - procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], + procNConstr_[mypid]+nConstraints-1, procNConstr_[mypid], procNConstr_[mypid]+nConstraints-1, &IJB); ierr = HYPRE_IJMatrixSetObjectType(IJB, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) hypreBMatSize = new int[nConstraints]; maxRowSize = 0; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, - globalNConstr); + searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, + globalNConstr); if ( searchIndex >= 0 ) newRowSize++; } HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); @@ -4588,7 +4587,7 @@ int 
HYPRE_SlideReduction::buildReducedMatrix2() } ierr = HYPRE_IJMatrixSetRowSizes(IJB, hypreBMatSize); ierr = HYPRE_IJMatrixInitialize(IJB); - assert(!ierr); + hypre_assert(!ierr); if ( nConstraints > 0 ) delete [] hypreBMatSize; if ( maxRowSize > 0 ) @@ -4596,35 +4595,35 @@ int HYPRE_SlideReduction::buildReducedMatrix2() newColInd = new int[maxRowSize]; newColVal = new double[maxRowSize]; } - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { - for ( is = 0; is < nConstraints; is++ ) + for ( is = 0; is < nConstraints; is++ ) { - if ( slaveEqnListAux_[is] == irow ) + if ( slaveEqnListAux_[is] == irow ) { - rowIndex = slaveEqnList_[is]; + rowIndex = slaveEqnList_[is]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; - searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, - globalNConstr); - if ( searchIndex >= 0 ) + searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, + globalNConstr); + if ( searchIndex >= 0 ) { newColInd[newRowSize] = gSlaveEqnListAux_[searchIndex]; newColVal[newRowSize++] = - colVal[jcol]; } } HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); - rowIndex = procNConstr_[mypid] + irow; + rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_IJMatrixSetValues(IJB, 1, &newRowSize, - (const int *) &rowIndex, (const int *) newColInd, + (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert( !ierr ); + hypre_assert( !ierr ); } HYPRE_IJMatrixAssemble(IJB); HYPRE_IJMatrixGetObject(IJB, (void **) &hypreB); @@ -4644,10 +4643,10 @@ int HYPRE_SlideReduction::buildReducedMatrix2() HYPRE_ParCSRMatrixPrint((HYPRE_ParCSRMatrix) hypreCT, fname); strcpy( fname, "hypreB" ); HYPRE_ParCSRMatrixPrint((HYPRE_ParCSRMatrix) hypreB, fname); - hypre_BoomerAMGBuildCoarseOperator(hypreCT, hypreB, hypreCT, + 
hypre_BoomerAMGBuildCoarseOperator(hypreCT, hypreB, hypreCT, (hypre_ParCSRMatrix **) &hypreCBC); #if 0 - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCT,irow,&rowSize, &colInd,&colVal); @@ -4656,7 +4655,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCT,irow,&rowSize, &colInd,&colVal); } - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreB,irow,&rowSize, &colInd,&colVal); @@ -4665,7 +4664,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreB,irow,&rowSize, &colInd,&colVal); } - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCBC,irow,&rowSize, &colInd,&colVal); @@ -4692,11 +4691,11 @@ int HYPRE_SlideReduction::buildReducedMatrix2() // create a matrix context for A22 //------------------------------------------------------------------ - ierr = HYPRE_IJMatrixCreate(mpiComm_, invA22StartRow, - invA22StartRow+invA22NRows-1, invA22StartCol, + ierr = HYPRE_IJMatrixCreate(mpiComm_, invA22StartRow, + invA22StartRow+invA22NRows-1, invA22StartCol, invA22StartCol+invA22NCols-1, &invA22mat_); ierr += HYPRE_IJMatrixSetObjectType(invA22mat_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the no. 
of nonzeros in the first nConstraint row of invA22 @@ -4704,12 +4703,12 @@ int HYPRE_SlideReduction::buildReducedMatrix2() maxRowSize = 0; invA22MatSize = new int[invA22NRows]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCT,rowIndex, &rowSize,NULL,NULL); - assert( !ierr ); + hypre_assert( !ierr ); invA22MatSize[irow] = rowSize; HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCT,rowIndex, &rowSize,NULL,NULL); @@ -4717,12 +4716,12 @@ int HYPRE_SlideReduction::buildReducedMatrix2() } //------------------------------------------------------------------ - // compute the number of nonzeros in the second nConstraints row of + // compute the number of nonzeros in the second nConstraints row of // invA22 (consisting of [D and A22 block]) //------------------------------------------------------------------ #if 0 - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCBC,irow,&rowSize, &colInd,&colVal); @@ -4732,18 +4731,18 @@ int HYPRE_SlideReduction::buildReducedMatrix2() &colInd,&colVal); } #endif - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreC,rowIndex, &rowSize,&colInd,&colVal); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize = rowSize; HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreC,rowIndex, &rowSize,&colInd,&colVal); ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCBC,rowIndex, &rowSize,&colInd,&colVal); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize += rowSize; HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCBC,rowIndex, &rowSize,&colInd,&colVal); @@ -4756,7 +4755,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() 
//------------------------------------------------------------------ #if 0 - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCBC,irow,&rowSize, &colInd,&colVal); @@ -4768,7 +4767,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() #endif ierr = HYPRE_IJMatrixSetRowSizes(invA22mat_, invA22MatSize); ierr += HYPRE_IJMatrixInitialize(invA22mat_); - assert(!ierr); + hypre_assert(!ierr); delete [] invA22MatSize; //------------------------------------------------------------------ @@ -4778,12 +4777,12 @@ int HYPRE_SlideReduction::buildReducedMatrix2() newColInd = new int[maxRowSize+1]; newColVal = new double[maxRowSize+1]; - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCT,rowIndex, &rowSize,&colInd,&colVal); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize = 0; for ( jcol = 0; jcol < rowSize; jcol++ ) { @@ -4794,10 +4793,10 @@ int HYPRE_SlideReduction::buildReducedMatrix2() HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCT,rowIndex, &rowSize,&colInd,&colVal); rowCount = invA22StartRow + irow; - ierr = HYPRE_IJMatrixSetValues(invA22mat_, 1, &rowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(invA22mat_, 1, &rowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } @@ -4805,12 +4804,12 @@ int HYPRE_SlideReduction::buildReducedMatrix2() // next load the second nConstraints rows to A22 extracted from A //------------------------------------------------------------------ - for ( irow = 0; irow < nConstraints; irow++ ) + for ( irow = 0; irow < nConstraints; irow++ ) { rowIndex = procNConstr_[mypid] + irow; ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreC,rowIndex, &rowSize,&colInd,&colVal); - 
assert( !ierr ); + hypre_assert( !ierr ); newRowSize = 0; for ( jcol = 0; jcol < rowSize; jcol++ ) { @@ -4821,7 +4820,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() &rowSize,&colInd,&colVal); ierr = HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreCBC,rowIndex, &rowSize2,&colInd2,&colVal2); - assert( !ierr ); + hypre_assert( !ierr ); for ( jcol = 0; jcol < rowSize2; jcol++ ) { newColInd[newRowSize] = colInd2[jcol] + procNConstr_[mypid] + @@ -4831,10 +4830,10 @@ int HYPRE_SlideReduction::buildReducedMatrix2() HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreCBC,rowIndex, &rowSize2,&colInd2,&colVal2); rowCount = invA22StartRow + nConstraints + irow; - ierr = HYPRE_IJMatrixSetValues(invA22mat_, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(invA22mat_, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } delete [] newColInd; delete [] newColVal; @@ -4855,13 +4854,13 @@ int HYPRE_SlideReduction::buildReducedMatrix2() sprintf(fname, "invA22.%d", mypid); FILE *fp = fopen(fname, "w"); - if ( mypid == ncnt ) + if ( mypid == ncnt ) { printf("====================================================\n"); printf("%4d : Printing invA22 matrix... 
\n", mypid); fflush(stdout); } - for (irow=invA22StartRow; irow < invA22StartRow+invA22NRows;irow++) + for (irow=invA22StartRow; irow < invA22StartRow+invA22NRows;irow++) { HYPRE_ParCSRMatrixGetRow(invA22_csr,irow,&rowSize,&colInd,&colVal); for ( jcol = 0; jcol < rowSize; jcol++ ) @@ -4871,7 +4870,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() HYPRE_ParCSRMatrixRestoreRow(invA22_csr,irow,&rowSize,&colInd, &colVal); } - if ( mypid == ncnt ) + if ( mypid == ncnt ) printf("====================================================\n"); fclose(fp); } @@ -4888,11 +4887,11 @@ int HYPRE_SlideReduction::buildReducedMatrix2() hypre_BoomerAMGBuildCoarseOperator((hypre_ParCSRMatrix *) A21_csr, (hypre_ParCSRMatrix *) invA22_csr, - (hypre_ParCSRMatrix *) A21_csr, + (hypre_ParCSRMatrix *) A21_csr, (hypre_ParCSRMatrix **) &RAP_csr); if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) - printf("%4d : buildReducedMatrix - Triple matrix product ends\n", + printf("%4d : buildReducedMatrix - Triple matrix product ends\n", mypid); if ( outputLevel_ >= 4 ) @@ -4906,7 +4905,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() printf("%4d : Printing RAP matrix... 
\n", mypid); fflush(stdout); } - for ( irow = A21StartRow; irow < A21StartRow+A21NCols; irow++ ) + for ( irow = A21StartRow; irow < A21StartRow+A21NCols; irow++ ) { HYPRE_ParCSRMatrixGetRow(RAP_csr,irow,&rowSize,&colInd,&colVal); for ( jcol = 0; jcol < rowSize; jcol++ ) @@ -4939,9 +4938,9 @@ int HYPRE_SlideReduction::buildReducedMatrix2() if ( ( outputLevel_ & HYPRE_BITMASK2 ) >= 1 ) { - printf("%4d : buildReducedMatrix - reduceAGlobalDim = %d %d\n", mypid, + printf("%4d : buildReducedMatrix - reduceAGlobalDim = %d %d\n", mypid, reducedAGlobalNRows, reducedAGlobalNCols); - printf("%4d : buildReducedMatrix - reducedALocalDim = %d %d\n", mypid, + printf("%4d : buildReducedMatrix - reducedALocalDim = %d %d\n", mypid, reducedANRows, reducedANCols); } @@ -4953,16 +4952,16 @@ int HYPRE_SlideReduction::buildReducedMatrix2() reducedAStartRow+reducedANRows-1, reducedAStartCol, reducedAStartCol+reducedANCols-1,&reducedAmat_); ierr += HYPRE_IJMatrixSetObjectType(reducedAmat_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute row sizes for reducedA //------------------------------------------------------------------ rowCount = maxRowSize = 0; - for ( irow = startRow; irow <= newEndRow; irow++ ) + for ( irow = startRow; irow <= newEndRow; irow++ ) { - searchIndex = hypre_BinarySearch(slaveEqnList_, irow, nConstraints); + searchIndex = hypre_BinarySearch(slaveEqnList_, irow, nConstraints); if ( searchIndex >= 0 ) reducedAMatSize[rowCount++] = 1; else { @@ -4970,17 +4969,17 @@ int HYPRE_SlideReduction::buildReducedMatrix2() rowIndex = reducedAStartRow + rowCount; ierr = HYPRE_ParCSRMatrixGetRow(RAP_csr,rowIndex,&rowSize2, &colInd2, NULL); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize = rowSize + rowSize2; maxRowSize = ( newRowSize > maxRowSize ) ? 
newRowSize : maxRowSize; newColInd = new int[newRowSize]; for (jcol = 0; jcol < rowSize; jcol++) newColInd[jcol] = colInd[jcol]; - for (jcol = 0; jcol < rowSize2; jcol++) + for (jcol = 0; jcol < rowSize2; jcol++) newColInd[rowSize+jcol] = colInd2[jcol]; hypre_qsort0(newColInd, 0, newRowSize-1); ncnt = 0; - for ( jcol = 1; jcol < newRowSize; jcol++ ) - if (newColInd[jcol] != newColInd[ncnt]) + for ( jcol = 1; jcol < newRowSize; jcol++ ) + if (newColInd[jcol] != newColInd[ncnt]) newColInd[++ncnt] = newColInd[jcol]; if ( newRowSize > 0 ) ncnt++; reducedAMatSize[rowIndex++] = ncnt; @@ -4988,25 +4987,25 @@ int HYPRE_SlideReduction::buildReducedMatrix2() ierr = HYPRE_ParCSRMatrixRestoreRow(RAP_csr,rowIndex,&rowSize2, &colInd2,NULL); delete [] newColInd; - assert( !ierr ); + hypre_assert( !ierr ); rowCount++; } } ierr = HYPRE_IJMatrixSetRowSizes(reducedAmat_, reducedAMatSize); ierr += HYPRE_IJMatrixInitialize(reducedAmat_); - assert(!ierr); + hypre_assert(!ierr); delete [] reducedAMatSize; //------------------------------------------------------------------ - // load the reducedA matrix + // load the reducedA matrix //------------------------------------------------------------------ rowCount = 0; newColInd = new int[maxRowSize+1]; newColVal = new double[maxRowSize+1]; - for ( irow = startRow; irow <= newEndRow; irow++ ) + for ( irow = startRow; irow <= newEndRow; irow++ ) { - searchIndex = hypre_BinarySearch(slaveEqnList_, irow, nConstraints); + searchIndex = hypre_BinarySearch(slaveEqnList_, irow, nConstraints); rowIndex = reducedAStartRow + rowCount; if ( searchIndex >= 0 ) { @@ -5021,53 +5020,53 @@ int HYPRE_SlideReduction::buildReducedMatrix2() &colVal2); newRowSize = rowSize + rowSize2; ncnt = 0; - for ( jcol = 0; jcol < rowSize; jcol++ ) + for ( jcol = 0; jcol < rowSize; jcol++ ) { colIndex = colInd[jcol]; for ( procIndex = 0; procIndex < nprocs; procIndex++ ) if ( procNRows[procIndex] > colIndex ) break; - uBound = procNRows[procIndex] - + uBound = 
procNRows[procIndex] - (procNConstr_[procIndex]-procNConstr_[procIndex-1]); procIndex--; - if ( colIndex < uBound ) + if ( colIndex < uBound ) { searchIndex = hypre_BinarySearch(gSlaveEqnList_, colIndex, globalNConstr); - if ( searchIndex < 0 ) + if ( searchIndex < 0 ) { newColInd[ncnt] = colIndex - procNConstr_[procIndex]; - newColVal[ncnt++] = colVal[jcol]; + newColVal[ncnt++] = colVal[jcol]; } } } - for ( jcol = 0; jcol < rowSize2; jcol++ ) + for ( jcol = 0; jcol < rowSize2; jcol++ ) { - newColInd[ncnt+jcol] = colInd2[jcol]; - newColVal[ncnt+jcol] = - colVal2[jcol]; + newColInd[ncnt+jcol] = colInd2[jcol]; + newColVal[ncnt+jcol] = - colVal2[jcol]; } newRowSize = ncnt + rowSize2; hypre_qsort1(newColInd, newColVal, 0, newRowSize-1); ncnt = 0; - for ( jcol = 0; jcol < newRowSize; jcol++ ) + for ( jcol = 0; jcol < newRowSize; jcol++ ) { - if ( jcol != ncnt && newColInd[jcol] == newColInd[ncnt] ) + if ( jcol != ncnt && newColInd[jcol] == newColInd[ncnt] ) newColVal[ncnt] += newColVal[jcol]; - else if ( newColInd[jcol] != newColInd[ncnt] ) + else if ( newColInd[jcol] != newColInd[ncnt] ) { ncnt++; newColVal[ncnt] = newColVal[jcol]; newColInd[ncnt] = newColInd[jcol]; - } - } + } + } newRowSize = ncnt + 1; HYPRE_ParCSRMatrixRestoreRow(A_csr,irow,&rowSize,&colInd,&colVal); HYPRE_ParCSRMatrixRestoreRow(RAP_csr,rowIndex,&rowSize2,&colInd2, &colVal2); } - ierr = HYPRE_IJMatrixSetValues(reducedAmat_, 1, &newRowSize, - (const int *) &rowIndex, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(reducedAmat_, 1, &newRowSize, + (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); rowCount++; } delete [] newColInd; @@ -5091,7 +5090,7 @@ int HYPRE_SlideReduction::buildReducedMatrix2() printf("====================================================\n"); printf("%4d : Printing reducedA matrix... 
\n", mypid); fflush(stdout); - for ( irow = reducedAStartRow; + for ( irow = reducedAStartRow; irow < reducedAStartRow+localNRows-nConstraints; irow++ ) { //printf("%d : reducedA ROW %d\n", mypid, irow); diff --git a/src/FEI_mv/fei-hypre/HYPRE_parcsr_TFQmr.c b/src/FEI_mv/fei-hypre/HYPRE_parcsr_TFQmr.c index 58c5644f3..cae959567 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_parcsr_TFQmr.c +++ b/src/FEI_mv/fei-hypre/HYPRE_parcsr_TFQmr.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "utilities/_hypre_utilities.h" diff --git a/src/FEI_mv/fei-hypre/HYPRE_parcsr_bicgs.c b/src/FEI_mv/fei-hypre/HYPRE_parcsr_bicgs.c index 7fbcd9074..fec15ee1b 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_parcsr_bicgs.c +++ b/src/FEI_mv/fei-hypre/HYPRE_parcsr_bicgs.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "utilities/_hypre_utilities.h" diff --git a/src/FEI_mv/fei-hypre/HYPRE_parcsr_bicgstabl.c b/src/FEI_mv/fei-hypre/HYPRE_parcsr_bicgstabl.c index 3087f1cb2..c0e7da3d0 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_parcsr_bicgstabl.c +++ b/src/FEI_mv/fei-hypre/HYPRE_parcsr_bicgstabl.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "utilities/_hypre_utilities.h" diff --git a/src/FEI_mv/fei-hypre/HYPRE_parcsr_fgmres.c b/src/FEI_mv/fei-hypre/HYPRE_parcsr_fgmres.c index a75d5b13d..4eab4a900 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_parcsr_fgmres.c +++ b/src/FEI_mv/fei-hypre/HYPRE_parcsr_fgmres.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "utilities/_hypre_utilities.h" diff --git a/src/FEI_mv/fei-hypre/HYPRE_parcsr_lsicg.c b/src/FEI_mv/fei-hypre/HYPRE_parcsr_lsicg.c index 32b28f229..b45926b94 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_parcsr_lsicg.c +++ b/src/FEI_mv/fei-hypre/HYPRE_parcsr_lsicg.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "utilities/_hypre_utilities.h" diff --git a/src/FEI_mv/fei-hypre/HYPRE_parcsr_maxwell.c b/src/FEI_mv/fei-hypre/HYPRE_parcsr_maxwell.c index 
942c8e401..eb9298b45 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_parcsr_maxwell.c +++ b/src/FEI_mv/fei-hypre/HYPRE_parcsr_maxwell.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "utilities/_hypre_utilities.h" @@ -54,11 +53,11 @@ int HYPRE_ParCSRCotreeCreate(MPI_Comm comm, HYPRE_Solver *solver) { hypre_CotreeData *cotree_data; void *void_data; - + cotree_data = hypre_CTAlloc(hypre_CotreeData, 1, HYPRE_MEMORY_HOST); void_data = (void *) cotree_data; *solver = (HYPRE_Solver) void_data; - + (cotree_data -> Aee) = NULL; (cotree_data -> Acc) = NULL; (cotree_data -> Act) = NULL; @@ -82,7 +81,7 @@ int HYPRE_ParCSRCotreeDestroy(HYPRE_Solver solver) { void *cotree_vdata = (void *) solver; hypre_CotreeData *cotree_data = (hypre_CotreeData *) cotree_vdata; - + if (cotree_data) { hypre_TFree(cotree_data, HYPRE_MEMORY_HOST); @@ -163,16 +162,16 @@ int HYPRE_ParCSRCotreeSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, &submatrices); cotree_data->Gt = submatrices[0]; cotree_data->Gc = submatrices[1]; - free(submatrices); + hypre_TFree(submatrices, HYPRE_MEMORY_HOST); comm = hypre_ParCSRMatrixComm((hypre_ParCSRMatrix *) A); MPI_Comm_size(comm, &nprocs); partition = hypre_ParVectorPartitioning((hypre_ParVector *) b); new_partition = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); for (ii = 0; ii <= nprocs; ii++) new_partition[ii] = partition[ii]; -/* partition = hypre_ParVectorPartitioning((hypre_ParVector *) b); */ + /* partition = hypre_ParVectorPartitioning((hypre_ParVector *) b); */ new_vector = hypre_ParVectorCreate(hypre_ParVectorComm((hypre_ParVector *)b), - (int) hypre_ParVectorGlobalSize((hypre_ParVector *) b), + (int) hypre_ParVectorGlobalSize((hypre_ParVector *) b), new_partition); hypre_ParVectorInitialize(new_vector); cotree_data->w = new_vector; diff --git a/src/FEI_mv/fei-hypre/HYPRE_parcsr_superlu.c b/src/FEI_mv/fei-hypre/HYPRE_parcsr_superlu.c index 1bf06d130..67500ee77 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_parcsr_superlu.c +++ 
b/src/FEI_mv/fei-hypre/HYPRE_parcsr_superlu.c @@ -59,7 +59,7 @@ HYPRE_SuperLU; #endif /*************************************************************************** - * HYPRE_ParCSR_SuperLUCreate - Return a SuperLU object "solver". + * HYPRE_ParCSR_SuperLUCreate - Return a SuperLU object "solver". *--------------------------------------------------------------------------*/ int HYPRE_ParCSR_SuperLUCreate( MPI_Comm comm, HYPRE_Solver *solver ) @@ -67,7 +67,7 @@ int HYPRE_ParCSR_SuperLUCreate( MPI_Comm comm, HYPRE_Solver *solver ) #ifdef HAVE_SUPERLU int nprocs; HYPRE_SuperLU *sluPtr; - + MPI_Comm_size(comm, &nprocs); if ( nprocs > 1 ) { @@ -75,7 +75,7 @@ int HYPRE_ParCSR_SuperLUCreate( MPI_Comm comm, HYPRE_Solver *solver ) return -1; } sluPtr = hypre_TAlloc(HYPRE_SuperLU, 1, HYPRE_MEMORY_HOST); - assert ( sluPtr != NULL ); + hypre_assert ( sluPtr != NULL ); sluPtr->factorized_ = 0; sluPtr->permR_ = NULL; sluPtr->permC_ = NULL; @@ -97,10 +97,10 @@ int HYPRE_ParCSR_SuperLUDestroy( HYPRE_Solver solver ) { #ifdef HAVE_SUPERLU HYPRE_SuperLU *sluPtr = (HYPRE_SuperLU *) solver; - assert ( sluPtr != NULL ); - if ( sluPtr->permR_ != NULL ) free(sluPtr->permR_); - if ( sluPtr->permC_ != NULL ) free(sluPtr->permC_); - free(sluPtr); + hypre_assert ( sluPtr != NULL ); + hypre_TFree(sluPtr->permR_, HYPRE_MEMORY_HOST); + hypre_TFree(sluPtr->permC_, HYPRE_MEMORY_HOST); + hypre_TFree(sluPtr, HYPRE_MEMORY_HOST); return 0; #else printf("HYPRE_ParCSR_SuperLUDestroy ERROR - SuperLU not enabled.\n"); @@ -110,14 +110,14 @@ int HYPRE_ParCSR_SuperLUDestroy( HYPRE_Solver solver ) } /*************************************************************************** - * HYPRE_ParCSR_SuperLUSetOutputLevel - Set debug level + * HYPRE_ParCSR_SuperLUSetOutputLevel - Set debug level *--------------------------------------------------------------------------*/ int HYPRE_ParCSR_SuperLUSetOutputLevel(HYPRE_Solver solver, int level) { #ifdef HAVE_SUPERLU HYPRE_SuperLU *sluPtr = (HYPRE_SuperLU *) solver; - 
assert ( sluPtr != NULL ); + hypre_assert ( sluPtr != NULL ); sluPtr->outputLevel_ = level; return 0; #else @@ -151,12 +151,12 @@ int HYPRE_ParCSR_SuperLUSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, /* ---------------------------------------------------------------- */ sluPtr = (HYPRE_SuperLU *) solver; - assert ( sluPtr != NULL ); + hypre_assert ( sluPtr != NULL ); HYPRE_ParCSRMatrixGetRowPartitioning( A_csr, &partition ); startRow = partition[0]; endRow = partition[1] - 1; nrows = endRow - startRow + 1; - free( partition ); + hypre_TFree(partition, HYPRE_MEMORY_HOST); if ( startRow != 0 ) { printf("HYPRE_ParCSR_SuperLUSetup ERROR - start row != 0.\n"); @@ -209,12 +209,12 @@ int HYPRE_ParCSR_SuperLUSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, nnz += colLengs[jcol-1]; cscJ[jcol] = nnz; } - free(colLengs); + hypre_TFree(colLengs, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- */ /* create SuperMatrix */ /* ---------------------------------------------------------------- */ - + dCreate_CompCol_Matrix(&sluAmat,nrows,nrows,cscJ[nrows],cscA,cscI, cscJ, SLU_NC, SLU_D, SLU_GE); etree = hypre_TAlloc(int, nrows , HYPRE_MEMORY_HOST); @@ -239,7 +239,7 @@ int HYPRE_ParCSR_SuperLUSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, &(sluPtr->SLU_Lmat), &(sluPtr->SLU_Umat), &slu_stat, &info); Destroy_CompCol_Permuted(&auxAmat); Destroy_CompCol_Matrix(&sluAmat); - free(etree); + hypre_TFree(etree, HYPRE_MEMORY_HOST); sluPtr->factorized_ = 1; StatFree(&slu_stat); return 0; @@ -269,7 +269,7 @@ int HYPRE_ParCSR_SuperLUSolve(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, /* make sure setup has been called */ /* ---------------------------------------------------------------- */ - assert ( sluPtr != NULL ); + hypre_assert ( sluPtr != NULL ); if ( ! 
(sluPtr->factorized_) ) { printf("HYPRE_ParCSR_SuperLUSolve ERROR - not factorized yet.\n"); @@ -282,7 +282,7 @@ int HYPRE_ParCSR_SuperLUSolve(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, xData = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *)x)); bData = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *)b)); - nrows = hypre_ParVectorGlobalSize((hypre_ParVector *)x); + nrows = hypre_ParVectorGlobalSize((hypre_ParVector *)x); for (i = 0; i < nrows; i++) xData[i] = bData[i]; /* ---------------------------------------------------------------- */ @@ -297,7 +297,7 @@ int HYPRE_ParCSR_SuperLUSolve(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, trans = NOTRANS; StatInit(&slu_stat); - dgstrs (trans, &(sluPtr->SLU_Lmat), &(sluPtr->SLU_Umat), + dgstrs (trans, &(sluPtr->SLU_Lmat), &(sluPtr->SLU_Umat), sluPtr->permC_, sluPtr->permR_, &B, &slu_stat, &info); Destroy_SuperMatrix_Store(&B); StatFree(&slu_stat); diff --git a/src/FEI_mv/fei-hypre/HYPRE_parcsr_symqmr.c b/src/FEI_mv/fei-hypre/HYPRE_parcsr_symqmr.c index 1a84c7963..b304bb352 100644 --- a/src/FEI_mv/fei-hypre/HYPRE_parcsr_symqmr.c +++ b/src/FEI_mv/fei-hypre/HYPRE_parcsr_symqmr.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include "utilities/_hypre_utilities.h" diff --git a/src/FEI_mv/fei-hypre/LLNL_FEI_Fei.cxx b/src/FEI_mv/fei-hypre/LLNL_FEI_Fei.cxx index ddb8b2f0f..b84f99ba1 100644 --- a/src/FEI_mv/fei-hypre/LLNL_FEI_Fei.cxx +++ b/src/FEI_mv/fei-hypre/LLNL_FEI_Fei.cxx @@ -13,7 +13,6 @@ #include #include #include -#include #include #include "_hypre_utilities.h" #include "HYPRE.h" @@ -972,7 +971,7 @@ int LLNL_FEI_Fei::loadComplete() for ( iB = 0; iB < numBlocks_; iB++ ) { ierr = elemBlocks_[iB]->checkLoadComplete(); - assert( !ierr ); + hypre_assert( !ierr ); } /* ----------------------------------------------------------------- diff --git a/src/FEI_mv/fei-hypre/LLNL_FEI_Impl.cxx b/src/FEI_mv/fei-hypre/LLNL_FEI_Impl.cxx index e8208d922..76ab462f0 100644 --- 
a/src/FEI_mv/fei-hypre/LLNL_FEI_Impl.cxx +++ b/src/FEI_mv/fei-hypre/LLNL_FEI_Impl.cxx @@ -13,22 +13,21 @@ #include #include #include -#include #include #include "LLNL_FEI_Impl.h" /*------------------------------------------------------------------------- - local defines + local defines -------------------------------------------------------------------------*/ #define SOLVERLOCK 1024 /************************************************************************** - LLNL_FEI_Impl is the top level finite element interface. + LLNL_FEI_Impl is the top level finite element interface. **************************************************************************/ /************************************************************************** - Constructor + Constructor -------------------------------------------------------------------------*/ LLNL_FEI_Impl::LLNL_FEI_Impl( MPI_Comm comm ) { @@ -41,7 +40,7 @@ LLNL_FEI_Impl::LLNL_FEI_Impl( MPI_Comm comm ) } /************************************************************************** - destructor + destructor -------------------------------------------------------------------------*/ LLNL_FEI_Impl::~LLNL_FEI_Impl() { @@ -51,7 +50,7 @@ LLNL_FEI_Impl::~LLNL_FEI_Impl() } /************************************************************************** - parameter function + parameter function -------------------------------------------------------------------------*/ int LLNL_FEI_Impl::parameters(int numParams, char **paramString) { @@ -79,10 +78,10 @@ int LLNL_FEI_Impl::parameters(int numParams, char **paramString) } } FLAG_SolverLib_ |= SOLVERLOCK; - if ( (FLAG_SolverLib_ - SOLVERLOCK) > 0 ) + if ( (FLAG_SolverLib_ - SOLVERLOCK) > 0 ) { if ( lscPtr_ != NULL ) delete lscPtr_; - if ( solverPtr_ != NULL ) + if ( solverPtr_ != NULL ) { delete solverPtr_; solverPtr_ = NULL; @@ -94,10 +93,10 @@ int LLNL_FEI_Impl::parameters(int numParams, char **paramString) solver = HYPRE; lscPtr_ = new LLNL_FEI_LSCore(solver); } - else + else { if ( solverPtr_ != 
NULL ) delete solverPtr_; - if ( lscPtr_ != NULL ) + if ( lscPtr_ != NULL ) { delete lscPtr_; lscPtr_ = NULL; @@ -105,19 +104,19 @@ int LLNL_FEI_Impl::parameters(int numParams, char **paramString) solverPtr_ = new LLNL_FEI_Solver(mpiComm_); } feiPtr_->parameters(numParams,paramString); - if (solverPtr_ != NULL) solverPtr_->parameters(numParams,paramString); - if (lscPtr_ != NULL) lscPtr_->parameters(numParams,paramString); + if (solverPtr_ != NULL) solverPtr_->parameters(numParams,paramString); + if (lscPtr_ != NULL) lscPtr_->parameters(numParams,paramString); return 0; } /************************************************************************** - solve + solve -------------------------------------------------------------------------*/ int LLNL_FEI_Impl::solve(int *status) { double *rhsVector, *solnVector; - if ((FLAG_SolverLib_ & SOLVERLOCK) != 0) FLAG_SolverLib_ -= SOLVERLOCK; + if ((FLAG_SolverLib_ & SOLVERLOCK) != 0) FLAG_SolverLib_ -= SOLVERLOCK; feiPtr_->getRHSVector(&rhsVector); feiPtr_->getSolnVector(&solnVector); feiPtr_->getMatrix(&matPtr_); @@ -143,7 +142,7 @@ int LLNL_FEI_Impl::solve(int *status) offsets = matPtr_->getEqnOffsets(); lscPtr_->setGlobalOffsets(localNRows, NULL, offsets, NULL); maxRowSize = 0; - for ( i = 0; i < localNRows; i++ ) + for ( i = 0; i < localNRows; i++ ) { rowSize = diagIA[i+1] - diagIA[i]; if (offdIA != NULL ) rowSize += offdIA[i+1] - offdIA[i]; @@ -154,25 +153,25 @@ int LLNL_FEI_Impl::solve(int *status) colInds = new int[maxRowSize]; colVals = new double[maxRowSize]; } - for ( i = 0; i < localNRows; i++ ) + for ( i = 0; i < localNRows; i++ ) { rowSize = 0; - for ( j = diagIA[i]; j < diagIA[i+1]; j++ ) + for ( j = diagIA[i]; j < diagIA[i+1]; j++ ) { - colInds[rowSize] = diagJA[j] + offsets[mypid]; - colVals[rowSize++] = diagAA[j]; + colInds[rowSize] = diagJA[j] + offsets[mypid]; + colVals[rowSize++] = diagAA[j]; } if ( offdIA != NULL ) { - for ( j = offdIA[i]; j < offdIA[i+1]; j++ ) + for ( j = offdIA[i]; j < offdIA[i+1]; j++ 
) { - colInds[rowSize] = colMap[offdJA[j]-localNRows]; - colVals[rowSize++] = offdAA[j]; + colInds[rowSize] = colMap[offdJA[j]-localNRows]; + colVals[rowSize++] = offdAA[j]; } } rowInd = offsets[mypid] + i; - lscPtr_->putIntoSystemMatrix(one, &rowInd, rowSize, - (const int *) colInds, (const double* const*) &colVals); + lscPtr_->putIntoSystemMatrix(one, &rowInd, rowSize, + (const int *) colInds, (const double* const*) &colVals); } if ( maxRowSize > 0 ) { @@ -183,7 +182,7 @@ int LLNL_FEI_Impl::solve(int *status) for ( i = 0; i < localNRows; i++ ) indices[i] = i + offsets[mypid]; lscPtr_->putIntoRHSVector(localNRows, (const double *) rhsVector, (const int *) indices); - lscPtr_->putInitialGuess((const int *) indices, + lscPtr_->putInitialGuess((const int *) indices, (const double *) solnVector, localNRows); lscPtr_->matrixLoadComplete(); // Charles : this status check not in application code? @@ -196,9 +195,9 @@ int LLNL_FEI_Impl::solve(int *status) } /************************************************************************** - residual norm calculation + residual norm calculation -------------------------------------------------------------------------*/ -int LLNL_FEI_Impl::residualNorm(int whichNorm, int numFields, int *fieldIDs, +int LLNL_FEI_Impl::residualNorm(int whichNorm, int numFields, int *fieldIDs, double *norms ) { (void) numFields; diff --git a/src/FEI_mv/fei-hypre/LLNL_FEI_LSCore.cxx b/src/FEI_mv/fei-hypre/LLNL_FEI_LSCore.cxx index 190ba93f7..aaa6897f6 100644 --- a/src/FEI_mv/fei-hypre/LLNL_FEI_LSCore.cxx +++ b/src/FEI_mv/fei-hypre/LLNL_FEI_LSCore.cxx @@ -13,31 +13,30 @@ #include #include #include -#include #include "_hypre_utilities.h" #include "HYPRE.h" #include "LLNL_FEI_LSCore.h" /************************************************************************** - Constructor + Constructor -------------------------------------------------------------------------*/ LLNL_FEI_LSCore::LLNL_FEI_LSCore(SolverLib_t solverLib) -{ +{ lsc_ = NULL; - switch (solverLib) 
+ switch (solverLib) { case HYPRE: lsc_ = HYPRE_base_create(MPI_COMM_WORLD ); if ( lsc_ == NULL ) printf("problem building HYPRE\n"); - break; + break; default: printf("unable to determine library type in LLNL_FEI_LSCore."); } } /************************************************************************** - destructor + destructor -------------------------------------------------------------------------*/ LLNL_FEI_LSCore::~LLNL_FEI_LSCore() { @@ -45,7 +44,7 @@ LLNL_FEI_LSCore::~LLNL_FEI_LSCore() } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ int LLNL_FEI_LSCore::setGlobalOffsets(int leng, int *nodeOffsets, int *eqnOffsets, int *blkEqnOffsets) @@ -54,7 +53,7 @@ int LLNL_FEI_LSCore::setGlobalOffsets(int leng, int *nodeOffsets, } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ int LLNL_FEI_LSCore::setMatrixStructure(int **ptColIndices, int *ptRowLengths, int **blkColIndices,int *blkRowLengths,int *ptRowsPerBlkRow) @@ -64,9 +63,9 @@ int LLNL_FEI_LSCore::setMatrixStructure(int **ptColIndices, int *ptRowLengths, } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ -int LLNL_FEI_LSCore::sumIntoSystemMatrix(int nRows, const int *rows, +int LLNL_FEI_LSCore::sumIntoSystemMatrix(int nRows, const int *rows, int nCols, const int* cols, const double* const* vals) { @@ -74,9 +73,9 @@ int LLNL_FEI_LSCore::sumIntoSystemMatrix(int nRows, const int *rows, } /************************************************************************** - direct access to LSC functions + direct access to 
LSC functions -------------------------------------------------------------------------*/ -int LLNL_FEI_LSCore::putIntoSystemMatrix(int nRows, const int *rows, +int LLNL_FEI_LSCore::putIntoSystemMatrix(int nRows, const int *rows, int nCols, const int* cols, const double* const* vals) { @@ -84,7 +83,7 @@ int LLNL_FEI_LSCore::putIntoSystemMatrix(int nRows, const int *rows, } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ int LLNL_FEI_LSCore::matrixLoadComplete() { @@ -92,25 +91,25 @@ int LLNL_FEI_LSCore::matrixLoadComplete() } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ -int LLNL_FEI_LSCore::sumIntoRHSVector(int num, const double *vals, +int LLNL_FEI_LSCore::sumIntoRHSVector(int num, const double *vals, const int *indices) { return(lsc_->sumIntoRHSVector(num, vals, indices)); } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ -int LLNL_FEI_LSCore::putIntoRHSVector(int num, const double *vals, +int LLNL_FEI_LSCore::putIntoRHSVector(int num, const double *vals, const int *indices) { return(lsc_->putIntoRHSVector(num, vals, indices)); } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ int LLNL_FEI_LSCore::putInitialGuess(const int *eqnNumbers, const double *values, int len) @@ -119,7 +118,7 @@ int LLNL_FEI_LSCore::putInitialGuess(const int *eqnNumbers, } 
/************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ int LLNL_FEI_LSCore::parameters( int nParams, char **params) { @@ -127,7 +126,7 @@ int LLNL_FEI_LSCore::parameters( int nParams, char **params) } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ int LLNL_FEI_LSCore::solve( int *status, int *iterations) { @@ -135,7 +134,7 @@ int LLNL_FEI_LSCore::solve( int *status, int *iterations) } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ int LLNL_FEI_LSCore::formResidual( double* values, int leng) { @@ -143,7 +142,7 @@ int LLNL_FEI_LSCore::formResidual( double* values, int leng) } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ int LLNL_FEI_LSCore::getSolution( double *answers, int leng) { @@ -151,7 +150,7 @@ int LLNL_FEI_LSCore::getSolution( double *answers, int leng) } /************************************************************************** - direct access to LSC functions + direct access to LSC functions -------------------------------------------------------------------------*/ int LLNL_FEI_LSCore::getSolnEntry( int eqnNum, double *answers) { diff --git a/src/FEI_mv/fei-hypre/LLNL_FEI_Matrix.cxx b/src/FEI_mv/fei-hypre/LLNL_FEI_Matrix.cxx index 4a775fe7b..cfee68754 100644 --- a/src/FEI_mv/fei-hypre/LLNL_FEI_Matrix.cxx +++ b/src/FEI_mv/fei-hypre/LLNL_FEI_Matrix.cxx @@ -14,14 +14,13 @@ #include #include 
#include -#include #include #include "_hypre_utilities.h" #include "HYPRE.h" #include "LLNL_FEI_Matrix.h" /************************************************************************** - Constructor + Constructor -------------------------------------------------------------------------*/ LLNL_FEI_Matrix::LLNL_FEI_Matrix( MPI_Comm comm ) { @@ -76,7 +75,7 @@ LLNL_FEI_Matrix::LLNL_FEI_Matrix( MPI_Comm comm ) } /************************************************************************** - destructor + destructor -------------------------------------------------------------------------*/ LLNL_FEI_Matrix::~LLNL_FEI_Matrix() { @@ -145,7 +144,7 @@ int LLNL_FEI_Matrix::resetMatrix(double s) if ( sendProcs_ != NULL ) delete [] sendProcs_; if ( sendProcIndices_ != NULL ) delete [] sendProcIndices_; if ( dSendBufs_ != NULL ) delete [] dSendBufs_; - if ( mpiRequests_ != NULL ) delete [] mpiRequests_; + if ( mpiRequests_ != NULL ) delete [] mpiRequests_; localNRows_ = 0; nConstraints_ = 0; extNRows_ = 0; @@ -178,8 +177,8 @@ int LLNL_FEI_Matrix::resetMatrix(double s) /************************************************************************** set element and node information -------------------------------------------------------------------------*/ -int LLNL_FEI_Matrix::setMatrix(int nRows, int *diagIA, int *diagJA, - double *diagAA, int extNRows, int *colMap, int *offdIA, +int LLNL_FEI_Matrix::setMatrix(int nRows, int *diagIA, int *diagJA, + double *diagAA, int extNRows, int *colMap, int *offdIA, int *offdJA, double *offdAA, double *diagonal, int *eqnOffsets, int *crOffsets) { @@ -260,7 +259,7 @@ int LLNL_FEI_Matrix::setComplete() } /************************************************************************** - set constraints + set constraints -------------------------------------------------------------------------*/ int LLNL_FEI_Matrix::setConstraints(int nConstr, int *constrEqns) { @@ -273,7 +272,7 @@ int LLNL_FEI_Matrix::setConstraints(int nConstr, int *constrEqns) 
/************************************************************************** form residual norm -------------------------------------------------------------------------*/ -int LLNL_FEI_Matrix::residualNorm(int whichNorm, double *solnVec, +int LLNL_FEI_Matrix::residualNorm(int whichNorm, double *solnVec, double *rhsVec, double* norms) { int totalNRows, irow; @@ -284,15 +283,15 @@ int LLNL_FEI_Matrix::residualNorm(int whichNorm, double *solnVec, totalNRows = localNRows_ + extNRows_; rVec = new double[totalNRows]; - matvec( solnVec, rVec ); - for ( irow = 0; irow < localNRows_; irow++ ) + matvec( solnVec, rVec ); + for ( irow = 0; irow < localNRows_; irow++ ) rVec[irow] = rhsVec[irow] - rVec[irow]; - switch(whichNorm) + switch(whichNorm) { case 0: rnorm = 0.0; - for ( irow = 0; irow < localNRows_; irow++ ) + for ( irow = 0; irow < localNRows_; irow++ ) { dtemp = fabs( rVec[irow] ); if ( dtemp > rnorm ) rnorm = dtemp; @@ -302,14 +301,14 @@ int LLNL_FEI_Matrix::residualNorm(int whichNorm, double *solnVec, break; case 1: rnorm = 0.0; - for ( irow = 0; irow < localNRows_; irow++ ) + for ( irow = 0; irow < localNRows_; irow++ ) rnorm += fabs( rVec[irow] ); MPI_Allreduce(&rnorm, &dtemp, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); (*norms) = dtemp; break; case 2: rnorm = 0.0; - for ( irow = 0; irow < localNRows_; irow++ ) + for ( irow = 0; irow < localNRows_; irow++ ) rnorm += rVec[irow] * rVec[irow]; MPI_Allreduce(&rnorm, &dtemp, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); (*norms) = sqrt(dtemp); @@ -340,10 +339,10 @@ void LLNL_FEI_Matrix::matvec(double *xvec, double *yvec) * in case global stiffness matrix has been composed, use it * -----------------------------------------------------------------*/ - for ( iD = 0; iD < matDim; iD++ ) + for ( iD = 0; iD < matDim; iD++ ) { ddata = 0.0; - for ( iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) + for ( iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) ddata += diagAA_[iD2] * xvec[diagJA_[iD2]]; yvec[iD] = ddata; } @@ -354,10 +353,10 @@ void 
LLNL_FEI_Matrix::matvec(double *xvec, double *yvec) if ( offdIA_ != NULL ) { - for ( iD = 0; iD < matDim; iD++ ) + for ( iD = 0; iD < matDim; iD++ ) { ddata = 0.0; - for ( iD2 = offdIA_[iD]; iD2 < offdIA_[iD+1]; iD2++ ) + for ( iD2 = offdIA_[iD]; iD2 < offdIA_[iD+1]; iD2++ ) ddata += offdAA_[iD2] * dExtBufs_[offdJA_[iD2]-localNRows_]; yvec[iD] += ddata; } @@ -391,7 +390,7 @@ void LLNL_FEI_Matrix::scatterDData( double *dvec ) for ( iD = 0; iD < sendLengs_[iP]; iD++ ) { ind1 = sendProcIndices_[offset+iD]; - dSendBufs_[offset+iD] = dvec[ind1]; + dSendBufs_[offset+iD] = dvec[ind1]; } MPI_Send( &dSendBufs_[offset], sendLengs_[iP], MPI_DOUBLE, sendProcs_[iP], 40343, mpiComm_); @@ -405,14 +404,14 @@ void LLNL_FEI_Matrix::scatterDData( double *dvec ) for ( iD = 0; iD < recvLengs_[iP]; iD++ ) { ind1 = recvProcIndices_[offset+iD] - localNRows_; - dExtBufs_[ind1] = dRecvBufs_[offset+iD]; + dExtBufs_[ind1] = dRecvBufs_[offset+iD]; } offset += recvLengs_[iP]; } } /************************************************************************** - exchange data between processors + exchange data between processors -------------------------------------------------------------------------*/ void LLNL_FEI_Matrix::gatherAddDData( double *dvec ) { @@ -432,7 +431,7 @@ void LLNL_FEI_Matrix::gatherAddDData( double *dvec ) for ( iD = 0; iD < recvLengs_[iP]; iD++ ) { ind1 = recvProcIndices_[offset+iD]; - dRecvBufs_[offset+iD] = dvec[ind1]; + dRecvBufs_[offset+iD] = dvec[ind1]; } MPI_Send( &dRecvBufs_[offset], recvLengs_[iP], MPI_DOUBLE, recvProcs_[iP], 40342, mpiComm_); @@ -446,7 +445,7 @@ void LLNL_FEI_Matrix::gatherAddDData( double *dvec ) for ( iD = 0; iD < sendLengs_[iP]; iD++ ) { ind1 = sendProcIndices_[offset+iD]; - dvec[ind1] += dSendBufs_[offset+iD]; + dvec[ind1] += dSendBufs_[offset+iD]; } offset += sendLengs_[iP]; } @@ -475,11 +474,11 @@ void LLNL_FEI_Matrix::printMatrix() { for ( iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) if ( diagJA_[iD2] == iD ) - fprintf(fp,"%6d %6d %25.16e 
\n", iD+offset+1, + fprintf(fp,"%6d %6d %25.16e \n", iD+offset+1, diagJA_[iD2]+offset+1, diagAA_[iD2]); for ( iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) if ( diagJA_[iD2] != iD ) - fprintf(fp,"%6d %6d %25.16e \n", iD+offset+1, + fprintf(fp,"%6d %6d %25.16e \n", iD+offset+1, diagJA_[iD2]+offset+1, diagAA_[iD2]); if ( offdIA_ != NULL ) { @@ -490,7 +489,7 @@ void LLNL_FEI_Matrix::printMatrix() } } } - if ( FLAG_MatrixOverlap_ == 1 ) + if ( FLAG_MatrixOverlap_ == 1 ) { iEnd = localNRows_ + extNRows_; for ( iD = localNRows_; iD < iEnd; iD++ ) @@ -530,14 +529,14 @@ void LLNL_FEI_Matrix::printMatrix() /************************************************************************** perform local matrix matrix multiplication -------------------------------------------------------------------------*/ -void LLNL_FEI_Matrix::matMult( int ANRows, int ANCols, int *AIA, int *AJA, - double *AAA, int BNRows, int BNCols, int *BIA, int *BJA, - double *BAA, int *DNRows, int *DNCols, int **DIA, int **DJA, +void LLNL_FEI_Matrix::matMult( int ANRows, int ANCols, int *AIA, int *AJA, + double *AAA, int BNRows, int BNCols, int *BIA, int *BJA, + double *BAA, int *DNRows, int *DNCols, int **DIA, int **DJA, double **DAA) { (void) ANCols; (void) BNRows; - int CNRows, CNCols, CNnz, *CReg, ia, ib, ia2, colIndA, colIndB, iTemp; + int CNRows, CNCols, CNnz, *CReg, ia, ib, ia2, colIndA, colIndB, iTemp; int *CIA, *CJA, offset; double dTempA, dTempB, *CAA; @@ -696,12 +695,12 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() * construct and fill the communication buffers for sending matrix rows * -----------------------------------------------------------------*/ - if ( nRecvs > 0 ) + if ( nRecvs > 0 ) { dRecvBufs = new double*[nRecvs]; iRecvBufs = new int*[nRecvs]; } - if ( nSends > 0 ) + if ( nSends > 0 ) { dSendBufs = new double*[nSends]; iSendBufs = new int*[nSends]; @@ -725,7 +724,7 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() { for ( iD = offdIA_[currRow]; iD < offdIA_[currRow+1]; iD++ ) { - index 
= extColMap_[offdJA_[iD]-localNRows_]; + index = extColMap_[offdJA_[iD]-localNRows_]; iSendBufs[iP][count] = index; dSendBufs[iP][count++] = offdAA_[iD]; } @@ -762,7 +761,7 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() for ( iP = 0; iP < nRecvs; iP++ ) MPI_Wait( &requests[iP], &status ); if ( nRecvs > 0 ) delete [] requests; - if ( nSends > 0 ) + if ( nSends > 0 ) { for ( iP = 0; iP < nSends; iP++ ) delete [] sendRowLengs[iP]; delete [] sendRowLengs; @@ -780,12 +779,12 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() rowStart = eqnOffset; rowEndp1 = rowStart + localNRows_; diagRowLengs = new int[localNRows_]; - for ( iD = 0; iD < localNRows_; iD++ ) + for ( iD = 0; iD < localNRows_; iD++ ) diagRowLengs[iD] = diagIA_[iD+1] - diagIA_[iD]; offdRowLengs = new int[localNRows_]; if ( offdIA_ != NULL ) { - for ( iD = 0; iD < localNRows_; iD++ ) + for ( iD = 0; iD < localNRows_; iD++ ) offdRowLengs[iD] = offdIA_[iD+1] - offdIA_[iD]; } else @@ -793,13 +792,13 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() for ( iD = 0; iD < localNRows_; iD++ ) offdRowLengs[iD] = 0; } offset = 0; - for ( iP = 0; iP < nSends_; iP++ ) + for ( iP = 0; iP < nSends_; iP++ ) { count = 0; - for ( iN = 0; iN < sendLengs_[iP]; iN++ ) + for ( iN = 0; iN < sendLengs_[iP]; iN++ ) { rowInd = sendProcIndices_[offset+iN]; - for ( iD = 0; iD < recvRowLengs[iP][iN]; iD++ ) + for ( iD = 0; iD < recvRowLengs[iP][iN]; iD++ ) { index = iRecvBufs[iP][count++]; if ( index >= rowStart && index < rowEndp1 ) @@ -820,12 +819,12 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() TdiagJA = new int[newDiagNNZ]; TdiagAA = new double[newDiagNNZ]; TdiagIA[0] = 0; - for ( iD = 1; iD <= localNRows_; iD++ ) + for ( iD = 1; iD <= localNRows_; iD++ ) TdiagIA[iD] = TdiagIA[iD-1] + diagRowLengs[iD-1]; - for ( iD = 0; iD < localNRows_; iD++ ) + for ( iD = 0; iD < localNRows_; iD++ ) { index = TdiagIA[iD]; - for ( iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) + for ( iD2 = diagIA_[iD]; iD2 < diagIA_[iD+1]; iD2++ ) { TdiagJA[index] = 
diagJA_[iD2]; TdiagAA[index] = diagAA_[iD2]; @@ -842,16 +841,16 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() ToffdJA = new int[newOffdNNZ]; ToffdAA = new double[newOffdNNZ]; ToffdIA[0] = 0; - for ( iD = 1; iD <= localNRows_; iD++ ) + for ( iD = 1; iD <= localNRows_; iD++ ) ToffdIA[iD] = ToffdIA[iD-1] + offdRowLengs[iD-1]; if ( offdIA_ != NULL ) { - for ( iD = 0; iD < localNRows_; iD++ ) + for ( iD = 0; iD < localNRows_; iD++ ) { index = ToffdIA[iD]; - for ( iD2 = offdIA_[iD]; iD2 < offdIA_[iD+1]; iD2++ ) + for ( iD2 = offdIA_[iD]; iD2 < offdIA_[iD+1]; iD2++ ) { - count = extColMap_[offdJA_[iD2]-localNRows_]; + count = extColMap_[offdJA_[iD2]-localNRows_]; ToffdJA[index] = count; ToffdAA[index] = offdAA_[iD2]; index++; @@ -867,13 +866,13 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() } } offset = 0; - for ( iP = 0; iP < nSends_; iP++ ) + for ( iP = 0; iP < nSends_; iP++ ) { count = 0; - for ( iN = 0; iN < sendLengs_[iP]; iN++ ) + for ( iN = 0; iN < sendLengs_[iP]; iN++ ) { rowInd = sendProcIndices_[offset+iN]; - for ( iD = 0; iD < recvRowLengs[iP][iN]; iD++ ) + for ( iD = 0; iD < recvRowLengs[iP][iN]; iD++ ) { index = iRecvBufs[iP][count]; if ( index >= rowStart && index < rowEndp1 ) @@ -891,7 +890,7 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() } offset += sendLengs_[iP]; } - if (nRecvs > 0) + if (nRecvs > 0) { for ( iP = 0; iP < nRecvs; iP++ ) delete [] iRecvBufs[iP]; for ( iP = 0; iP < nRecvs; iP++ ) delete [] dRecvBufs[iP]; @@ -908,15 +907,15 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() * -----------------------------------------------------------------*/ TdiagIA[0] = 0; - for ( iD = 1; iD <= localNRows_; iD++ ) + for ( iD = 1; iD <= localNRows_; iD++ ) TdiagIA[iD] = TdiagIA[iD-1] + diagRowLengs[iD-1]; - for ( iD = 0; iD < localNRows_; iD++ ) + for ( iD = 0; iD < localNRows_; iD++ ) { index = TdiagIA[iD]; leng = diagRowLengs[iD]; IntSort2a(&(TdiagJA[index]),&(TdiagAA[index]),0,leng-1); count = index; - for ( iN = index+1; iN < index+leng; iN++ ) + for ( iN 
= index+1; iN < index+leng; iN++ ) { if ( TdiagJA[iN] != TdiagJA[count] ) { @@ -936,11 +935,11 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() diagAA_ = new double[newDiagNNZ]; newDiagNNZ = 0; diagIA_[0] = newDiagNNZ; - for ( iD = 0; iD < localNRows_; iD++ ) + for ( iD = 0; iD < localNRows_; iD++ ) { index = TdiagIA[iD]; leng = diagRowLengs[iD]; - for ( iN = index; iN < index+leng; iN++ ) + for ( iN = index; iN < index+leng; iN++ ) { diagJA_[newDiagNNZ] = TdiagJA[iN]; diagAA_[newDiagNNZ++] = TdiagAA[iN]; @@ -959,16 +958,16 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() if ( newOffdNNZ > 0 ) { ToffdIA[0] = 0; - for ( iD = 1; iD <= localNRows_; iD++ ) + for ( iD = 1; iD <= localNRows_; iD++ ) ToffdIA[iD] = ToffdIA[iD-1] + offdRowLengs[iD-1]; newOffdNNZ = 0; - for ( iD = 0; iD < localNRows_; iD++ ) + for ( iD = 0; iD < localNRows_; iD++ ) { index = ToffdIA[iD]; leng = offdRowLengs[iD]; IntSort2a(&(ToffdJA[index]),&(ToffdAA[index]),0,leng-1); count = index; - for ( iN = index+1; iN < index+leng; iN++ ) + for ( iN = index+1; iN < index+leng; iN++ ) { if ( ToffdJA[iN] != ToffdJA[count] ) { @@ -980,14 +979,14 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() } if ( leng > 0 ) offdRowLengs[iD] = count - index + 1; else offdRowLengs[iD] = 0; - for ( iN = 0; iN < offdRowLengs[iD]; iN++ ) + for ( iN = 0; iN < offdRowLengs[iD]; iN++ ) { ToffdJA[newOffdNNZ] = ToffdJA[index+iN]; ToffdAA[newOffdNNZ++] = ToffdAA[index+iN]; } } } - + /* ----------------------------------------------------------------- * sort the off-diagonal block to find distinct indices and construct * new receive information @@ -1001,20 +1000,20 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() /* sort all the off-diagonal indices */ iSortArray1 = new int[newOffdNNZ]; - for ( iD = 0; iD < newOffdNNZ; iD++ ) iSortArray1[iD] = ToffdJA[iD]; + for ( iD = 0; iD < newOffdNNZ; iD++ ) iSortArray1[iD] = ToffdJA[iD]; iSortArray2 = new int[newOffdNNZ]; - for ( iD = 0; iD < newOffdNNZ; iD++ ) iSortArray2[iD] = iD; + for ( iD = 0; 
iD < newOffdNNZ; iD++ ) iSortArray2[iD] = iD; IntSort2(iSortArray1, iSortArray2, 0, newOffdNNZ-1); /* put the short list in iShortList and the offset in iSortArray1 */ totalRecvs = 0; index = iSortArray1[0]; - for ( iD = 1; iD < newOffdNNZ; iD++ ) + for ( iD = 1; iD < newOffdNNZ; iD++ ) { - if ( iSortArray1[iD] != index ) + if ( iSortArray1[iD] != index ) { - totalRecvs++; + totalRecvs++; index = iSortArray1[iD]; } } @@ -1024,12 +1023,12 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() index = iSortArray1[0]; iShortList[0] = iSortArray1[0]; iSortArray1[0] = totalRecvs; - for ( iD = 1; iD < newOffdNNZ; iD++ ) + for ( iD = 1; iD < newOffdNNZ; iD++ ) { - if ( iSortArray1[iD] != index ) + if ( iSortArray1[iD] != index ) { - totalRecvs++; - index = iSortArray1[iD]; + totalRecvs++; + index = iSortArray1[iD]; iShortList[totalRecvs] = index; } iSortArray1[iD] = totalRecvs; @@ -1041,24 +1040,24 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() /* convert the indices in ToffdJA */ - for ( iD = 0; iD < newOffdNNZ; iD++ ) + for ( iD = 0; iD < newOffdNNZ; iD++ ) ToffdJA[iSortArray2[iD]] = iSortArray1[iD] + localNRows_; /* compress the Toffd matrix */ ToffdIA[0] = 0; - for ( iD = 1; iD <= localNRows_; iD++ ) + for ( iD = 1; iD <= localNRows_; iD++ ) ToffdIA[iD] = ToffdIA[iD-1] + offdRowLengs[iD-1]; offdIA_ = ToffdIA; offdJA_ = new int[newOffdNNZ]; offdAA_ = new double[newOffdNNZ]; newOffdNNZ = 0; - for ( iD = 0; iD < localNRows_; iD++ ) + for ( iD = 0; iD < localNRows_; iD++ ) { index = ToffdIA[iD]; leng = offdRowLengs[iD]; - for ( iN = index; iN < index+leng; iN++ ) + for ( iN = index; iN < index+leng; iN++ ) { offdJA_[newOffdNNZ] = ToffdJA[iN]; offdAA_[newOffdNNZ++] = ToffdAA[iN]; @@ -1070,8 +1069,8 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() /* construct nRecvs, recvLengs and recvProcs */ procLengs = new int[nprocs+1]; - for ( iP = 0; iP < nprocs; iP++ ) procLengs[iP] = 0; - for ( iP = 0; iP <= nprocs; iP++ ) + for ( iP = 0; iP < nprocs; iP++ ) procLengs[iP] = 0; + for ( iP = 
0; iP <= nprocs; iP++ ) { index = globalEqnOffsets_[iP]; iD2 = BinarySearch2(iShortList,0,totalRecvs,index); @@ -1081,8 +1080,8 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() procLengs[iP] = iD2; } nRecvs = 0; - for ( iP = 0; iP < nprocs; iP++ ) - if ( procLengs[iP] != procLengs[iP+1] ) nRecvs++; + for ( iP = 0; iP < nprocs; iP++ ) + if ( procLengs[iP] != procLengs[iP+1] ) nRecvs++; if ( nRecvs > 0 ) { recvProcs = new int[nRecvs]; @@ -1094,11 +1093,11 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() recvLengs = NULL; } nRecvs = 0; - for ( iP = 0; iP < nprocs; iP++ ) - if ( procLengs[iP] != procLengs[iP+1] ) + for ( iP = 0; iP < nprocs; iP++ ) + if ( procLengs[iP] != procLengs[iP+1] ) { - recvLengs[nRecvs] = procLengs[iP+1] - procLengs[iP]; - recvProcs[nRecvs++] = iP; + recvLengs[nRecvs] = procLengs[iP+1] - procLengs[iP]; + recvProcs[nRecvs++] = iP; } delete [] iSortArray1; delete [] iSortArray2; @@ -1115,7 +1114,7 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() delete [] offdRowLengs; /* ----------------------------------------------------------------- - * diagnostics + * diagnostics * -----------------------------------------------------------------*/ #if 0 @@ -1123,8 +1122,8 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() char fname[20]; sprintf(fname,"extMap.%d",mypid_); FILE *fp = fopen(fname, "w"); - for ( iD = 0; iD < extNRows_; iD++ ) - fprintf(fp,"%10d %10d\n",iD,extColMap_[iD]); + for ( iD = 0; iD < extNRows_; iD++ ) + fprintf(fp,"%10d %10d\n",iD,extColMap_[iD]); for ( iP = 0; iP < nRecvs; iP++ ) fprintf(fp,"recv proc = %10d, length = %10d\n",recvProcs[iP], recvLengs[iP]); @@ -1153,16 +1152,16 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() requests = new MPI_Request[nSends]; } for ( iP = 0; iP < nSends; iP++ ) - MPI_Irecv(&(sendLengs[iP]),1,MPI_INT,MPI_ANY_SOURCE,12233,mpiComm_, + MPI_Irecv(&(sendLengs[iP]),1,MPI_INT,MPI_ANY_SOURCE,12233,mpiComm_, &requests[iP]); for ( iP = 0; iP < nRecvs; iP++ ) 
MPI_Send(&(recvLengs[iP]),1,MPI_INT,recvProcs[iP],12233,mpiComm_); - for ( iP = 0; iP < nSends; iP++ ) + for ( iP = 0; iP < nSends; iP++ ) { MPI_Wait( &requests[iP], &status ); sendProcs[iP] = status.MPI_SOURCE; } - if ( nSends > 0 ) + if ( nSends > 0 ) { count = 0; for ( iP = 0; iP < nSends; iP++ ) count += sendLengs[iP]; @@ -1205,10 +1204,10 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() { for ( iN = 0; iN < sendLengs_[iP]; iN++ ) { - if ( sendProcIndices[count+iN] < eqnOffset || + if ( sendProcIndices[count+iN] < eqnOffset || sendProcIndices[count+iN] >= eqnOffset+localNRows_ ) - printf("%4d : exchangeSubMatrices ERROR : sendIndex %d (%d,%d).\n", - mypid_, sendProcIndices[count+iN], eqnOffset, + printf("%4d : exchangeSubMatrices ERROR : sendIndex %d (%d,%d).\n", + mypid_, sendProcIndices[count+iN], eqnOffset, eqnOffset+localNRows_); else sendProcIndices[count+iN] -= eqnOffset; @@ -1236,7 +1235,7 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() if (nRecvs_+nSends_ > 0) mpiRequests_ = new MPI_Request[nRecvs_+nSends_]; /* ----------------------------------------------------------------- - * diagnostics + * diagnostics * -----------------------------------------------------------------*/ #if 0 @@ -1245,7 +1244,7 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() sprintf(fname,"commInfo.%d",mypid_); FILE *fp = fopen(fname, "w"); count = 0; - for ( iP = 0; iP < nRecvs_; iP++ ) + for ( iP = 0; iP < nRecvs_; iP++ ) { fprintf(fp,"recv from %10d = %10d\n",recvProcs_[iP],recvLengs_[iP]); for ( iD = 0; iD < recvLengs_[iP]; iD++ ) @@ -1255,7 +1254,7 @@ void LLNL_FEI_Matrix::exchangeSubMatrices() } } count = 0; - for ( iP = 0; iP < nSends_; iP++ ) + for ( iP = 0; iP < nSends_; iP++ ) { fprintf(fp,"send to %10d = %10d\n",sendProcs_[iP],sendLengs_[iP]); for ( iD = 0; iD < sendLengs_[iP]; iD++ ) @@ -1279,7 +1278,7 @@ int LLNL_FEI_Matrix::BinarySearch2(int *map, int start, int mapSize, int num) int k, khi, klo ; if (map == NULL) return -1 ; - + klo = start ; khi = start + mapSize; k = 
((khi+klo) >> 1) + 1 ; diff --git a/src/FEI_mv/fei-hypre/LLNL_FEI_Solver.cxx b/src/FEI_mv/fei-hypre/LLNL_FEI_Solver.cxx index 68cb68396..caf107ade 100644 --- a/src/FEI_mv/fei-hypre/LLNL_FEI_Solver.cxx +++ b/src/FEI_mv/fei-hypre/LLNL_FEI_Solver.cxx @@ -13,7 +13,6 @@ #include #include #include -#include #include #include "_hypre_utilities.h" #include "HYPRE.h" @@ -29,7 +28,7 @@ #endif /************************************************************************** - Constructor + Constructor -------------------------------------------------------------------------*/ LLNL_FEI_Solver::LLNL_FEI_Solver( MPI_Comm comm ) { @@ -117,7 +116,7 @@ int LLNL_FEI_Solver::parameters(int numParams, char **paramString) else if ( !strcmp(param, "gmres") ) solverID_ = 1; else if ( !strcmp(param, "cgs") ) solverID_ = 2; else if ( !strcmp(param, "bicgstab")) solverID_ = 3; - else if ( !strcmp(param, "superlu") ) + else if ( !strcmp(param, "superlu") ) { #ifdef HAVE_SUPERLU MPI_Comm_size( mpiComm_, &nprocs ); @@ -254,16 +253,16 @@ int LLNL_FEI_Solver::solveUsingCG() diagonal = matPtr_->getMatrixDiagonal(); totalNRows = localNRows + extNRows; rVec = new double[totalNRows]; - + /* ----------------------------------------------------------------- * compute initial residual vector and norm * -----------------------------------------------------------------*/ - - matPtr_->matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) + + matPtr_->matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm0 = rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { rnorm0 += (rVec[irow] * rVec[irow]); rnorm += (rhsVector_[irow] * rhsVector_[irow]); @@ -275,7 +274,7 @@ int LLNL_FEI_Solver::solveUsingCG() rnorm = sqrt(dArray2[0]); if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tLLNL_FEI_Solver_CG initial rnorm = %e (%e)\n",rnorm,rnorm0); - if ( rnorm0 == 0.0 ) + if ( rnorm0 
== 0.0 ) { delete [] rVec; return 0; @@ -299,17 +298,17 @@ int LLNL_FEI_Solver::solveUsingCG() * loop until convergence is achieved * -----------------------------------------------------------------*/ - while ( converged == 0 && numTrials < 2 ) + while ( converged == 0 && numTrials < 2 ) { innerIteration = 0; - while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) + while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) { iter++; innerIteration++; if ( innerIteration == 1 ) { if ( diagonal != NULL ) - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) zVec[irow] = rVec[irow] * diagonal[irow]; else for (irow = 0; irow < localNRows; irow++) @@ -317,7 +316,7 @@ int LLNL_FEI_Solver::solveUsingCG() rhom1 = rho; rho = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rho += rVec[irow] * zVec[irow]; dArray[0] = rho; MPI_Allreduce(dArray, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); @@ -325,56 +324,56 @@ int LLNL_FEI_Solver::solveUsingCG() beta = 0.0; } else beta = rho / rhom1; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) pVec[irow] = zVec[irow] + beta * pVec[irow]; - matPtr_->matvec( pVec, apVec ); + matPtr_->matvec( pVec, apVec ); sigma = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) sigma += pVec[irow] * apVec[irow]; dArray[0] = sigma; MPI_Allreduce(dArray, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); sigma = dArray2[0]; - alpha = rho / sigma; - for ( irow = 0; irow < localNRows; irow++ ) + alpha = rho / sigma; + for ( irow = 0; irow < localNRows; irow++ ) { solnVector_[irow] += alpha * pVec[irow]; rVec[irow] -= alpha * apVec[irow]; } rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; dArray[0] = rnorm; if ( diagonal != NULL ) - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; 
irow++) zVec[irow] = rVec[irow] * diagonal[irow]; else for (irow = 0; irow < localNRows; irow++) zVec[irow] = rVec[irow]; rhom1 = rho; rho = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rho += rVec[irow] * zVec[irow]; dArray[1] = rho; MPI_Allreduce(dArray, dArray2, 2, MPI_DOUBLE, MPI_SUM, mpiComm_); - rho = dArray2[1]; + rho = dArray2[1]; rnorm = sqrt( dArray2[0] ); if ( outputLevel_ >= 2 && iter % 1 == 0 && mypid_ == 0 ) printf("\tLLNL_FEI_Solver_CG : iteration %d - rnorm = %e (%e)\n", iter, rnorm, eps1); } - matPtr_->matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) - rVec[irow] = rhsVector_[irow] - rVec[irow]; + matPtr_->matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) + rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; dArray[0] = rnorm; MPI_Allreduce(dArray, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt( dArray2[0] ); if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tLLNL_FEI_Solver_CG actual rnorm = %e \n",rnorm); - if ( (rnorm < eps1 || rnorm < 1.0e-16) || + if ( (rnorm < eps1 || rnorm < 1.0e-16) || iter >= krylovMaxIterations_ ) converged = 1; numTrials++; } @@ -416,17 +415,17 @@ int LLNL_FEI_Solver::solveUsingGMRES() for (iV = 0; iV <= gmresDim_+1; iV++) kVectors[iV] = new double[totalNRows]; dArray = new double[gmresDim_+1]; dArray2 = new double[gmresDim_+1]; - + /* ----------------------------------------------------------------- * compute initial residual vector and norm * -----------------------------------------------------------------*/ - + tVector = kVectors[1]; - matPtr_->matvec( solnVector_, tVector ); - for ( irow = 0; irow < localNRows; irow++ ) + matPtr_->matvec( solnVector_, tVector ); + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] = rhsVector_[irow] - tVector[irow]; rnorm0 = rnorm = 0.0; 
- for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { rnorm0 += (tVector[irow] * tVector[irow]); rnorm += (rhsVector_[irow] * rhsVector_[irow]); @@ -439,7 +438,7 @@ int LLNL_FEI_Solver::solveUsingGMRES() if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tLLNL_FEI_Solver_GMRES initial rnorm = %e (%e)\n", rnorm, rnorm0); - if ( rnorm0 < 1.0e-20 ) + if ( rnorm0 < 1.0e-20 ) { for (iV = 0; iV <= gmresDim_+1; iV++) delete [] kVectors[iV]; delete [] kVectors; @@ -466,7 +465,7 @@ int LLNL_FEI_Solver::solveUsingGMRES() iter = 0; - while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) + while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) { dtemp = 1.0 / rnorm; tVector = kVectors[1]; @@ -474,8 +473,8 @@ int LLNL_FEI_Solver::solveUsingGMRES() RS[1] = rnorm; innerIterations = 0; - while ( innerIterations < gmresDim_ && rnorm >= eps1 && - iter < krylovMaxIterations_ ) + while ( innerIterations < gmresDim_ && rnorm >= eps1 && + iter < krylovMaxIterations_ ) { innerIterations++; iter++; @@ -484,63 +483,63 @@ int LLNL_FEI_Solver::solveUsingGMRES() v1 = kVectors[kStep]; v2 = kVectors[0]; if ( diagonal != NULL ) - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) v2[irow] = v1[irow] * diagonal[irow]; else for (irow = 0; irow < localNRows; irow++) v2[irow] = v1[irow]; - matPtr_->matvec( kVectors[0], kVectors[kp1] ); + matPtr_->matvec( kVectors[0], kVectors[kp1] ); #if 0 tVector = kVectors[kp1]; - for ( iV = 1; iV <= kStep; iV++ ) + for ( iV = 1; iV <= kStep; iV++ ) { dtemp = 0.0; tVector2 = kVectors[iV]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += tVector2[irow] * tVector[irow]; dArray[iV-1] = dtemp; } - MPI_Allreduce(dArray, dArray2, kStep, MPI_DOUBLE, MPI_SUM, + MPI_Allreduce(dArray, dArray2, kStep, MPI_DOUBLE, MPI_SUM, mpiComm_); tVector = kVectors[kp1]; - for ( iV = 1; iV <= kStep; iV++ ) + for ( iV = 1; iV <= kStep; iV++ ) { dtemp = 
dArray2[iV-1]; - HH[iV][kStep] = dtemp; + HH[iV][kStep] = dtemp; tVector2 = kVectors[iV]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] -= dtemp * tVector2[irow]; } #else tVector = kVectors[kp1]; - for ( iV = 1; iV <= kStep; iV++ ) + for ( iV = 1; iV <= kStep; iV++ ) { dtemp = 0.0; tVector2 = kVectors[iV]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += tVector2[irow] * tVector[irow]; dArray[0] = dtemp; MPI_Allreduce(dArray, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); dtemp = dArray2[0]; - HH[iV][kStep] = dtemp; - for ( irow = 0; irow < localNRows; irow++ ) + HH[iV][kStep] = dtemp; + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] -= dtemp * tVector2[irow]; } #endif dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += tVector[irow] * tVector[irow]; MPI_Allreduce(&dtemp, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); dtemp = sqrt(dArray2[0]); HH[kp1][kStep] = dtemp; - if ( dtemp != 0.0 ) + if ( dtemp != 0.0 ) { dtemp = 1.0 / dtemp; for (irow = 0; irow < localNRows; irow++) tVector[irow] *= dtemp; } - for ( iV = 2; iV <= kStep; iV++ ) + for ( iV = 2; iV <= kStep; iV++ ) { dtemp = HH[iV-1][kStep]; HH[iV-1][kStep] = C[iV-1] * dtemp + S[iV-1] * HH[iV][kStep]; @@ -553,7 +552,7 @@ int LLNL_FEI_Solver::solveUsingGMRES() S[kStep] = HH[kp1][kStep] / gam; RS[kp1] = -S[kStep] * RS[kStep]; RS[kStep] = C[kStep] * RS[kStep]; - HH[kStep][kStep] = C[kStep] * HH[kStep][kStep] + + HH[kStep][kStep] = C[kStep] * HH[kStep][kStep] + S[kStep] * HH[kp1][kStep]; rnorm = fabs(RS[kp1]); if ( outputLevel_ >= 2 && mypid_ == 0 ) @@ -561,42 +560,42 @@ int LLNL_FEI_Solver::solveUsingGMRES() iter, rnorm); } RS[kStep] = RS[kStep] / HH[kStep][kStep]; - for ( iV = 2; iV <= kStep; iV++ ) + for ( iV = 2; iV <= kStep; iV++ ) { iV2 = kStep - iV + 1; dtemp = RS[iV2]; - for ( jV = iV2+1; jV <= kStep; jV++ ) + for ( jV = iV2+1; jV 
<= kStep; jV++ ) dtemp = dtemp - HH[iV2][jV] * RS[jV]; RS[iV2] = dtemp / HH[iV2][iV2]; } tVector = kVectors[1]; dtemp = RS[1]; for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] *= dtemp; - for ( iV = 2; iV <= kStep; iV++ ) + for ( iV = 2; iV <= kStep; iV++ ) { dtemp = RS[iV]; tVector2 = kVectors[iV]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] += dtemp * tVector2[irow]; } tVector = kVectors[1]; if ( diagonal != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) tVector[irow] *= diagonal[irow]; } - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) solnVector_[irow] += tVector[irow]; - matPtr_->matvec( solnVector_, tVector ); - for ( irow = 0; irow < localNRows; irow++ ) + matPtr_->matvec( solnVector_, tVector ); + for ( irow = 0; irow < localNRows; irow++ ) tVector[irow] = rhsVector_[irow] - tVector[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += (tVector[irow] * tVector[irow]); MPI_Allreduce(&rnorm, dArray2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt(dArray2[0]); } - if ( rnorm < eps1 ) converged = 1; + if ( rnorm < eps1 ) converged = 1; if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tLLNL_FEI_Solver_GMRES : final rnorm = %e\n", rnorm); @@ -620,7 +619,7 @@ int LLNL_FEI_Solver::solveUsingGMRES() } /************************************************************************** - solve linear system using CGS + solve linear system using CGS -------------------------------------------------------------------------*/ int LLNL_FEI_Solver::solveUsingCGS() { @@ -639,16 +638,16 @@ int LLNL_FEI_Solver::solveUsingCGS() diagonal = matPtr_->getMatrixDiagonal(); totalNRows = localNRows + extNRows; rVec = new double[totalNRows]; - + /* ----------------------------------------------------------------- * compute initial residual vector and norm * 
-----------------------------------------------------------------*/ - - matPtr_->matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) + + matPtr_->matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm0 = rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { rnorm0 += (rVec[irow] * rVec[irow]); rnorm += (rhsVector_[irow] * rhsVector_[irow]); @@ -660,7 +659,7 @@ int LLNL_FEI_Solver::solveUsingCGS() rnorm = sqrt(dArray2[0]); if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tLLNL_FEI_Solver_CGS initial rnorm = %e (%e)\n",rnorm,rnorm0); - if ( rnorm0 == 0.0 ) + if ( rnorm0 == 0.0 ) { delete [] rVec; return 0; @@ -690,16 +689,16 @@ int LLNL_FEI_Solver::solveUsingCGS() * loop until convergence is achieved * -----------------------------------------------------------------*/ - while ( converged == 0 && numTrials < 1 ) + while ( converged == 0 && numTrials < 1 ) { innerIteration = 0; - while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) + while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) { iter++; innerIteration++; rho1 = rho2; beta2 = beta * beta; - for (irow = 0; irow < totalNRows; irow++) + for (irow = 0; irow < totalNRows; irow++) { tVec[irow] = beta * qVec[irow]; uVec[irow] = rVec[irow] + tVec[irow]; @@ -707,28 +706,28 @@ int LLNL_FEI_Solver::solveUsingCGS() } if ( diagonal != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) tVec[irow] = pVec[irow] * diagonal[irow]; } else for (irow = 0; irow < localNRows; irow++) tVec[irow] = pVec[irow]; - matPtr_->matvec( tVec, vVec ); + matPtr_->matvec( tVec, vVec ); sigma = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) sigma += (rhVec[irow] * vVec[irow]); MPI_Allreduce(&sigma, dArray, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); sigma = dArray[0]; alpha = rho1 / sigma; - for (irow = 0; irow < 
totalNRows; irow++) + for (irow = 0; irow < totalNRows; irow++) { qVec[irow] = uVec[irow] - alpha * vVec[irow]; uVec[irow] += qVec[irow]; } if ( diagonal != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) { tVec[irow] = uVec[irow] * diagonal[irow]; solnVector_[irow] += alpha * uVec[irow] * diagonal[irow]; @@ -736,19 +735,19 @@ int LLNL_FEI_Solver::solveUsingCGS() } else { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) { tVec[irow] = uVec[irow]; solnVector_[irow] += alpha * uVec[irow]; } } - matPtr_->matvec( tVec, vVec ); + matPtr_->matvec( tVec, vVec ); - for (irow = 0; irow < totalNRows; irow++) + for (irow = 0; irow < totalNRows; irow++) rVec[irow] -= alpha * vVec[irow]; dtemp = dtemp2 = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { dtemp += (rVec[irow] * rhVec[irow]); dtemp2 += (rVec[irow] * rVec[irow]); @@ -763,11 +762,11 @@ int LLNL_FEI_Solver::solveUsingCGS() printf("\tLLNL_FEI_Solver_CGS : iteration %d - rnorm = %e (%e)\n", iter, rnorm, eps1); } - matPtr_->matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) - rVec[irow] = rhsVector_[irow] - rVec[irow]; + matPtr_->matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) + rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; MPI_Allreduce(&rnorm, dArray, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt( dArray[0] ); @@ -795,7 +794,7 @@ int LLNL_FEI_Solver::solveUsingCGS() } /************************************************************************** - solve linear system using Bicgstab + solve linear system using Bicgstab -------------------------------------------------------------------------*/ int LLNL_FEI_Solver::solveUsingBicgstab() { @@ -815,16 +814,16 @@ int LLNL_FEI_Solver::solveUsingBicgstab() 
diagonal = matPtr_->getMatrixDiagonal(); totalNRows = localNRows + extNRows; rVec = new double[totalNRows]; - + /* ----------------------------------------------------------------- * compute initial residual vector and norm * -----------------------------------------------------------------*/ - - matPtr_->matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) + + matPtr_->matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm0 = rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { rnorm0 += (rVec[irow] * rVec[irow]); rnorm += (rhsVector_[irow] * rhsVector_[irow]); @@ -837,7 +836,7 @@ int LLNL_FEI_Solver::solveUsingBicgstab() if ( outputLevel_ >= 2 && mypid_ == 0 ) printf("\tLLNL_FEI_Solver_Bicgstab initial rnorm = %e (%e)\n", rnorm,rnorm0); - if ( rnorm0 == 0.0 ) + if ( rnorm0 == 0.0 ) { delete [] rVec; return 0; @@ -857,7 +856,7 @@ int LLNL_FEI_Solver::solveUsingBicgstab() gammapp = new double[blen+1]; mat = new double*[blen+1]; tau = new double*[blen+1]; - for ( iM = 1; iM <= blen; iM++ ) + for ( iM = 1; iM <= blen; iM++ ) { mat[iM] = new double[blen+1]; tau[iM] = new double[blen+1]; @@ -867,7 +866,7 @@ int LLNL_FEI_Solver::solveUsingBicgstab() tVec = new double[totalNRows]; utVec = new double*[blen+2]; rtVec = new double*[blen+2]; - for ( iM = 0; iM < blen+2; iM++ ) + for ( iM = 0; iM < blen+2; iM++ ) { utVec[iM] = new double[totalNRows]; rtVec[iM] = new double[totalNRows]; @@ -879,7 +878,7 @@ int LLNL_FEI_Solver::solveUsingBicgstab() * loop until convergence is achieved * -----------------------------------------------------------------*/ - while ( converged == 0 && numTrials < 1 ) + while ( converged == 0 && numTrials < 1 ) { innerIteration = 0; for ( irow = 0; irow < localNRows; irow++ ) @@ -890,7 +889,7 @@ int LLNL_FEI_Solver::solveUsingBicgstab() } omega = rho = 1.0; alpha = 0.0; - while ( rnorm >= eps1 && iter < 
krylovMaxIterations_ ) + while ( rnorm >= eps1 && iter < krylovMaxIterations_ ) { iter += blen; innerIteration += blen; @@ -907,76 +906,76 @@ int LLNL_FEI_Solver::solveUsingBicgstab() for ( iM = 0; iM < blen; iM++ ) { dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rhVec[irow] * rtVec[iM+1][irow]); MPI_Allreduce(&dtemp, &rho1, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); beta = alpha * rho1 / rho; rho = rho1; dtemp = -beta; - for ( jM = 0; jM <= iM; jM++ ) - for ( irow = 0; irow < localNRows; irow++ ) - utVec[jM+1][irow] = dtemp * utVec[jM+1][irow] + - rtVec[jM+1][irow]; + for ( jM = 0; jM <= iM; jM++ ) + for ( irow = 0; irow < localNRows; irow++ ) + utVec[jM+1][irow] = dtemp * utVec[jM+1][irow] + + rtVec[jM+1][irow]; if (diagonal != NULL) { - ut1 = utVec[iM+1]; - for (irow = 0; irow < localNRows; irow++) + ut1 = utVec[iM+1]; + for (irow = 0; irow < localNRows; irow++) tVec[irow] = ut1[irow] * diagonal[irow]; } else { - ut1 = utVec[iM+1]; - for (irow = 0; irow < localNRows; irow++) + ut1 = utVec[iM+1]; + for (irow = 0; irow < localNRows; irow++) tVec[irow] = ut1[irow]; } - matPtr_->matvec( tVec, utVec[iM+2] ); + matPtr_->matvec( tVec, utVec[iM+2] ); dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rhVec[irow] * utVec[iM+2][irow]); MPI_Allreduce(&dtemp, &gamma, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); - alpha = rho / gamma; - for ( jM = 0; jM <= iM; jM++ ) - for ( irow = 0; irow < localNRows; irow++ ) - rtVec[jM+1][irow] -= alpha * utVec[jM+2][irow]; + alpha = rho / gamma; + for ( jM = 0; jM <= iM; jM++ ) + for ( irow = 0; irow < localNRows; irow++ ) + rtVec[jM+1][irow] -= alpha * utVec[jM+2][irow]; if ( diagonal != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) tVec[irow] = rtVec[iM+1][irow] * diagonal[irow]; } else { - rt1 = rtVec[iM+1]; - for (irow = 0; irow < localNRows; irow++) + rt1 = 
rtVec[iM+1]; + for (irow = 0; irow < localNRows; irow++) tVec[irow] = rt1[irow]; } - matPtr_->matvec( tVec, rtVec[iM+2] ); - for (irow = 0; irow < localNRows; irow++) + matPtr_->matvec( tVec, rtVec[iM+2] ); + for (irow = 0; irow < localNRows; irow++) xhVec[irow] += alpha * utVec[1][irow]; } for ( iM = 1; iM <= blen; iM++ ) for ( jM = 1; jM <= blen; jM++ ) mat[iM][jM] = 0.0; for ( iM = 1; iM <= blen; iM++ ) { - for ( jM = 1; jM <= iM-1; jM++ ) + for ( jM = 1; jM <= iM-1; jM++ ) { dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rtVec[jM+1][irow] * rtVec[iM+1][irow]); MPI_Allreduce(&dtemp, &dtemp2, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); tau[jM][iM] = dtemp2 / sigma[jM]; mat[jM][iM] = tau[jM][iM] * sigma[jM]; dtemp = -tau[jM][iM]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) rtVec[iM+1][irow] += dtemp * rtVec[jM+1][irow]; } dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rtVec[iM+1][irow] * rtVec[iM+1][irow]); dArray[0] = dtemp; dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rtVec[1][irow] * rtVec[iM+1][irow]); dArray[1] = dtemp; MPI_Allreduce(dArray, dArray2, 2, MPI_DOUBLE, MPI_SUM, mpiComm_); @@ -986,37 +985,37 @@ int LLNL_FEI_Solver::solveUsingBicgstab() } gammanp[blen] = gammap[blen]; omega = gammanp[blen]; - for ( iM = blen-1; iM >= 1; iM-- ) + for ( iM = blen-1; iM >= 1; iM-- ) { gammanp[iM] = gammap[iM]; for (jM=iM+1; jM<=blen; jM++) gammanp[iM] = gammanp[iM] - tau[iM][jM] * gammanp[jM]; } - for (iM=1; iM<=blen-1; iM++) + for (iM=1; iM<=blen-1; iM++) { gammapp[iM] = gammanp[iM+1]; for (jM=iM+1; jM<=blen-1; jM++) gammapp[iM] = gammapp[iM] + tau[iM][jM] * gammanp[jM+1]; } dtemp = gammanp[1]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) xhVec[irow] += dtemp * rtVec[1][irow]; dtemp = - 
gammap[blen]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) rtVec[1][irow] += dtemp * rtVec[blen+1][irow]; dtemp = - gammanp[blen]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) utVec[1][irow] += dtemp * utVec[blen+1][irow]; - for (iM=1; iM<=blen-1; iM++) + for (iM=1; iM<=blen-1; iM++) { dtemp = - gammanp[iM]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) utVec[1][irow] += dtemp * utVec[iM+1][irow]; dtemp = gammapp[iM]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) xhVec[irow] += dtemp * rtVec[iM+1][irow]; dtemp = - gammap[iM]; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) rtVec[1][irow] += dtemp * rtVec[iM+1][irow]; } ut1 = utVec[0]; @@ -1027,10 +1026,10 @@ int LLNL_FEI_Solver::solveUsingBicgstab() { ut1[irow] = ut2[irow]; rt1[irow] = rt2[irow]; - solnVector_[irow] = xhVec[irow]; + solnVector_[irow] = xhVec[irow]; } dtemp = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) dtemp += (rtVec[1][irow] * rtVec[1][irow]); MPI_Allreduce(&dtemp, &rnorm, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt( rnorm ); @@ -1040,14 +1039,14 @@ int LLNL_FEI_Solver::solveUsingBicgstab() } if ( diagonal != NULL ) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) solnVector_[irow] *= diagonal[irow]; } - matPtr_->matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) - rVec[irow] = rhsVector_[irow] - rVec[irow]; + matPtr_->matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) + rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; MPI_Allreduce(&rnorm, dArray, 1, MPI_DOUBLE, MPI_SUM, mpiComm_); rnorm = sqrt( dArray[0] ); @@ 
-1069,7 +1068,7 @@ int LLNL_FEI_Solver::solveUsingBicgstab() delete [] gammap; delete [] gammanp; delete [] gammapp; - for ( iM = 1; iM <= blen; iM++ ) + for ( iM = 1; iM <= blen; iM++ ) { delete [] mat[iM]; delete [] tau[iM]; @@ -1080,7 +1079,7 @@ int LLNL_FEI_Solver::solveUsingBicgstab() delete [] rhVec; delete [] xhVec; delete [] tVec; - for ( iM = 0; iM < blen+2; iM++ ) + for ( iM = 0; iM < blen+2; iM++ ) { delete [] utVec[iM]; delete [] rtVec[iM]; @@ -1092,7 +1091,7 @@ int LLNL_FEI_Solver::solveUsingBicgstab() } /************************************************************************** - solve linear system using SuperLU + solve linear system using SuperLU -------------------------------------------------------------------------*/ int LLNL_FEI_Solver::solveUsingSuperLU() { @@ -1115,7 +1114,7 @@ int LLNL_FEI_Solver::solveUsingSuperLU() /* --------------------------------------------------------------- * conversion from CSR to CSC * -------------------------------------------------------------*/ - + matPtr_->getLocalMatrix(&localNRows,&diagIA,&diagJA,&diagAA); countArray = new int[localNRows]; for ( irow = 0; irow < localNRows; irow++ ) countArray[irow] = 0; @@ -1156,8 +1155,8 @@ int LLNL_FEI_Solver::solveUsingSuperLU() * make SuperMatrix * -------------------------------------------------------------*/ - dCreate_CompCol_Matrix(&superLU_Amat, localNRows, localNRows, - cscJA[localNRows], cscAA, cscIA, cscJA, SLU_NC, + dCreate_CompCol_Matrix(&superLU_Amat, localNRows, localNRows, + cscJA[localNRows], cscAA, cscIA, cscJA, SLU_NC, SLU_D, SLU_GE); etree = new int[localNRows]; permC = new int[localNRows]; @@ -1190,9 +1189,9 @@ int LLNL_FEI_Solver::solveUsingSuperLU() * create a SuperLU dense matrix from right hand side * -----------------------------------------------------------*/ - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) solnVector_[irow] = rhsVector_[irow]; - dCreate_Dense_Matrix(&B, localNRows, 1, solnVector_, 
localNRows, + dCreate_Dense_Matrix(&B, localNRows, 1, solnVector_, localNRows, SLU_DN, SLU_D, SLU_GE); /* ------------------------------------------------------------- @@ -1200,14 +1199,14 @@ int LLNL_FEI_Solver::solveUsingSuperLU() * -----------------------------------------------------------*/ trans = NOTRANS; - dgstrs (trans, &superLU_Lmat, &superLU_Umat, permC, permR, &B, + dgstrs (trans, &superLU_Lmat, &superLU_Umat, permC, permR, &B, &slu_stat, &info); rVec = new double[localNRows]; - matPtr_->matvec( solnVector_, rVec ); - for ( irow = 0; irow < localNRows; irow++ ) - rVec[irow] = rhsVector_[irow] - rVec[irow]; + matPtr_->matvec( solnVector_, rVec ); + for ( irow = 0; irow < localNRows; irow++ ) + rVec[irow] = rhsVector_[irow] - rVec[irow]; rnorm = 0.0; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) rnorm += rVec[irow] * rVec[irow]; rnorm = sqrt( rnorm ); if ( outputLevel_ >= 2 && mypid_ == 0 ) @@ -1235,4 +1234,4 @@ int LLNL_FEI_Solver::solveUsingSuperLU() return (1); #endif } - + diff --git a/src/FEI_mv/fei-hypre/Makefile b/src/FEI_mv/fei-hypre/Makefile index 476679413..4da2a8892 100644 --- a/src/FEI_mv/fei-hypre/Makefile +++ b/src/FEI_mv/fei-hypre/Makefile @@ -26,6 +26,7 @@ C_COMPILE_FLAGS = \ -I$(srcdir)/../../utilities\ -I$(srcdir)/../../multivector\ -I$(srcdir)/../../krylov\ + -I$(srcdir)/../../parcsr_block_mv\ -I$(srcdir)/../../parcsr_mv\ -I$(srcdir)/../../parcsr_ls\ -I$(srcdir)/../../seq_mv\ @@ -46,6 +47,7 @@ CXX_COMPILE_FLAGS = \ -I$(srcdir)/../../utilities\ -I$(srcdir)/../../multivector\ -I$(srcdir)/../../krylov\ + -I$(srcdir)/../../parcsr_block_mv\ -I$(srcdir)/../../parcsr_mv\ -I$(srcdir)/../../parcsr_ls\ -I$(srcdir)/../../seq_mv\ diff --git a/src/FEI_mv/fei-hypre/driver.C b/src/FEI_mv/fei-hypre/driver.C index a155407e5..aa94c0542 100644 --- a/src/FEI_mv/fei-hypre/driver.C +++ b/src/FEI_mv/fei-hypre/driver.C @@ -16,7 +16,6 @@ #include #include #include -#include 
//************************************************************************** // HYPRE includes @@ -38,14 +37,14 @@ void fei_hypre_domaindecomposition(int, char **); void fei_hypre_test(int, char **); extern "C" { -int HYPRE_LSI_DDAMGSolve(HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector x_csr, +int HYPRE_LSI_DDAMGSolve(HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector x_csr, HYPRE_ParVector b_csr ); -void HYPRE_LSI_Get_IJAMatrixFromFile(double **val, int **ia, +void HYPRE_LSI_Get_IJAMatrixFromFile(double **val, int **ia, int **ja, int *N, double **rhs, char *matfile, char *rhsfile); } //*************************************************************************** -// main program +// main program //*************************************************************************** main(int argc, char *argv[]) @@ -54,7 +53,7 @@ main(int argc, char *argv[]) } //*************************************************************************** -// a test program +// a test program //*************************************************************************** void fei_hypre_test(int argc, char *argv[]) @@ -126,7 +125,7 @@ void fei_hypre_test(int argc, char *argv[]) rowLengths = new int[local_nrows]; colIndices = new int*[local_nrows]; - for ( i = mybegin; i < myend+1; i++ ) + for ( i = mybegin; i < myend+1; i++ ) { ncnt = ia[i+1] - ia[i]; rowLengths[i-mybegin] = ncnt; @@ -142,7 +141,7 @@ void fei_hypre_test(int argc, char *argv[]) delete [] rowLengths; //------------------------------------------------------------------ - // load the matrix + // load the matrix //------------------------------------------------------------------ for ( i = mybegin; i <= myend; i++ ) { @@ -154,12 +153,12 @@ void fei_hypre_test(int argc, char *argv[]) free( ia ); free( ja ); free( val ); - + //------------------------------------------------------------------ - // load the right hand side + // load the right hand side //------------------------------------------------------------------ - for ( i = mybegin; i <= myend; 
i++ ) + for ( i = mybegin; i <= myend; i++ ) { index = i; H.sumIntoRHSVector(1, &rhs[i], &index); @@ -256,20 +255,20 @@ void fei_hypre_test(int argc, char *argv[]) for ( i = H.localStartRow_; i <= H.localEndRow_; i++ ) { H.putInitialGuess(&i, &ddata, 1); - } + } H.launchSolver(status, iterations); ddata = 0.0; for ( i = H.localStartRow_; i <= H.localEndRow_; i++ ) { H.putInitialGuess(&i, &ddata, 1); - } + } H.launchSolver(status, iterations); */ if ( status != 1 ) { printf("%4d : HYPRE_LinSysCore : solve unsuccessful.\n", my_rank); - } + } else if ( my_rank == 0 ) { printf("HYPRE_LinSysCore : solve successful.\n", my_rank); @@ -287,7 +286,7 @@ void fei_hypre_test(int argc, char *argv[]) } //------------------------------------------------------------------ - // clean up + // clean up //------------------------------------------------------------------ MPI_Finalize(); @@ -378,7 +377,7 @@ void fei_hypre_domaindecomposition(int argc, char *argv[]) rowLengths = new int[local_nrows]; colIndices = new int*[local_nrows]; - for ( i = myBegin; i < myEnd+1; i++ ) + for ( i = myBegin; i < myEnd+1; i++ ) { ncnt = ia[i+1] - ia[i]; rowLengths[i-myBegin] = ncnt; @@ -393,7 +392,7 @@ void fei_hypre_domaindecomposition(int argc, char *argv[]) delete [] colIndices; delete [] rowLengths; - for ( i = myBegin; i <= myEnd; i++ ) + for ( i = myBegin; i <= myEnd; i++ ) { ncnt = ia[i+1] - ia[i]; index = i + 1; @@ -403,12 +402,12 @@ void fei_hypre_domaindecomposition(int argc, char *argv[]) free( ia ); free( ja ); free( val ); - + //****************************************************************** - // load the right hand side + // load the right hand side //------------------------------------------------------------------ - for ( i = myBegin; i <= myEnd; i++ ) + for ( i = myBegin; i <= myEnd; i++ ) { index = i + 1; H.sumIntoRHSVector(1, &rhs[i], &index); @@ -430,7 +429,7 @@ void fei_hypre_domaindecomposition(int argc, char *argv[]) 
//------------------------------------------------------------------ HYPRE_LSI_DDAMGSolve(A_csr,x_csr,b_csr); - + MPI_Finalize(); } diff --git a/src/FEI_mv/fei-hypre/driver.cxx b/src/FEI_mv/fei-hypre/driver.cxx index 6a3dfc49c..1fad7d415 100644 --- a/src/FEI_mv/fei-hypre/driver.cxx +++ b/src/FEI_mv/fei-hypre/driver.cxx @@ -16,7 +16,6 @@ #include #include #include -#include //************************************************************************** // HYPRE includes @@ -38,14 +37,14 @@ void fei_hypre_domaindecomposition(int, char **); void fei_hypre_test(int, char **); extern "C" { -int HYPRE_LSI_DDAMGSolve(HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector x_csr, +int HYPRE_LSI_DDAMGSolve(HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector x_csr, HYPRE_ParVector b_csr ); -void HYPRE_LSI_Get_IJAMatrixFromFile(double **val, int **ia, +void HYPRE_LSI_Get_IJAMatrixFromFile(double **val, int **ia, int **ja, int *N, double **rhs, char *matfile, char *rhsfile); } //*************************************************************************** -// main program +// main program //*************************************************************************** main(int argc, char *argv[]) @@ -54,7 +53,7 @@ main(int argc, char *argv[]) } //*************************************************************************** -// a test program +// a test program //*************************************************************************** void fei_hypre_test(int argc, char *argv[]) @@ -126,7 +125,7 @@ void fei_hypre_test(int argc, char *argv[]) rowLengths = new int[local_nrows]; colIndices = new int*[local_nrows]; - for ( i = mybegin; i < myend+1; i++ ) + for ( i = mybegin; i < myend+1; i++ ) { ncnt = ia[i+1] - ia[i]; rowLengths[i-mybegin] = ncnt; @@ -142,7 +141,7 @@ void fei_hypre_test(int argc, char *argv[]) delete [] rowLengths; //------------------------------------------------------------------ - // load the matrix + // load the matrix //------------------------------------------------------------------ for 
( i = mybegin; i <= myend; i++ ) { @@ -154,12 +153,12 @@ void fei_hypre_test(int argc, char *argv[]) free( ia ); free( ja ); free( val ); - + //------------------------------------------------------------------ - // load the right hand side + // load the right hand side //------------------------------------------------------------------ - for ( i = mybegin; i <= myend; i++ ) + for ( i = mybegin; i <= myend; i++ ) { index = i; H.sumIntoRHSVector(1, &rhs[i], &index); @@ -256,20 +255,20 @@ void fei_hypre_test(int argc, char *argv[]) for ( i = H.localStartRow_; i <= H.localEndRow_; i++ ) { H.putInitialGuess(&i, &ddata, 1); - } + } H.launchSolver(status, iterations); ddata = 0.0; for ( i = H.localStartRow_; i <= H.localEndRow_; i++ ) { H.putInitialGuess(&i, &ddata, 1); - } + } H.launchSolver(status, iterations); */ if ( status != 1 ) { printf("%4d : HYPRE_LinSysCore : solve unsuccessful.\n", my_rank); - } + } else if ( my_rank == 0 ) { printf("HYPRE_LinSysCore : solve successful.\n", my_rank); @@ -287,7 +286,7 @@ void fei_hypre_test(int argc, char *argv[]) } //------------------------------------------------------------------ - // clean up + // clean up //------------------------------------------------------------------ MPI_Finalize(); @@ -378,7 +377,7 @@ void fei_hypre_domaindecomposition(int argc, char *argv[]) rowLengths = new int[local_nrows]; colIndices = new int*[local_nrows]; - for ( i = myBegin; i < myEnd+1; i++ ) + for ( i = myBegin; i < myEnd+1; i++ ) { ncnt = ia[i+1] - ia[i]; rowLengths[i-myBegin] = ncnt; @@ -393,7 +392,7 @@ void fei_hypre_domaindecomposition(int argc, char *argv[]) delete [] colIndices; delete [] rowLengths; - for ( i = myBegin; i <= myEnd; i++ ) + for ( i = myBegin; i <= myEnd; i++ ) { ncnt = ia[i+1] - ia[i]; index = i + 1; @@ -403,12 +402,12 @@ void fei_hypre_domaindecomposition(int argc, char *argv[]) free( ia ); free( ja ); free( val ); - + //****************************************************************** - // load the right hand 
side + // load the right hand side //------------------------------------------------------------------ - for ( i = myBegin; i <= myEnd; i++ ) + for ( i = myBegin; i <= myEnd; i++ ) { index = i + 1; H.sumIntoRHSVector(1, &rhs[i], &index); @@ -430,7 +429,7 @@ void fei_hypre_domaindecomposition(int argc, char *argv[]) //------------------------------------------------------------------ HYPRE_LSI_DDAMGSolve(A_csr,x_csr,b_csr); - + MPI_Finalize(); } diff --git a/src/FEI_mv/fei-hypre/hypre_lsi_amge.c b/src/FEI_mv/fei-hypre/hypre_lsi_amge.c index 764151da9..4360b22ac 100644 --- a/src/FEI_mv/fei-hypre/hypre_lsi_amge.c +++ b/src/FEI_mv/fei-hypre/hypre_lsi_amge.c @@ -23,12 +23,12 @@ #include "seq_mv/csr_matrix.h" extern int hypre_AMGeMatrixTopologySetup(hypre_AMGeMatrixTopology ***A, - int *level, int *i_element_node_0, int *j_element_node_0, + int *level, int *i_element_node_0, int *j_element_node_0, int num_elements, int num_nodes, int Max_level); -extern int hypre_AMGeCoarsenodeSetup(hypre_AMGeMatrixTopology **A, int *level, - int **i_node_neighbor_coarsenode, int **j_node_neighbor_coarsenode, - int **i_node_coarsenode, int **j_node_coarsenode, - int **i_block_node, int **j_block_node, int *Num_blocks, +extern int hypre_AMGeCoarsenodeSetup(hypre_AMGeMatrixTopology **A, int *level, + int **i_node_neighbor_coarsenode, int **j_node_neighbor_coarsenode, + int **i_node_coarsenode, int **j_node_coarsenode, + int **i_block_node, int **j_block_node, int *Num_blocks, int *Num_elements, int *Num_nodes); /* ********************************************************************* */ @@ -77,14 +77,14 @@ int HYPRE_LSI_AMGeDestroy() int i; printf("LSI_AMGe destructor\n"); - if ( i_element_node_0 != NULL ) free( i_element_node_0 ); - if ( j_element_node_0 != NULL ) free( j_element_node_0 ); - if ( i_dof_on_boundary != NULL ) free( i_dof_on_boundary ); - if ( temp_elem_node_cnt != NULL ) free( temp_elem_node_cnt ); + hypre_TFree(i_element_node_0, HYPRE_MEMORY_HOST); + 
hypre_TFree(j_element_node_0, HYPRE_MEMORY_HOST); + hypre_TFree(i_dof_on_boundary, HYPRE_MEMORY_HOST); + hypre_TFree(temp_elem_node_cnt, HYPRE_MEMORY_HOST); for ( i = 0; i < num_elements; i++ ) { - if ( temp_elem_node[i] != NULL ) free( temp_elem_node[i] ); - if ( temp_elem_data[i] != NULL ) free( temp_elem_data[i] ); + hypre_TFree(temp_elem_node[i], HYPRE_MEMORY_HOST); + hypre_TFree(temp_elem_data[i], HYPRE_MEMORY_HOST); } temp_elem_node = NULL; temp_elem_node_cnt = NULL; @@ -151,9 +151,9 @@ int HYPRE_LSI_AMGeSetBoundary(int size, int *list) i_dof_on_boundary = hypre_TAlloc(int, num_nodes * system_size , HYPRE_MEMORY_HOST); for ( i = 0; i < num_nodes*system_size; i++ ) i_dof_on_boundary[i] = -1; - for ( i = 0; i < size; i++ ) + for ( i = 0; i < size; i++ ) { - if (list[i] >= 0 && list[i] < num_nodes*system_size) + if (list[i] >= 0 && list[i] < num_nodes*system_size) i_dof_on_boundary[list[i]] = 0; else printf("AMGeSetBoundary ERROR : %d(%d)\n", list[i],num_nodes*system_size); } @@ -179,14 +179,14 @@ int HYPRE_LSI_AMGePutRow(int row, int length, const double *colVal, temp_elem_node_cnt[element_count] = length / system_size; nbytes = length / system_size * sizeof(int); temp_elem_node[element_count] = hypre_TAlloc( nbytes ,HYPRE_MEMORY_HOST); - for ( i = 0; i < length; i+=system_size ) + for ( i = 0; i < length; i+=system_size ) temp_elem_node[element_count][i/system_size] = (colInd[i]-1)/system_size; nbytes = length * length * sizeof(double); temp_elem_data[element_count] = hypre_TAlloc(nbytes,HYPRE_MEMORY_HOST); temp_elemat_cnt = 0; rowLeng = length; } - for ( i = 0; i < length; i++ ) + for ( i = 0; i < length; i++ ) temp_elem_data[element_count][temp_elemat_cnt++] = colVal[i]; if ( temp_elemat_cnt == rowLeng * rowLeng ) { @@ -296,9 +296,9 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) multiplier *= multiplier; for ( j = 0; j < multiplier; j++ ) element_data[counter++] = temp_elem_data[i][j]; - free(temp_elem_data[i]); - } - free(temp_elem_data); + 
hypre_TFree(temp_elem_data[i], HYPRE_MEMORY_HOST); + } + hypre_TFree(temp_elem_data, HYPRE_MEMORY_HOST); temp_elem_data = NULL; total_length = 0; @@ -306,15 +306,15 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) i_element_node_0 = hypre_TAlloc(int, (num_elements + 1) , HYPRE_MEMORY_HOST); j_element_node_0 = hypre_TAlloc(int, total_length , HYPRE_MEMORY_HOST); counter = 0; - for (i = 0; i < num_elements; i++) + for (i = 0; i < num_elements; i++) { i_element_node_0[i] = counter; - for (j = 0; j < temp_elem_node_cnt[i]; j++) + for (j = 0; j < temp_elem_node_cnt[i]; j++) j_element_node_0[counter++] = temp_elem_node[i][j]; - free(temp_elem_node[i]); - } + hypre_TFree(temp_elem_node[i], HYPRE_MEMORY_HOST); + } i_element_node_0[num_elements] = counter; - free(temp_elem_node); + hypre_TFree(temp_elem_node, HYPRE_MEMORY_HOST); temp_elem_node = NULL; /* -------------------------------------------------------------- */ @@ -376,7 +376,7 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) { ierr = compute_dof_on_boundary(&i_dof_on_boundary, i_node_on_boundary, Num_nodes[0], system_size); - free(i_node_on_boundary); + hypre_TFree(i_node_on_boundary, HYPRE_MEMORY_HOST); i_node_on_boundary = NULL; } */ @@ -469,7 +469,7 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) /* one V(1,1) --cycle as preconditioner in PCG: ======================== */ /* ILU solve pre--smoothing, ILU solve post--smoothing; ================ */ - w = hypre_CTAlloc(double*, level+1, HYPRE_MEMORY_HOST); + w = hypre_CTAlloc(double*, level+1, HYPRE_MEMORY_HOST); d = hypre_CTAlloc(double*, level+1, HYPRE_MEMORY_HOST); for (l=0; l < level+1; l++) @@ -492,7 +492,7 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) /*x = hypre_CTAlloc(double, num_dofs); */ /*rhs = hypre_CTAlloc(double, num_dofs);*/ - r = hypre_CTAlloc(double, num_dofs, HYPRE_MEMORY_HOST); + r = hypre_CTAlloc(double, num_dofs, HYPRE_MEMORY_HOST); aux = hypre_CTAlloc(double, num_dofs, HYPRE_MEMORY_HOST); v_fine = hypre_CTAlloc(double, num_dofs, 
HYPRE_MEMORY_HOST); w_fine = hypre_CTAlloc(double, num_dofs, HYPRE_MEMORY_HOST); @@ -508,7 +508,7 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) printf("\n\n=======================================================\n"); printf(" Testing level[%d] PCG solve: \n",l); printf("===========================================================\n"); - + for (i=0; i < Num_dofs[l]; i++) x[i] = 0.e0; /* for (i=0; i < Num_dofs[l]; i++) rhs[i] = rand(); */ @@ -529,7 +529,7 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) printf("\n\n=======================================================\n"); printf(" END test PCG solve: \n"); printf("===========================================================\n"); - + } printf("\n\n===============================================================\n"); @@ -539,11 +539,11 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) num_dofs = Num_dofs[0]; /* for (i=0; i < num_dofs; i++) rhs[i] = rand(); */ - + ierr = hypre_VcycleILUpcg(x, rhs, w, d, &reduction_factor, Matrix, i_ILUdof_to_dof, i_ILUdof_ILUdof, j_ILUdof_ILUdof, LD_data, - i_ILUdof_ILUdof_t, j_ILUdof_ILUdof_t, U_data, P, aux, r, - v_fine, w_fine, d_fine, max_iter, v_coarse, w_coarse, d_coarse, + i_ILUdof_ILUdof_t, j_ILUdof_ILUdof_t, U_data, P, aux, r, + v_fine, w_fine, d_fine, max_iter, v_coarse, w_coarse, d_coarse, nu, level, coarse_level, Num_dofs); /* hypre_TFree(x); */ @@ -573,7 +573,7 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) hypre_CSRMatrixI(P[l]) = NULL; hypre_CSRMatrixJ(P[l]) = NULL; } - + } for (l=0; l < level; l++) { @@ -632,7 +632,7 @@ int HYPRE_LSI_AMGeSolve(double *rhs, double *x) hypre_TFree(i_node_neighbor_coarsenode, HYPRE_MEMORY_HOST); hypre_TFree(j_node_neighbor_coarsenode, HYPRE_MEMORY_HOST); - free(element_data); + hypre_TFree(element_data, HYPRE_MEMORY_HOST); return 0; } @@ -658,24 +658,24 @@ int HYPRE_LSI_AMGeWriteToFile() fprintf(fp, "\n"); } fprintf(fp, "\n"); - } + } fclose(fp); fp = fopen("elem_node", "w"); - + fprintf(fp, "%d %d\n", element_count, num_nodes); 
- for (i = 0; i < element_count; i++) + for (i = 0; i < element_count; i++) { - for (j = 0; j < temp_elem_node_cnt[i]; j++) + for (j = 0; j < temp_elem_node_cnt[i]; j++) fprintf(fp, "%d ", temp_elem_node[i][j]+1); fprintf(fp,"\n"); - } + } fclose(fp); fp = fopen("node_bc", "w"); - for (i = 0; i < num_nodes*system_size; i++) + for (i = 0; i < num_nodes*system_size; i++) { fprintf(fp, "%d\n", i_dof_on_boundary[i]); } diff --git a/src/FEI_mv/fei-hypre/hypre_lsi_ddamg.c b/src/FEI_mv/fei-hypre/hypre_lsi_ddamg.c index 4e5e0c44f..f0931b9c6 100644 --- a/src/FEI_mv/fei-hypre/hypre_lsi_ddamg.c +++ b/src/FEI_mv/fei-hypre/hypre_lsi_ddamg.c @@ -9,7 +9,6 @@ #include #include #include -#include #define habs(x) ((x > 0 ) ? x : -(x)) @@ -56,7 +55,7 @@ int interior_nrows, *offRowLengths; int **offColInd; int *remap_array; double **offColVal; -MPI_Comm parComm; +MPI_Comm parComm; HYPRE_Solver cSolver; HYPRE_Solver cPrecon; @@ -65,7 +64,7 @@ HYPRE_Solver cPrecon; /* [E_ob] vb */ /***************************************************************************/ -int HYPRE_LocalAMGSolve(HYPRE_Solver solver, HYPRE_ParVector x_csr, +int HYPRE_LocalAMGSolve(HYPRE_Solver solver, HYPRE_ParVector x_csr, HYPRE_ParVector y_csr ) { int i, local_nrows, *temp_list; @@ -102,14 +101,14 @@ int HYPRE_LocalAMGSolve(HYPRE_Solver solver, HYPRE_ParVector x_csr, temp_list = hypre_TAlloc(int, interior_nrows , HYPRE_MEMORY_HOST); temp_vect = hypre_TAlloc(double, interior_nrows , HYPRE_MEMORY_HOST); for (i = 0; i < interior_nrows; i++) temp_list[i] = i; - for (i = 0; i < local_nrows; i++) + for (i = 0; i < local_nrows; i++) { if (remap_array[i] >= 0) temp_vect[remap_array[i]] = x_par_data[i]; } HYPRE_IJVectorSetValues(localb,interior_nrows,(const int *) temp_list, temp_vect); - free( temp_list ); - free( temp_vect ); + hypre_TFree(temp_list, HYPRE_MEMORY_HOST); + hypre_TFree(temp_vect, HYPRE_MEMORY_HOST); /* --------------------------------------------------------*/ /* perform one cycle of AMG to subdomain 
(internal nodes) */ @@ -128,7 +127,7 @@ int HYPRE_LocalAMGSolve(HYPRE_Solver solver, HYPRE_ParVector x_csr, Lx_par = (hypre_ParVector *) Lx_csr; Lx_local = hypre_ParVectorLocalVector(Lx_par); Lx_data = hypre_VectorData(Lx_local); - for (i = 0; i < local_nrows; i++) + for (i = 0; i < local_nrows; i++) { if (remap_array[i] >= 0) y_par_data[i] = Lx_data[remap_array[i]]; } @@ -140,7 +139,7 @@ int HYPRE_LocalAMGSolve(HYPRE_Solver solver, HYPRE_ParVector x_csr, /* [E_ob] vb */ /***************************************************************************/ -int HYPRE_ApplyExtension(HYPRE_Solver solver, HYPRE_ParVector x_csr, +int HYPRE_ApplyExtension(HYPRE_Solver solver, HYPRE_ParVector x_csr, HYPRE_ParVector y_csr ) { int i, j, index, local_nrows, global_nrows, *temp_list; @@ -181,7 +180,7 @@ int HYPRE_ApplyExtension(HYPRE_Solver solver, HYPRE_ParVector x_csr, /* --------------------------------------------------------*/ index = 0; - for (i = 0; i < local_nrows; i++) + for (i = 0; i < local_nrows; i++) { if ( remap_array[i] < 0 ) y_par_data[i] = x_par_data[index++]; else y_par_data[i] = 0.0; @@ -194,21 +193,21 @@ int HYPRE_ApplyExtension(HYPRE_Solver solver, HYPRE_ParVector x_csr, temp_list = hypre_TAlloc(int, interior_nrows , HYPRE_MEMORY_HOST); temp_vect = hypre_TAlloc(double, interior_nrows , HYPRE_MEMORY_HOST); for (i = 0; i < interior_nrows; i++) temp_list[i] = i; - for (i = 0; i < local_nrows; i++) + for (i = 0; i < local_nrows; i++) { - if (remap_array[i] >= 0 && remap_array[i] < interior_nrows) + if (remap_array[i] >= 0 && remap_array[i] < interior_nrows) { temp_vect[remap_array[i]] = 0.0; - for (j = 0; j < offRowLengths[i]; j++) - temp_vect[remap_array[i]] += + for (j = 0; j < offRowLengths[i]; j++) + temp_vect[remap_array[i]] += (offColVal[i][j] * y_par_data[offColInd[i][j]]); - } else if ( remap_array[i] >= interior_nrows) + } else if ( remap_array[i] >= interior_nrows) printf("WARNING : index out of range.\n"); } 
HYPRE_IJVectorSetValues(localb,interior_nrows,(const int*) temp_list, temp_vect); - free( temp_list ); - free( temp_vect ); + hypre_TFree(temp_list, HYPRE_MEMORY_HOST); + hypre_TFree(temp_vect, HYPRE_MEMORY_HOST); /* --------------------------------------------------------*/ /* perform one cycle of AMG to subdomain (internal nodes) */ @@ -226,7 +225,7 @@ int HYPRE_ApplyExtension(HYPRE_Solver solver, HYPRE_ParVector x_csr, Lx_par = (hypre_ParVector *) Lx_csr; Lx_local = hypre_ParVectorLocalVector(Lx_par); Lx_data = hypre_VectorData(Lx_local); - for (i=0; i= 0) y_par_data[i] = -Lx_data[remap_array[i]]; } @@ -237,7 +236,7 @@ int HYPRE_ApplyExtension(HYPRE_Solver solver, HYPRE_ParVector x_csr, /* Apply [I E_ob^T] v */ /***************************************************************************/ -int HYPRE_ApplyExtensionTranspose(HYPRE_Solver solver, HYPRE_ParVector x_csr, +int HYPRE_ApplyExtensionTranspose(HYPRE_Solver solver, HYPRE_ParVector x_csr, HYPRE_ParVector y_csr ) { int i, j, index, local_nrows, global_nrows, *temp_list; @@ -298,15 +297,15 @@ int HYPRE_ApplyExtensionTranspose(HYPRE_Solver solver, HYPRE_ParVector x_csr, temp_list = hypre_TAlloc(int, interior_nrows , HYPRE_MEMORY_HOST); temp_vect = hypre_TAlloc(double, interior_nrows , HYPRE_MEMORY_HOST); for (i=0; i= 0 && remap_array[i] < interior_nrows) + if (remap_array[i] >= 0 && remap_array[i] < interior_nrows) temp_vect[remap_array[i]] = x_par_data[i]; } HYPRE_IJVectorSetValues(localb,interior_nrows,(const int*) temp_list, temp_vect); - free( temp_list ); - free( temp_vect ); + hypre_TFree(temp_list, HYPRE_MEMORY_HOST); + hypre_TFree(temp_vect, HYPRE_MEMORY_HOST); /* --------------------------------------------------------*/ /* perform one cycle of AMG to subdomain (internal nodes) */ @@ -325,11 +324,11 @@ int HYPRE_ApplyExtensionTranspose(HYPRE_Solver solver, HYPRE_ParVector x_csr, Lx_par = (hypre_ParVector *) Lx_csr; Lx_local = hypre_ParVectorLocalVector(Lx_par); Lx_data = hypre_VectorData(Lx_local); - 
for (i=0; i= 0 ) + if ( remap_array[i] >= 0 ) { - for (j=0; j= 0 && remap_array[i] < interior_nrows) + if ( remap_array[i] >= 0 && remap_array[i] < interior_nrows) { temp_vect[remap_array[i]] = 0.0; - for (j = 0; j < offRowLengths[i]; j++) - temp_vect[remap_array[i]] += + for (j = 0; j < offRowLengths[i]; j++) + temp_vect[remap_array[i]] += (offColVal[i][j] * x_par_data[offColInd[i][j]]); - } else if ( remap_array[i] >= interior_nrows) + } else if ( remap_array[i] >= interior_nrows) printf("WARNING : index out of range.\n"); } HYPRE_IJVectorSetValues(localb,interior_nrows,(const int*) temp_list, temp_vect); - free( temp_list ); - free( temp_vect ); + hypre_TFree(temp_list, HYPRE_MEMORY_HOST); + hypre_TFree(temp_vect, HYPRE_MEMORY_HOST); /* --------------------------------------------------------*/ /* perform one cycle of AMG to subdomain (internal nodes) */ @@ -442,7 +441,7 @@ int HYPRE_ApplyTransform( HYPRE_Solver solver, HYPRE_ParVector x_csr, Lx_par = (hypre_ParVector *) Lx_csr; Lx_local = hypre_ParVectorLocalVector(Lx_par); Lx_data = hypre_VectorData(Lx_local); - for (i=0; i= 0) y_par_data[i] -= Lx_data[remap_array[i]]; } @@ -453,7 +452,7 @@ int HYPRE_ApplyTransform( HYPRE_Solver solver, HYPRE_ParVector x_csr, /* Apply E^T to an incoming vector */ /***************************************************************************/ -int HYPRE_ApplyTransformTranspose(HYPRE_Solver solver, HYPRE_ParVector x_csr, +int HYPRE_ApplyTransformTranspose(HYPRE_Solver solver, HYPRE_ParVector x_csr, HYPRE_ParVector y_csr ) { int i, j, index, local_nrows, *temp_list; @@ -501,15 +500,15 @@ int HYPRE_ApplyTransformTranspose(HYPRE_Solver solver, HYPRE_ParVector x_csr, temp_list = hypre_TAlloc(int, interior_nrows , HYPRE_MEMORY_HOST); temp_vect = hypre_TAlloc(double, interior_nrows , HYPRE_MEMORY_HOST); for (i=0; i= 0 && remap_array[i] < interior_nrows) + if (remap_array[i] >= 0 && remap_array[i] < interior_nrows) temp_vect[remap_array[i]] = x_par_data[i]; } 
HYPRE_IJVectorSetValues(localb,interior_nrows,(const int*) temp_list, temp_vect); - free( temp_list ); - free( temp_vect ); + hypre_TFree(temp_list, HYPRE_MEMORY_HOST); + hypre_TFree(temp_vect, HYPRE_MEMORY_HOST); /* --------------------------------------------------------*/ /* perform one cycle of AMG to subdomain (internal nodes) */ @@ -528,11 +527,11 @@ int HYPRE_ApplyTransformTranspose(HYPRE_Solver solver, HYPRE_ParVector x_csr, Lx_par = (hypre_ParVector *) Lx_csr; Lx_local = hypre_ParVectorLocalVector(Lx_par); Lx_data = hypre_VectorData(Lx_local); - for (i=0; i= 0 ) + if ( remap_array[i] >= 0 ) { - for (j=0; j myEnd ) + if ( colInd[j] < myBegin || colInd[j] > myEnd ) {remap_array[i-myBegin] = -1; break;} HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); } interior_nrows = 0; - for ( i = 0; i < local_nrows; i++ ) + for ( i = 0; i < local_nrows; i++ ) if ( remap_array[i] == 0 ) remap_array[i] = interior_nrows++; /* --------------------------------------------------------*/ @@ -965,14 +966,14 @@ int HYPRE_LSI_DDAMGSolve(HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector x_csr, HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); for ( j = 0; j < rowSize; j++ ) { - if ( colInd[j] >= myBegin && colInd[j] <= myEnd ) + if ( colInd[j] >= myBegin && colInd[j] <= myEnd ) { if (remap_array[colInd[j]-myBegin] >= 0) rowLengths[rowCnt]++; else offRowLengths[i-myBegin]++; } } nnz += rowLengths[rowCnt]; - maxRowSize = (rowLengths[rowCnt] > maxRowSize) ? + maxRowSize = (rowLengths[rowCnt] > maxRowSize) ? 
rowLengths[rowCnt] : maxRowSize; HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); rowCnt++; @@ -1008,7 +1009,7 @@ int HYPRE_LSI_DDAMGSolve(HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector x_csr, k = 0; for ( j = 0; j < rowSize; j++ ) { - if ( colInd[j] >= myBegin && colInd[j] <= myEnd ) + if ( colInd[j] >= myBegin && colInd[j] <= myEnd ) { if ( remap_array[colInd[j]-myBegin] >= 0 ) { @@ -1029,8 +1030,8 @@ int HYPRE_LSI_DDAMGSolve(HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector x_csr, rowCnt++; } } - free( newColInd ); - free( newColVal ); + hypre_TFree(newColInd , HYPRE_MEMORY_HOST); + hypre_TFree(newColVal , HYPRE_MEMORY_HOST); HYPRE_IJMatrixAssemble(localA); /* --------------------------------------------------------*/ @@ -1073,7 +1074,7 @@ int HYPRE_LSI_DDAMGSolve(HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector x_csr, /* diagnostics */ /* --------------------------------------------------------*/ -/* small code to check symmetry +/* small code to check symmetry HYPRE_ParVectorSetRandomValues( x_csr, 10345 ); HYPRE_ParVectorSetRandomValues( b_csr, 24893 ); HYPRE_DDAMGSolve( SeqPrecon, A_csr, x_csr, r_csr); @@ -1096,8 +1097,8 @@ printf("CHECK 2 = %e\n", ddata); myBegin_int = 0; for (i = 0; i < myRank; i++) myBegin_int += itemp_vec2[i]; myEnd_int = myBegin_int + local_intface_nrows - 1; - free( itemp_vec ); - free( itemp_vec2 ); + hypre_TFree(itemp_vec, HYPRE_MEMORY_HOST); + hypre_TFree(itemp_vec2, HYPRE_MEMORY_HOST); HYPRE_IJVectorCreate(parComm, myBegin_int, myEnd_int, &tvec); HYPRE_IJVectorSetObjectType(tvec, HYPRE_PARCSR); @@ -1120,7 +1121,7 @@ printf("CHECK 2 = %e\n", ddata); /* for ( i = 0; i < global_intface_nrows; i++ ) - { + { MPI_Barrier(MPI_COMM_WORLD); HYPRE_IJVectorZeroLocalComponents(tvec); if ( i >= myBegin_int && i <= myEnd_int ) @@ -1140,7 +1141,7 @@ printf("CHECK 2 = %e\n", ddata); /* --------------------------------------------------------*/ HYPRE_ParCSRGMRESCreate(parComm, &PSolver); - 
HYPRE_ParCSRGMRESSetPrecond(PSolver,HYPRE_DDAMGSolve,HYPRE_DummySetup, + HYPRE_ParCSRGMRESSetPrecond(PSolver,HYPRE_DDAMGSolve,HYPRE_DummySetup, SeqPrecon); HYPRE_ParCSRGMRESSetKDim(PSolver, 100); HYPRE_ParCSRGMRESSetMaxIter(PSolver, 100); @@ -1149,7 +1150,7 @@ printf("CHECK 2 = %e\n", ddata); HYPRE_ParCSRGMRESSolve(PSolver, A_csr, b_csr, x_csr); HYPRE_ParCSRGMRESGetNumIterations(PSolver, &num_iterations); /*HYPRE_ParCSRPCGCreate(parComm, &PSolver); - HYPRE_ParCSRPCGSetPrecond(PSolver,HYPRE_DDAMGSolve,HYPRE_DummySetup, + HYPRE_ParCSRPCGSetPrecond(PSolver,HYPRE_DDAMGSolve,HYPRE_DummySetup, SeqPrecon); HYPRE_ParCSRPCGSetMaxIter(PSolver, 100); HYPRE_ParCSRPCGSetTol(PSolver, 1.0E-8); diff --git a/src/FEI_mv/fei-hypre/hypre_lsi_misc.c b/src/FEI_mv/fei-hypre/hypre_lsi_misc.c index 0a0da4875..d4c0e24af 100644 --- a/src/FEI_mv/fei-hypre/hypre_lsi_misc.c +++ b/src/FEI_mv/fei-hypre/hypre_lsi_misc.c @@ -8,7 +8,6 @@ #include #include #include -#include #include "utilities/_hypre_utilities.h" #include "IJ_mv/HYPRE_IJ_mv.h" @@ -28,7 +27,7 @@ extern void hypre_qsort1(int*, double*, int, int); /* (read by a single processor) */ /*-------------------------------------------------------------------------*/ -void HYPRE_LSI_Get_IJAMatrixFromFile(double **val, int **ia, +void HYPRE_LSI_Get_IJAMatrixFromFile(double **val, int **ia, int **ja, int *N, double **rhs, char *matfile, char *rhsfile) { int i, j, Nrows, nnz, icount, rowindex, colindex, curr_row; @@ -179,8 +178,8 @@ int HYPRE_LSI_Search2(int key, int nlist, int *list) /* this function extracts the matrix in a CSR format */ /* ------------------------------------------------------------------------ */ -int HYPRE_LSI_GetParCSRMatrix(HYPRE_IJMatrix Amat, int nrows, int nnz, - int *ia_ptr, int *ja_ptr, double *a_ptr) +int HYPRE_LSI_GetParCSRMatrix(HYPRE_IJMatrix Amat, int nrows, int nnz, + int *ia_ptr, int *ja_ptr, double *a_ptr) { int nz, i, j, ierr, rowSize, *colInd, nz_ptr, *colInd2; int firstNnz; @@ -200,7 +199,7 @@ int 
HYPRE_LSI_GetParCSRMatrix(HYPRE_IJMatrix Amat, int nrows, int nnz, for ( i = 0; i < nrows; i++ ) { ierr = HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); - assert(!ierr); + hypre_assert(!ierr); colInd2 = hypre_TAlloc(int, rowSize , HYPRE_MEMORY_HOST); colVal2 = hypre_TAlloc(double, rowSize , HYPRE_MEMORY_HOST); for ( j = 0; j < rowSize; j++ ) @@ -218,18 +217,18 @@ int HYPRE_LSI_GetParCSRMatrix(HYPRE_IJMatrix Amat, int nrows, int nnz, { if ( colVal2[j] != 0.0 ) { - if (nz_ptr > 0 && firstNnz > 0 && colInd2[j] == ja_ptr[nz_ptr-1]) + if (nz_ptr > 0 && firstNnz > 0 && colInd2[j] == ja_ptr[nz_ptr-1]) { a_ptr[nz_ptr-1] += colVal2[j]; printf("HYPRE_LSI_GetParCSRMatrix:: repeated col in row %d\n",i); } else - { + { ja_ptr[nz_ptr] = colInd2[j]; a_ptr[nz_ptr++] = colVal2[j]; if ( nz_ptr > nnz ) { - printf("HYPRE_LSI_GetParCSRMatrix Error (1) - %d %d.\n",i, + printf("HYPRE_LSI_GetParCSRMatrix Error (1) - %d %d.\n",i, nrows); exit(1); } @@ -237,12 +236,12 @@ int HYPRE_LSI_GetParCSRMatrix(HYPRE_IJMatrix Amat, int nrows, int nnz, } } else nz++; } - free( colInd2 ); - free( colVal2 ); + hypre_TFree(colInd2, HYPRE_MEMORY_HOST); + hypre_TFree(colVal2, HYPRE_MEMORY_HOST); ia_ptr[i+1] = nz_ptr; ierr = HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); - assert(!ierr); - } + hypre_assert(!ierr); + } /* if ( nnz != nz_ptr ) { @@ -352,8 +351,8 @@ int HYPRE_LSI_SplitDSort2(double *dlist, int nlist, int *ilist, int limit) dlist[count1+1+i] = darray2[i]; ilist[count1+1+i] = iarray2[i]; } - free( darray1 ); - free( iarray1 ); + hypre_TFree(darray1, HYPRE_MEMORY_HOST); + hypre_TFree(iarray1, HYPRE_MEMORY_HOST); if ( count1+1 == limit ) return 0; else if ( count1+1 < limit ) HYPRE_LSI_SplitDSort2(&(dlist[count1+1]),count2,&(ilist[count1+1]), @@ -386,8 +385,8 @@ int HYPRE_LSI_SplitDSort(double *dlist, int nlist, int *ilist, int limit) first = 0; last = nlist - 1; - do - { + do + { cur_index = first; cur_val = dlist[cur_index]; @@ -403,7 +402,7 @@ int 
HYPRE_LSI_SplitDSort(double *dlist, int nlist, int *ilist, int limit) dlist[cur_index] = dlist[i]; dlist[i] = dtemp; } - } + } itemp = ilist[cur_index]; ilist[cur_index] = ilist[first]; ilist[first] = itemp; @@ -411,8 +410,8 @@ int HYPRE_LSI_SplitDSort(double *dlist, int nlist, int *ilist, int limit) dlist[cur_index] = dlist[first]; dlist[first] = dtemp; - if ( cur_index > limit ) last = cur_index - 1; - else if ( cur_index < limit ) first = cur_index + 1; + if ( cur_index > limit ) last = cur_index - 1; + else if ( cur_index < limit ) first = cur_index + 1; } while ( cur_index != limit ); return 0; @@ -453,17 +452,17 @@ int HYPRE_LSI_Cuthill(int n, int *ia, int *ja, double *aa, int *order_array, root = -1; for ( i = 0; i < n; i++ ) { - if ( nz_array[i] == 1 ) + if ( nz_array[i] == 1 ) { tag_array[i] = 1; order_array[norder++] = i; reorder_array[i] = norder-1; } - else if ( nz_array[i] < mindeg ) + else if ( nz_array[i] < mindeg ) { mindeg = nz_array[i]; root = i; - } + } } if ( root == -1 ) { @@ -481,7 +480,7 @@ int HYPRE_LSI_Cuthill(int n, int *ia, int *ja, double *aa, int *order_array, reorder_array[root] = norder - 1; for ( j = ia[root]; j < ia[root+1]; j++ ) { - if ( tag_array[ja[j]] == 0 ) + if ( tag_array[ja[j]] == 0 ) { tag_array[ja[j]] = 1; queue[nqueue++] = ja[j]; @@ -490,7 +489,7 @@ int HYPRE_LSI_Cuthill(int n, int *ia, int *ja, double *aa, int *order_array, if ( qhead == nqueue && norder < n ) for ( j = 0; j < n; j++ ) if ( tag_array[j] == 0 ) queue[nqueue++] = j; - } + } ia2 = hypre_TAlloc(int, (n+1) , HYPRE_MEMORY_HOST); ja2 = hypre_TAlloc(int, nnz , HYPRE_MEMORY_HOST); aa2 = hypre_TAlloc(double, nnz , HYPRE_MEMORY_HOST); @@ -501,22 +500,22 @@ int HYPRE_LSI_Cuthill(int n, int *ia, int *ja, double *aa, int *order_array, cnt = order_array[i]; for ( j = ia[cnt]; j < ia[cnt+1]; j++ ) { - ja2[nnz] = ja[j]; - aa2[nnz++] = aa[j]; + ja2[nnz] = ja[j]; + aa2[nnz++] = aa[j]; } ia2[i+1] = nnz; } - for ( i = 0; i < nnz; i++ ) ja[i] = reorder_array[ja2[i]]; - for ( 
i = 0; i < nnz; i++ ) aa[i] = aa2[i]; + for ( i = 0; i < nnz; i++ ) ja[i] = reorder_array[ja2[i]]; + for ( i = 0; i < nnz; i++ ) aa[i] = aa2[i]; for ( i = 0; i <= n; i++ ) ia[i] = ia2[i]; - free( ia2 ); - free( ja2 ); - free( aa2 ); - free( nz_array ); - free( tag_array ); - free( queue ); + hypre_TFree(ia2, HYPRE_MEMORY_HOST); + hypre_TFree(ja2, HYPRE_MEMORY_HOST); + hypre_TFree(aa2, HYPRE_MEMORY_HOST); + hypre_TFree(nz_array, HYPRE_MEMORY_HOST); + hypre_TFree(tag_array, HYPRE_MEMORY_HOST); + hypre_TFree(queue, HYPRE_MEMORY_HOST); return 0; -} +} /* ******************************************************************** */ /* matrix of a dense matrix */ @@ -528,12 +527,12 @@ int HYPRE_LSI_MatrixInverse( double **Amat, int ndim, double ***Cmat ) double denom, **Bmat, dmax; (*Cmat) = NULL; - if ( ndim == 1 ) + if ( ndim == 1 ) { if ( habs(Amat[0][0]) <= 1.0e-16 ) return -1; Bmat = hypre_TAlloc(double*, ndim , HYPRE_MEMORY_HOST); - for ( i = 0; i < ndim; i++ ) - Bmat[i] = hypre_TAlloc(double, ndim , HYPRE_MEMORY_HOST); + for ( i = 0; i < ndim; i++ ) + Bmat[i] = hypre_TAlloc(double, ndim , HYPRE_MEMORY_HOST); Bmat[0][0] = 1.0 / Amat[0][0]; (*Cmat) = Bmat; return 0; @@ -543,8 +542,8 @@ int HYPRE_LSI_MatrixInverse( double **Amat, int ndim, double ***Cmat ) denom = Amat[0][0] * Amat[1][1] - Amat[0][1] * Amat[1][0]; if ( habs( denom ) <= 1.0e-16 ) return -1; Bmat = hypre_TAlloc(double*, ndim , HYPRE_MEMORY_HOST); - for ( i = 0; i < ndim; i++ ) - Bmat[i] = hypre_TAlloc(double, ndim , HYPRE_MEMORY_HOST); + for ( i = 0; i < ndim; i++ ) + Bmat[i] = hypre_TAlloc(double, ndim , HYPRE_MEMORY_HOST); Bmat[0][0] = Amat[1][1] / denom; Bmat[1][1] = Amat[0][0] / denom; Bmat[0][1] = - ( Amat[0][1] / denom ); @@ -555,55 +554,55 @@ int HYPRE_LSI_MatrixInverse( double **Amat, int ndim, double ***Cmat ) else { Bmat = hypre_TAlloc(double*, ndim , HYPRE_MEMORY_HOST); - for ( i = 0; i < ndim; i++ ) - { - Bmat[i] = hypre_TAlloc(double, ndim , HYPRE_MEMORY_HOST); + for ( i = 0; i < ndim; i++ ) + { 
+ Bmat[i] = hypre_TAlloc(double, ndim , HYPRE_MEMORY_HOST); for ( j = 0; j < ndim; j++ ) Bmat[i][j] = 0.0; Bmat[i][i] = 1.0; - } - for ( i = 1; i < ndim; i++ ) + } + for ( i = 1; i < ndim; i++ ) { - for ( j = 0; j < i; j++ ) + for ( j = 0; j < i; j++ ) { if ( habs(Amat[j][j]) < 1.0e-16 ) return -1; denom = Amat[i][j] / Amat[j][j]; - for ( k = 0; k < ndim; k++ ) + for ( k = 0; k < ndim; k++ ) { Amat[i][k] -= denom * Amat[j][k]; Bmat[i][k] -= denom * Bmat[j][k]; } } } - for ( i = ndim-2; i >= 0; i-- ) + for ( i = ndim-2; i >= 0; i-- ) { - for ( j = ndim-1; j >= i+1; j-- ) + for ( j = ndim-1; j >= i+1; j-- ) { if ( habs(Amat[j][j]) < 1.0e-16 ) return -1; denom = Amat[i][j] / Amat[j][j]; - for ( k = 0; k < ndim; k++ ) + for ( k = 0; k < ndim; k++ ) { Amat[i][k] -= denom * Amat[j][k]; Bmat[i][k] -= denom * Bmat[j][k]; } } } - for ( i = 0; i < ndim; i++ ) + for ( i = 0; i < ndim; i++ ) { denom = Amat[i][i]; if ( habs(denom) < 1.0e-16 ) return -1; for ( j = 0; j < ndim; j++ ) Bmat[i][j] /= denom; } - for ( i = 0; i < ndim; i++ ) - for ( j = 0; j < ndim; j++ ) + for ( i = 0; i < ndim; i++ ) + for ( j = 0; j < ndim; j++ ) if ( habs(Bmat[i][j]) < 1.0e-17 ) Bmat[i][j] = 0.0; dmax = 0.0; - for ( i = 0; i < ndim; i++ ) + for ( i = 0; i < ndim; i++ ) { - for ( j = 0; j < ndim; j++ ) + for ( j = 0; j < ndim; j++ ) if ( habs(Bmat[i][j]) > dmax ) dmax = habs(Bmat[i][j]); /* - for ( j = 0; j < ndim; j++ ) + for ( j = 0; j < ndim; j++ ) if ( habs(Bmat[i][j]/dmax) < 1.0e-15 ) Bmat[i][j] = 0.0; */ } @@ -622,22 +621,22 @@ int HYPRE_LSI_PartitionMatrix( int nRows, int startRow, int *rowLengths, int *nLabels, int **labels) { int irow, rowCnt, labelNum, *localLabels, actualNRows; - int jcol, root, indHead, indTail, *indSet, index; + int jcol, root, indHead, indTail, *indSet, index; /*----------------------------------------------------------------*/ /* search for constraint rows */ /*----------------------------------------------------------------*/ - for ( irow = nRows-1; irow >= 0; 
irow-- ) + for ( irow = nRows-1; irow >= 0; irow-- ) { index = irow + startRow; - for ( jcol = 0; jcol < rowLengths[irow]; jcol++ ) + for ( jcol = 0; jcol < rowLengths[irow]; jcol++ ) if (colIndices[irow][jcol] == index && colValues[irow][jcol] != 0.0) break; if ( jcol != rowLengths[irow] ) break; } (*nLabels) = actualNRows = irow + 1; - + /*----------------------------------------------------------------*/ /* search for constraint rows */ /*----------------------------------------------------------------*/ @@ -645,7 +644,7 @@ int HYPRE_LSI_PartitionMatrix( int nRows, int startRow, int *rowLengths, localLabels = hypre_TAlloc(int, actualNRows , HYPRE_MEMORY_HOST); for ( irow = 0; irow < actualNRows; irow++ ) localLabels[irow] = -1; indSet = hypre_TAlloc(int, actualNRows , HYPRE_MEMORY_HOST); - + labelNum = 0; rowCnt = actualNRows; @@ -660,15 +659,15 @@ int HYPRE_LSI_PartitionMatrix( int nRows, int startRow, int *rowLengths, exit(1); } indHead = indTail = 0; - localLabels[root] = labelNum; + localLabels[root] = labelNum; rowCnt--; for ( jcol = 0; jcol < rowLengths[root]; jcol++ ) { index = colIndices[root][jcol] - startRow; - if ( index >= 0 && index < actualNRows && localLabels[index] < 0 ) + if ( index >= 0 && index < actualNRows && localLabels[index] < 0 ) { indSet[indTail++] = index; - localLabels[index] = labelNum; + localLabels[index] = labelNum; } } while ( (indTail - indHead) > 0 ) @@ -678,30 +677,30 @@ int HYPRE_LSI_PartitionMatrix( int nRows, int startRow, int *rowLengths, for ( jcol = 0; jcol < rowLengths[root]; jcol++ ) { index = colIndices[root][jcol] - startRow; - if ( index >= 0 && index < actualNRows && localLabels[index] < 0 ) + if ( index >= 0 && index < actualNRows && localLabels[index] < 0 ) { indSet[indTail++] = index; - localLabels[index] = labelNum; + localLabels[index] = labelNum; } } } labelNum++; } - if ( labelNum > 4 ) + if ( labelNum > 4 ) { printf("HYPRE_LSI_PartitionMatrix : number of labels %d too large.\n", labelNum+1); - free( 
localLabels ); - (*nLabels) = 0; - (*labels) = NULL; + hypre_TFree(localLabels, HYPRE_MEMORY_HOST); + (*nLabels) = 0; + (*labels) = NULL; } else { printf("HYPRE_LSI_PartitionMatrix : number of labels = %d.\n", labelNum); - (*labels) = localLabels; + (*labels) = localLabels; } - free( indSet ); + hypre_TFree(indSet, HYPRE_MEMORY_HOST); return 0; -} +} diff --git a/src/FEI_mv/fei-hypre/hypre_schur_reduce.cxx b/src/FEI_mv/fei-hypre/hypre_schur_reduce.cxx index 55a1f68dc..967f3d6f6 100644 --- a/src/FEI_mv/fei-hypre/hypre_schur_reduce.cxx +++ b/src/FEI_mv/fei-hypre/hypre_schur_reduce.cxx @@ -8,7 +8,6 @@ #include #include #include -#include #include "HYPRE.h" #include "utilities/_hypre_utilities.h" @@ -20,7 +19,7 @@ #define habs(x) ((x > 0) ? x : -(x)) //--------------------------------------------------------------------------- -// _hypre_parcsr_mv.h is put here instead of in HYPRE_LinSysCore.h +// _hypre_parcsr_mv.h is put here instead of in HYPRE_LinSysCore.h // because it gives warning when compiling cfei.cc //--------------------------------------------------------------------------- @@ -65,7 +64,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() HYPRE_ParVector f1_csr, f2hat_csr; //****************************************************************** - // initial clean up and set up + // initial clean up and set up //------------------------------------------------------------------ if ( mypid_ == 0 && (HYOutputLevel_ & HYFEI_SCHURREDUCE1) ) @@ -102,7 +101,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() // get information about processor offsets and globalNRows // (ProcNRows, globalNRows) //------------------------------------------------------------------ - + ProcNRows = new int[numProcs_]; tempList = new int[numProcs_]; for ( i = 0; i < numProcs_; i++ ) tempList[i] = 0; @@ -116,22 +115,22 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() //------------------------------------------------------------------ nSchur = 0; - for ( i = StartRow; i <= EndRow; i++ 
) + for ( i = StartRow; i <= EndRow; i++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); searchIndex = globalNRows + 1; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - if ( colIndex < searchIndex && colVal[j] != 0.0 ) + if ( colIndex < searchIndex && colVal[j] != 0.0 ) searchIndex = colIndex; } if ( searchIndex < i ) nSchur++; //searchIndex = -1; - //for (j = 0; j < rowSize; j++) + //for (j = 0; j < rowSize; j++) //{ // colIndex = colInd[j]; - // if ( colIndex < i && colVal[j] != 0.0 ) + // if ( colIndex < i && colVal[j] != 0.0 ) // if ( colIndex > searchIndex ) searchIndex = colIndex; //} //if ( searchIndex >= StartRow ) nSchur++; @@ -145,7 +144,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() //------------------------------------------------------------------ if ( nSchur > 0 ) schurList = new int[nSchur]; - else schurList = NULL; + else schurList = NULL; //------------------------------------------------------------------ // compose the list of rows having zero diagonal @@ -153,22 +152,22 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() //------------------------------------------------------------------ nSchur = 0; - for ( i = StartRow; i <= EndRow; i++ ) + for ( i = StartRow; i <= EndRow; i++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); searchIndex = globalNRows + 1; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - if ( colIndex < searchIndex && colVal[j] != 0.0 ) + if ( colIndex < searchIndex && colVal[j] != 0.0 ) searchIndex = colIndex; } if ( searchIndex < i ) schurList[nSchur++] = i; //searchIndex = -1; - //for (j = 0; j < rowSize; j++) + //for (j = 0; j < rowSize; j++) //{ // colIndex = colInd[j]; - // if ( colIndex < i && colVal[j] != 0.0 ) + // if ( colIndex < i && colVal[j] != 0.0 ) // if ( colIndex > searchIndex ) searchIndex = colIndex; //} //if ( searchIndex >= StartRow ) schurList[nSchur++] = i; @@ -201,7 +200,7 @@ void 
HYPRE_LinSysCore::buildSchurReducedSystem() displArray = new int[numProcs_]; MPI_Allgather(&nSchur, 1, MPI_INT, recvCntArray, 1, MPI_INT, comm_); displArray[0] = 0; - for ( i = 1; i < numProcs_; i++ ) + for ( i = 1; i < numProcs_; i++ ) displArray[i] = displArray[i-1] + recvCntArray[i-1]; MPI_Allgatherv(schurList, nSchur, MPI_INT, globalSchurList, recvCntArray, displArray, MPI_INT, comm_); @@ -214,12 +213,12 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() printf("%4d : buildSchurSystem - schurList %d = %d\n",mypid_, i,schurList[i]); } - + //------------------------------------------------------------------ // get information about processor offsets for nSchur // (ProcNSchur) //------------------------------------------------------------------ - + ProcNSchur = new int[numProcs_]; tempList = new int[numProcs_]; for ( i = 0; i < numProcs_; i++ ) tempList[i] = 0; @@ -228,7 +227,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() delete [] tempList; globalNSchur = 0; ncnt = 0; - for ( i = 0; i < numProcs_; i++ ) + for ( i = 0; i < numProcs_; i++ ) { globalNSchur += ProcNSchur[i]; ncnt2 = ProcNSchur[i]; @@ -253,7 +252,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { printf("%4d : buildSchurSystem : CStartRow = %d\n",mypid_,CStartRow); - printf("%4d : buildSchurSystem : CGlobalDim = %d %d\n", mypid_, + printf("%4d : buildSchurSystem : CGlobalDim = %d %d\n", mypid_, CGlobalNRows, CGlobalNCols); printf("%4d : buildSchurSystem : CLocalDim = %d %d\n",mypid_, CNRows, CNCols); @@ -267,7 +266,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() ierr = HYPRE_IJMatrixCreate(comm_, CStartRow, CStartRow+CNRows-1, CStartCol, CStartCol+CNCols-1, &Cmat); ierr += HYPRE_IJMatrixSetObjectType(Cmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros per row in Cmat and call set up @@ -276,15 +275,15 @@ void 
HYPRE_LinSysCore::buildSchurReducedSystem() maxRowSize = 0; CMatSize = new int[CNRows]; - for ( i = 0; i < nSchur; i++ ) + for ( i = 0; i < nSchur; i++ ) { rowIndex = schurList[i]; HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - searchIndex = hypre_BinarySearch(globalSchurList,colIndex, + searchIndex = hypre_BinarySearch(globalSchurList,colIndex, globalNSchur); if (searchIndex < 0) newRowSize++; else if ( colVal[j] != 0.0 ) @@ -303,7 +302,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() } ierr = HYPRE_IJMatrixSetRowSizes(Cmat, CMatSize); ierr += HYPRE_IJMatrixInitialize(Cmat); - assert(!ierr); + hypre_assert(!ierr); delete [] CMatSize; //------------------------------------------------------------------ @@ -318,14 +317,14 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() rowIndex = schurList[i]; HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { if ( colVal[j] != 0.0 ) { colIndex = colInd[j]; searchIndex = HYPRE_LSI_Search(globalSchurList,colIndex, - globalNSchur); - if ( searchIndex < 0 ) + globalNSchur); + if ( searchIndex < 0 ) { searchIndex = - searchIndex - 1; for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) @@ -342,9 +341,9 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() printf("%4d : buildSchurSystem WARNING - Cmat ", mypid_); printf("out of range %d - %d (%d)\n", rowCount,colIndex, CGlobalNCols); - } - } - if ( newRowSize > maxRowSize+1 ) + } + } + if ( newRowSize > maxRowSize+1 ) { if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { @@ -353,7 +352,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() } } } - } + } } HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); HYPRE_IJMatrixSetValues(Cmat, 1, &newRowSize, (const int *) &rowCount, @@ -364,7 +363,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() 
delete [] newColVal; //------------------------------------------------------------------ - // finally assemble the matrix + // finally assemble the matrix //------------------------------------------------------------------ HYPRE_IJMatrixAssemble(Cmat); @@ -375,15 +374,15 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() { ncnt = 0; MPI_Barrier(MPI_COMM_WORLD); - while ( ncnt < numProcs_ ) + while ( ncnt < numProcs_ ) { - if ( mypid_ == ncnt ) + if ( mypid_ == ncnt ) { printf("====================================================\n"); printf("%4d buildSchurSystem : matrix Cmat assembled %d.\n", mypid_,CStartRow); fflush(stdout); - for ( i = CStartRow; i < CStartRow+nSchur; i++ ) + for ( i = CStartRow; i < CStartRow+nSchur; i++ ) { HYPRE_ParCSRMatrixGetRow(C_csr,i,&rowSize,&colInd,&colVal); printf("Cmat ROW = %6d (%d)\n", i, rowSize); @@ -414,7 +413,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { printf("%4d : buildSchurSystem - MStartRow = %d\n",mypid_,MStartRow); - printf("%4d : buildSchurSystem - MGlobalDim = %d %d\n", mypid_, + printf("%4d : buildSchurSystem - MGlobalDim = %d %d\n", mypid_, MGlobalNRows, MGlobalNCols); printf("%4d : buildSchurSystem - MLocalDim = %d %d\n",mypid_, MNRows, MNCols); @@ -432,7 +431,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() for ( i = 0; i < MNRows; i++ ) MMatSize[i] = 1; ierr = HYPRE_IJMatrixSetRowSizes(Mmat, MMatSize); ierr += HYPRE_IJMatrixInitialize(Mmat); - assert(!ierr); + hypre_assert(!ierr); delete [] MMatSize; //------------------------------------------------------------------ @@ -444,19 +443,19 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() diagonal = new double[MNRows]; rowIndex = MStartRow; ierr = 0; - for ( i = StartRow; i <= EndRow; i++ ) + for ( i = StartRow; i <= EndRow; i++ ) { searchIndex = hypre_BinarySearch(schurList, i, nSchur); if ( searchIndex < 0 ) { ncnt = 0; HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); - for (j = 0; j < 
rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - if ( colIndex == i && colVal[j] != 0.0 ) - { - ddata = 1.0 / colVal[j]; + if ( colIndex == i && colVal[j] != 0.0 ) + { + ddata = 1.0 / colVal[j]; maxdiag = ( colVal[j] > maxdiag ) ? colVal[j] : maxdiag; mindiag = ( colVal[j] < mindiag ) ? colVal[j] : mindiag; break; @@ -469,7 +468,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() printf("%4d : buildSchurSystem WARNING - diag[%d] not found\n", mypid_, i); ierr = 1; - } + } else if ( ncnt > 1 ) ierr = 1; diagonal[rowIndex-MStartRow] = ddata; HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); @@ -511,9 +510,9 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() } schurReduction_ = 0; delete [] ProcNRows; - delete [] ProcNSchur; - if ( nSchur > 0 ) delete [] schurList; - if ( globalNSchur > 0 ) delete [] globalSchurList; + delete [] ProcNSchur; + if ( nSchur > 0 ) delete [] schurList; + if ( globalNSchur > 0 ) delete [] globalSchurList; HYPRE_IJMatrixDestroy(Cmat); return; } @@ -535,7 +534,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { printf("%4d : buildSchurSystem - CTStartRow = %d\n",mypid_,CTStartRow); - printf("%4d : buildSchurSystem - CTGlobalDim = %d %d\n", mypid_, + printf("%4d : buildSchurSystem - CTGlobalDim = %d %d\n", mypid_, CTGlobalNRows, CTGlobalNCols); printf("%4d : buildSchurSystem - CTLocalDim = %d %d\n",mypid_, CTNRows, CTNCols); @@ -548,7 +547,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() ierr = HYPRE_IJMatrixCreate(comm_, CTStartRow, CTStartRow+CTNRows-1, CStartRow, CStartRow+CTNCols-1, &CTmat); ierr += HYPRE_IJMatrixSetObjectType(CTmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros per row in CTmat and call set up @@ -558,17 +557,17 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() CTMatSize = new int[CTNRows]; rowCount = 0; - for ( i = 
StartRow; i <= EndRow; i++ ) + for ( i = StartRow; i <= EndRow; i++ ) { searchIndex = hypre_BinarySearch(schurList, i, nSchur); if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - searchIndex = hypre_BinarySearch(globalSchurList,colIndex, + searchIndex = hypre_BinarySearch(globalSchurList,colIndex, globalNSchur); if (searchIndex >= 0) newRowSize++; } @@ -582,7 +581,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() } ierr = HYPRE_IJMatrixSetRowSizes(CTmat, CTMatSize); ierr += HYPRE_IJMatrixInitialize(CTmat); - assert(!ierr); + hypre_assert(!ierr); delete [] CTMatSize; //------------------------------------------------------------------ @@ -593,19 +592,19 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() newColVal = new double[maxRowSize+1]; rowCount = CTStartRow; - for ( i = StartRow; i <= EndRow; i++ ) + for ( i = StartRow; i <= EndRow; i++ ) { searchIndex = hypre_BinarySearch(schurList, i, nSchur); if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - searchIndex = hypre_BinarySearch(globalSchurList,colIndex, + searchIndex = hypre_BinarySearch(globalSchurList,colIndex, globalNSchur); - if (searchIndex >= 0) + if (searchIndex >= 0) { newColInd[newRowSize] = searchIndex; if ( searchIndex >= globalNSchur ) @@ -613,9 +612,9 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { printf("%4d : buildSchurSystem WARNING - CTmat ",mypid_); - printf("out of range %d - %d (%d)\n", rowCount, + printf("out of range %d - %d (%d)\n", rowCount, searchIndex, globalNSchur); - } + } } newColVal[newRowSize++] = colVal[j]; } @@ -627,7 +626,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() newRowSize = 1; } 
HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); - HYPRE_IJMatrixSetValues(CTmat, 1, &newRowSize, + HYPRE_IJMatrixSetValues(CTmat, 1, &newRowSize, (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); rowCount++; @@ -637,7 +636,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() delete [] newColVal; //------------------------------------------------------------------ - // finally assemble the matrix + // finally assemble the matrix //------------------------------------------------------------------ HYPRE_IJMatrixAssemble(CTmat); @@ -648,15 +647,15 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() { ncnt = 0; MPI_Barrier(MPI_COMM_WORLD); - while ( ncnt < numProcs_ ) + while ( ncnt < numProcs_ ) { - if ( mypid_ == ncnt ) + if ( mypid_ == ncnt ) { printf("====================================================\n"); printf("%4d buildSchurSystem : matrix CTmat assembled %d.\n", mypid_,CTStartRow); fflush(stdout); - for ( i = CTStartRow; i < CTStartRow+CTNRows; i++ ) + for ( i = CTStartRow; i < CTStartRow+CTNRows; i++ ) { HYPRE_ParCSRMatrixGetRow(CT_csr,i,&rowSize,&colInd,&colVal); printf("CTmat ROW = %6d (%d)\n", i, rowSize); @@ -694,7 +693,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() { if ( mypid_ == ncnt ) { - for ( i = CStartRow; i < CStartRow+CNRows; i++ ) + for ( i = CStartRow; i < CStartRow+CNRows; i++ ) { HYPRE_ParCSRMatrixGetRow(S_csr,i,&rowSize,&colInd, &colVal); printf("Schur ROW = %6d (%d)\n", i, rowSize); @@ -720,15 +719,15 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() HYPRE_IJVectorSetObjectType(f1, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f1); ierr += HYPRE_IJVectorAssemble(f1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorCreate(comm_, CStartRow, CStartRow+CNRows-1, &f2hat); HYPRE_IJVectorSetObjectType(f2hat, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f2hat); ierr += HYPRE_IJVectorAssemble(f2hat); - assert(!ierr); + hypre_assert(!ierr); rowCount = CTStartRow; - for ( i = 
StartRow; i <= EndRow; i++ ) + for ( i = StartRow; i <= EndRow; i++ ) { searchIndex = hypre_BinarySearch(schurList, i, nSchur); if ( searchIndex < 0 ) @@ -737,29 +736,29 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() ddata *= diagonal[rowCount-CTStartRow]; ierr = HYPRE_IJVectorSetValues(f1, 1, (const int *) &rowCount, (const double *) &ddata); - assert( !ierr ); + hypre_assert( !ierr ); rowCount++; } - } - + } + HYPRE_IJVectorGetObject(f1, (void **) &f1_csr); HYPRE_IJVectorGetObject(f2hat, (void **) &f2hat_csr); HYPRE_ParCSRMatrixMatvec( 1.0, C_csr, f1_csr, 0.0, f2hat_csr ); delete [] diagonal; - HYPRE_IJVectorDestroy(f1); + HYPRE_IJVectorDestroy(f1); //------------------------------------------------------------------ - // form f2 = f2 - f2hat + // form f2 = f2 - f2hat //------------------------------------------------------------------ HYPRE_IJVectorCreate(comm_, CStartRow, CStartRow+CNRows-1, &f2); HYPRE_IJVectorSetObjectType(f2, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f2); ierr += HYPRE_IJVectorAssemble(f2); - assert(!ierr); + hypre_assert(!ierr); rowCount = CStartRow; - for ( i = 0; i < nSchur; i++ ) + for ( i = 0; i < nSchur; i++ ) { rowIndex = schurList[i]; HYPRE_IJVectorGetValues(HYb_, 1, &rowIndex, &ddata); @@ -770,9 +769,9 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() HYPRE_IJVectorAddToValues(f2, 1, (const int *) &rowCount, (const double *) &ddata); HYPRE_IJVectorGetValues(f2, 1, &rowCount, &ddata); - assert( !ierr ); + hypre_assert( !ierr ); rowCount++; - } + } HYPRE_IJVectorDestroy(f2hat); // ***************************************************************** @@ -782,7 +781,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() ierr = HYPRE_IJMatrixCreate(comm_, CStartRow, CStartRow+CNRows-1, CStartRow, CStartRow+CNRows-1, &reducedA_); ierr += HYPRE_IJMatrixSetObjectType(reducedA_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute row sizes for the 
Schur complement @@ -790,7 +789,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() CMatSize = new int[CNRows]; maxRowSize = 0; - for ( i = CStartRow; i < CStartRow+CNRows; i++ ) + for ( i = CStartRow; i < CStartRow+CNRows; i++ ) { HYPRE_ParCSRMatrixGetRow(S_csr,i,&rowSize,&colInd,NULL); rowIndex = schurList[i-CStartRow]; @@ -799,10 +798,10 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() newColInd = new int[newRowSize]; for (j = 0; j < rowSize; j++) newColInd[j] = colInd[j]; ncnt = 0; - for (j = 0; j < rowSize2; j++) + for (j = 0; j < rowSize2; j++) { colIndex = colInd2[j]; - searchIndex = hypre_BinarySearch(globalSchurList,colIndex, + searchIndex = hypre_BinarySearch(globalSchurList,colIndex, globalNSchur); if ( searchIndex >= 0 ) { @@ -830,14 +829,14 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() } ierr = HYPRE_IJMatrixSetRowSizes(reducedA_, CMatSize); ierr += HYPRE_IJMatrixInitialize(reducedA_); - assert(!ierr); + hypre_assert(!ierr); delete [] CMatSize; //------------------------------------------------------------------ // load and assemble the Schur complement matrix //------------------------------------------------------------------ - for ( i = CStartRow; i < CStartRow+CNRows; i++ ) + for ( i = CStartRow; i < CStartRow+CNRows; i++ ) { HYPRE_ParCSRMatrixGetRow(S_csr,i,&rowSize,&colInd,&colVal); rowIndex = schurList[i-CStartRow]; @@ -845,16 +844,16 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() newRowSize = rowSize + rowSize2; newColInd = new int[newRowSize]; newColVal = new double[newRowSize]; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { newColInd[j] = colInd[j]; newColVal[j] = colVal[j]; } ncnt = 0; - for (j = 0; j < rowSize2; j++) + for (j = 0; j < rowSize2; j++) { colIndex = colInd2[j]; - searchIndex = hypre_BinarySearch(globalSchurList,colIndex, + searchIndex = hypre_BinarySearch(globalSchurList,colIndex, globalNSchur); if ( searchIndex >= 0 ) { @@ -872,7 +871,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() { 
newColVal[ncnt] += newColVal[j]; } - else + else { ncnt++; newColInd[ncnt] = newColInd[j]; @@ -883,10 +882,10 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() ncnt = 0; ddata = 0.0; for ( j = 0; j < newRowSize; j++ ) - if ( habs(newColVal[j]) > ddata ) ddata = habs(newColVal[j]); + if ( habs(newColVal[j]) > ddata ) ddata = habs(newColVal[j]); for ( j = 0; j < newRowSize; j++ ) { - if ( habs(newColVal[j]) > ddata*1.0e-14 ) + if ( habs(newColVal[j]) > ddata*1.0e-14 ) { newColInd[ncnt] = newColInd[j]; newColVal[ncnt++] = newColVal[j]; @@ -913,13 +912,13 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() ierr = HYPRE_IJVectorSetObjectType(reducedX_, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(reducedX_); ierr = HYPRE_IJVectorAssemble(reducedX_); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJVectorCreate(comm_,CStartRow,CStartRow+CNRows-1,&reducedR_); ierr = HYPRE_IJVectorSetObjectType(reducedR_, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(reducedR_); ierr = HYPRE_IJVectorAssemble(reducedR_); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // save A21 and invA22 for solution recovery @@ -931,9 +930,9 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() currR_ = reducedR_; currX_ = reducedX_; - HYA21_ = CTmat; - HYA12_ = Cmat; - HYinvA22_ = Mmat; + HYA21_ = CTmat; + HYA12_ = Cmat; + HYinvA22_ = Mmat; A21NRows_ = CTNRows; A21NCols_ = CTNCols; buildSchurInitialGuess(); @@ -959,7 +958,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem() } //***************************************************************************** -// build the solution vector for Schur-reduced systems +// build the solution vector for Schur-reduced systems //----------------------------------------------------------------------------- double HYPRE_LinSysCore::buildSchurReducedSoln() @@ -969,7 +968,7 @@ double HYPRE_LinSysCore::buildSchurReducedSoln() double ddata, rnorm; HYPRE_ParCSRMatrix A_csr, A21_csr, A22_csr; 
HYPRE_ParVector x_csr, x2_csr, r_csr, b_csr; - HYPRE_IJVector R1, x2; + HYPRE_IJVector R1, x2; if ( HYA21_ == NULL || HYinvA22_ == NULL ) { @@ -1001,7 +1000,7 @@ double HYPRE_LinSysCore::buildSchurReducedSoln() ierr = HYPRE_IJVectorSetObjectType(R1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(R1); ierr = HYPRE_IJVectorAssemble(R1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJMatrixGetObject(HYA21_, (void **) &A21_csr); HYPRE_IJVectorGetObject(currX_, (void **) &x_csr); HYPRE_IJVectorGetObject(R1, (void **) &r_csr); @@ -1030,7 +1029,7 @@ double HYPRE_LinSysCore::buildSchurReducedSoln() for ( i = localStartRow_-1; i < localEndRow_-A21NCols_; i++ ) { HYPRE_IJVectorGetValues(HYb_, 1, &i, &ddata); - HYPRE_IJVectorAddToValues(R1, 1, (const int *) &rowNum, + HYPRE_IJVectorAddToValues(R1, 1, (const int *) &rowNum, (const double *) &ddata); HYPRE_IJVectorGetValues(R1, 1, &rowNum, &ddata); rowNum++; @@ -1045,7 +1044,7 @@ double HYPRE_LinSysCore::buildSchurReducedSoln() ierr = HYPRE_IJVectorSetObjectType(x2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(x2); ierr = HYPRE_IJVectorAssemble(x2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJMatrixGetObject(HYinvA22_, (void **) &A22_csr); HYPRE_IJVectorGetObject(R1, (void **) &r_csr); HYPRE_IJVectorGetObject(x2, (void **) &x2_csr); @@ -1073,8 +1072,8 @@ double HYPRE_LinSysCore::buildSchurReducedSoln() HYPRE_IJVectorSetValues(HYx_, 1, (const int *) &rowNum, (const double *) &ddata); rowNum++; - } - } + } + } else { for ( i = startRow2; i < startRow2+localNRows; i++ ) @@ -1091,11 +1090,11 @@ double HYPRE_LinSysCore::buildSchurReducedSoln() HYPRE_IJVectorSetValues(HYx_, 1, (const int *) &rowNum, (const double *) &ddata); rowNum++; - } - } + } + } //------------------------------------------------------------- - // residual norm check + // residual norm check //------------------------------------------------------------- HYPRE_IJMatrixGetObject(HYA_, (void **) &A_csr); @@ -1109,15 +1108,15 @@ double 
HYPRE_LinSysCore::buildSchurReducedSoln() if ( mypid_ == 0 && ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) ) printf(" buildReducedSystemSoln::final residual norm = %e\n", rnorm); - } + } currX_ = HYx_; //**************************************************************** // clean up //---------------------------------------------------------------- - HYPRE_IJVectorDestroy(R1); - HYPRE_IJVectorDestroy(x2); + HYPRE_IJVectorDestroy(R1); + HYPRE_IJVectorDestroy(x2); return rnorm; } @@ -1133,7 +1132,7 @@ void HYPRE_LinSysCore::buildSchurInitialGuess() HYPRE_ParVector hypre_x; //------------------------------------------------------------------ - // initial set up + // initial set up //------------------------------------------------------------------ if (reducedX_ == HYx_ || reducedX_ == NULL || reducedA_ == NULL) return; @@ -1152,15 +1151,15 @@ void HYPRE_LinSysCore::buildSchurInitialGuess() else { getIndices = new int[nSchur]; - for ( i = 0; i < nSchur; i++ ) getIndices[i] = EndRow+1-nSchur+i; + for ( i = 0; i < nSchur; i++ ) getIndices[i] = EndRow+1-nSchur+i; } dArray = new double[nSchur]; putIndices = new int[nSchur]; for ( i = 0; i < nSchur; i++ ) putIndices[i] = CStartRow + i; HYPRE_IJVectorGetValues(HYx_, nSchur, getIndices, dArray); - ierr = HYPRE_IJVectorSetValues(reducedX_, nSchur, + ierr = HYPRE_IJVectorSetValues(reducedX_, nSchur, (const int *) putIndices, (const double *) dArray); - assert( !ierr ); + hypre_assert( !ierr ); delete [] dArray; delete [] putIndices; if ( selectedList_ == NULL ) delete [] getIndices; @@ -1185,7 +1184,7 @@ void HYPRE_LinSysCore::buildSchurReducedRHS() HYPRE_ParCSRMatrix M_csr, C_csr; //****************************************************************** - // initial set up + // initial set up //------------------------------------------------------------------ if ( mypid_ == 0 && (HYOutputLevel_ & HYFEI_SCHURREDUCE1) ) @@ -1202,14 +1201,14 @@ void HYPRE_LinSysCore::buildSchurReducedRHS() // get information about processor offsets and 
globalNRows // (ProcNRows, globalNRows) //------------------------------------------------------------------ - + ProcNRows = new int[numProcs_]; tempList = new int[numProcs_]; for ( i = 0; i < numProcs_; i++ ) tempList[i] = 0; tempList[mypid_] = EndRow - StartRow + 1; MPI_Allreduce(tempList, ProcNRows, numProcs_, MPI_INT, MPI_SUM, comm_); ncnt = 0; - for ( i = 0; i < numProcs_; i++ ) + for ( i = 0; i < numProcs_; i++ ) { ncnt2 = ProcNRows[i]; ProcNRows[i] = ncnt; @@ -1221,7 +1220,7 @@ void HYPRE_LinSysCore::buildSchurReducedRHS() MPI_Allreduce(tempList, ProcNSchur, numProcs_, MPI_INT, MPI_SUM, comm_); globalNSchur = 0; ncnt = 0; - for ( i = 0; i < numProcs_; i++ ) + for ( i = 0; i < numProcs_; i++ ) { globalNSchur += ProcNSchur[i]; ncnt2 = ProcNSchur[i]; @@ -1238,8 +1237,8 @@ void HYPRE_LinSysCore::buildSchurReducedRHS() CTNCols = A21NCols_; MPI_Allreduce(&CTNRows, &CTGlobalNRows, 1, MPI_INT, MPI_SUM, comm_); MPI_Allreduce(&CTNCols, &CTGlobalNCols, 1, MPI_INT, MPI_SUM, comm_); - Cmat = HYA12_; - Mmat = HYinvA22_; + Cmat = HYA12_; + Mmat = HYinvA22_; CNRows = CTNCols; nSchur = A21NCols_; schurList = selectedList_; @@ -1254,17 +1253,17 @@ void HYPRE_LinSysCore::buildSchurReducedRHS() HYPRE_IJVectorSetObjectType(f1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(f1); ierr = HYPRE_IJVectorAssemble(f1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorCreate(comm_, CStartRow, CStartRow+CNRows-1, &f2hat); HYPRE_IJVectorSetObjectType(f2hat, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f2hat); ierr = HYPRE_IJVectorAssemble(f2hat); - assert(!ierr); + hypre_assert(!ierr); rowCount = CTStartRow; if ( schurList != NULL ) { - for ( i = StartRow; i <= EndRow; i++ ) + for ( i = StartRow; i <= EndRow; i++ ) { searchIndex = hypre_BinarySearch(schurList, i, nSchur); if ( searchIndex < 0 ) @@ -1277,14 +1276,14 @@ void HYPRE_LinSysCore::buildSchurReducedRHS() (const double *) &ddata); HYPRE_ParCSRMatrixRestoreRow(M_csr,rowCount,&rowSize,&colInd, &colVal); - assert( !ierr ); + 
hypre_assert( !ierr ); rowCount++; } - } - } + } + } else { - for ( i = StartRow; i <= EndRow-nSchur; i++ ) + for ( i = StartRow; i <= EndRow-nSchur; i++ ) { HYPRE_IJVectorGetValues(HYb_, 1, &i, &ddata); HYPRE_ParCSRMatrixGetRow(M_csr,rowCount,&rowSize,&colInd,&colVal); @@ -1294,38 +1293,38 @@ void HYPRE_LinSysCore::buildSchurReducedRHS() (const double *) &ddata); HYPRE_ParCSRMatrixRestoreRow(M_csr,rowCount,&rowSize,&colInd, &colVal); - assert( !ierr ); + hypre_assert( !ierr ); rowCount++; - } - } + } + } HYPRE_IJVectorGetObject(f1, (void **) &f1_csr); HYPRE_IJVectorGetObject(f2hat, (void **) &f2hat_csr); HYPRE_ParCSRMatrixMatvec( 1.0, C_csr, f1_csr, 0.0, f2hat_csr ); - HYPRE_IJVectorDestroy(f1); + HYPRE_IJVectorDestroy(f1); //------------------------------------------------------------------ - // form f2 = f2 - f2hat + // form f2 = f2 - f2hat //------------------------------------------------------------------ HYPRE_IJVectorCreate(comm_, CStartRow, CStartRow+CNRows-1, &f2); HYPRE_IJVectorSetObjectType(f2, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f2); ierr += HYPRE_IJVectorAssemble(f2); - assert(!ierr); + hypre_assert(!ierr); rowCount = CStartRow; - for ( i = 0; i < nSchur; i++ ) + for ( i = 0; i < nSchur; i++ ) { if ( schurList != NULL ) rowIndex = schurList[i]; - else rowIndex = EndRow+1-nSchur+i; + else rowIndex = EndRow+1-nSchur+i; HYPRE_IJVectorGetValues(HYb_, 1, &rowIndex, &ddata); HYPRE_IJVectorGetValues(f2hat, 1, &rowCount, &ddata2); ddata = ddata2 - ddata; ierr = HYPRE_IJVectorSetValues(f2, 1, (const int *) &rowCount, (const double *) &ddata); - assert( !ierr ); + hypre_assert( !ierr ); rowCount++; - } + } HYPRE_IJVectorDestroy(f2hat); //****************************************************************** @@ -1415,26 +1414,26 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() MPI_Allreduce(tempList, ProcNRows, numProcs_, MPI_INT, MPI_SUM, comm_); delete [] tempList; globalNRows = 0; - for ( i = 0; i < numProcs_; i++ ) + for ( i = 0; i < numProcs_; 
i++ ) { ncnt = globalNRows; globalNRows += ProcNRows[i]; ProcNRows[i] = ncnt; - } + } //****************************************************************** // perform an automatic search for nSchur //------------------------------------------------------------------ nSchur = 0; - for ( i = StartRow; i <= EndRow; i++ ) + for ( i = StartRow; i <= EndRow; i++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); searchIndex = globalNRows + 1; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - if ( colIndex < searchIndex && colVal[j] != 0.0 ) + if ( colIndex < searchIndex && colVal[j] != 0.0 ) searchIndex = colIndex; } HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); @@ -1443,7 +1442,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() } nSchur = EndRow - StartRow + 1 - nSchur; MPI_Allreduce(&nSchur, &globalNSchur, 1, MPI_INT, MPI_SUM,comm_); - + if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { printf("%4d buildSchurSystem : nSchur = %d\n",mypid_,nSchur); @@ -1468,7 +1467,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() MPI_Allreduce(tempList, ProcNSchur, numProcs_, MPI_INT, MPI_SUM, comm_); delete [] tempList; globalNSchur = 0; - for ( i = 0; i < numProcs_; i++ ) + for ( i = 0; i < numProcs_; i++ ) { ncnt = globalNSchur; globalNSchur += ProcNSchur[i]; @@ -1492,7 +1491,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { printf("%4d buildSchurSystem : CStartRow = %d\n",mypid_,CStartRow); - printf("%4d buildSchurSystem : CGlobalDim = %d %d\n", mypid_, + printf("%4d buildSchurSystem : CGlobalDim = %d %d\n", mypid_, CGlobalNRows, CGlobalNCols); printf("%4d buildSchurSystem : CLocalDim = %d %d\n",mypid_, CNRows, CNCols); @@ -1506,7 +1505,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() ierr = HYPRE_IJMatrixCreate(comm_, CStartRow, CStartRow+CNRows-1, CStartCol, CStartCol+CNCols-1, &Cmat); ierr += HYPRE_IJMatrixSetObjectType(Cmat, HYPRE_PARCSR); - 
assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros per row in Cmat and call set up @@ -1515,12 +1514,12 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() maxRowSize = 0; CMatSize = new int[CNRows]; - for ( i = 0; i < nSchur; i++ ) + for ( i = 0; i < nSchur; i++ ) { rowIndex = EndRow - nSchur + i + 1; HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; searchIndex = HYPRE_Schur_Search(colIndex, numProcs_, ProcNRows, @@ -1533,7 +1532,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() } ierr = HYPRE_IJMatrixSetRowSizes(Cmat, CMatSize); ierr += HYPRE_IJMatrixInitialize(Cmat); - assert(!ierr); + hypre_assert(!ierr); delete [] CMatSize; //------------------------------------------------------------------ @@ -1548,14 +1547,14 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() rowIndex = EndRow - nSchur + i + 1; HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { if ( colVal[j] != 0.0 ) { colIndex = colInd[j]; searchIndex = HYPRE_Schur_Search(colIndex, numProcs_, ProcNRows, ProcNSchur, globalNRows, globalNSchur); - if ( searchIndex < 0 ) + if ( searchIndex < 0 ) { searchIndex = - searchIndex - 1; colIndex = searchIndex; @@ -1566,11 +1565,11 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { printf("%4d buildSchurSystem WARNING : Cmat ", mypid_); - printf("out of range %d - %d (%d)\n", rowCount, colIndex, + printf("out of range %d - %d (%d)\n", rowCount, colIndex, CGlobalNCols); - } - } - if ( newRowSize > maxRowSize+1 ) + } + } + if ( newRowSize > maxRowSize+1 ) { if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { @@ -1579,7 +1578,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() } } } - } + } } 
HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); HYPRE_IJMatrixSetValues(Cmat, 1, &newRowSize, (const int *) &rowCount, @@ -1590,7 +1589,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() delete [] newColVal; //------------------------------------------------------------------ - // finally assemble the matrix + // finally assemble the matrix //------------------------------------------------------------------ HYPRE_IJMatrixAssemble(Cmat); @@ -1601,15 +1600,15 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() { ncnt = 0; MPI_Barrier(MPI_COMM_WORLD); - while ( ncnt < numProcs_ ) + while ( ncnt < numProcs_ ) { - if ( mypid_ == ncnt ) + if ( mypid_ == ncnt ) { printf("====================================================\n"); printf("%4d buildSchurSystem : matrix Cmat assembled %d.\n", mypid_,CStartRow); fflush(stdout); - for ( i = CStartRow; i < CStartRow+nSchur; i++ ) + for ( i = CStartRow; i < CStartRow+nSchur; i++ ) { HYPRE_ParCSRMatrixGetRow(C_csr,i,&rowSize,&colInd,&colVal); printf("Cmat ROW = %6d (%d)\n", i, rowSize); @@ -1645,7 +1644,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() if ( HYOutputLevel_ & HYFEI_SCHURREDUCE1 ) { printf("%4d buildSchurSystem : MStartRow = %d\n",mypid_,MStartRow); - printf("%4d buildSchurSystem : MGlobalDim = %d %d\n", mypid_, + printf("%4d buildSchurSystem : MGlobalDim = %d %d\n", mypid_, MGlobalNRows, MGlobalNCols); printf("%4d buildSchurSystem : MLocalDim = %d %d\n",mypid_, MNRows, MNCols); @@ -1658,11 +1657,11 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() ierr = HYPRE_IJMatrixCreate(comm_, MStartRow, MStartRow+MNRows-1, MStartRow, MStartRow+MNCols-1, &Mmat); ierr += HYPRE_IJMatrixSetObjectType(Mmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixCreate(comm_, CTStartRow, CTStartRow+CTNRows-1, CStartRow, CStartRow+CTNCols-1, &CTmat); ierr += HYPRE_IJMatrixSetObjectType(Mmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); 
//------------------------------------------------------------------ // compute row sizes for Mmat @@ -1672,7 +1671,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() for ( i = 0; i < MNRows; i++ ) MMatSize[i] = 1; ierr = HYPRE_IJMatrixSetRowSizes(Mmat, MMatSize); ierr += HYPRE_IJMatrixInitialize(Mmat); - assert(!ierr); + hypre_assert(!ierr); delete [] MMatSize; //------------------------------------------------------------------ @@ -1682,11 +1681,11 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() maxRowSize = 0; CTMatSize = new int[CTNRows]; rowCount = 0; - for ( i = StartRow; i <= EndRow-nSchur; i++ ) + for ( i = StartRow; i <= EndRow-nSchur; i++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; if ( colVal[j] != 0.0 ) @@ -1705,7 +1704,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() } ierr = HYPRE_IJMatrixSetRowSizes(CTmat, CTMatSize); ierr += HYPRE_IJMatrixInitialize(CTmat); - assert(!ierr); + hypre_assert(!ierr); delete [] CTMatSize; //------------------------------------------------------------------ @@ -1719,12 +1718,12 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() diagonal = new double[MNRows]; rowIndex = MStartRow; ierr = 0; - for ( i = StartRow; i <= EndRow-nSchur; i++ ) + for ( i = StartRow; i <= EndRow-nSchur; i++ ) { ncnt = 0; HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; if ( colVal[j] != 0.0 ) @@ -1745,9 +1744,9 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() } newColVal[newRowSize++] = colVal[j]; } - else if ( colIndex == i && colVal[j] != 0.0 ) - { - ddata = 1.0 / colVal[j]; + else if ( colIndex == i && colVal[j] != 0.0 ) + { + ddata = 1.0 / colVal[j]; ncnt++; maxdiag = ( colVal[j] > maxdiag ) ? colVal[j] : maxdiag; mindiag = ( colVal[j] < mindiag ) ? 
colVal[j] : mindiag; @@ -1760,7 +1759,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() printf("%4d : buildSchurSystem WARNING - diag[%d] not found.\n", mypid_, i); ierr = 1; - } + } else if ( ncnt > 1 ) ierr = 1; if ( newRowSize == 0 ) { @@ -1822,7 +1821,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() } schurReduction_ = 0; delete [] ProcNRows; - delete [] ProcNSchur; + delete [] ProcNSchur; HYPRE_IJMatrixDestroy(Cmat); return; } @@ -1835,15 +1834,15 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() { ncnt = 0; MPI_Barrier(MPI_COMM_WORLD); - while ( ncnt < numProcs_ ) + while ( ncnt < numProcs_ ) { - if ( mypid_ == ncnt ) + if ( mypid_ == ncnt ) { printf("====================================================\n"); printf("%4d buildSchurSystem : matrix CTmat assembled %d.\n", mypid_,CTStartRow); fflush(stdout); - for ( i = CTStartRow; i < CTStartRow+CTNRows; i++ ) + for ( i = CTStartRow; i < CTStartRow+CTNRows; i++ ) { HYPRE_ParCSRMatrixGetRow(CT_csr,i,&rowSize,&colInd,&colVal); printf("CTmat ROW = %6d (%d)\n", i, rowSize); @@ -1909,28 +1908,28 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() HYPRE_IJVectorSetObjectType(f1, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f1); ierr += HYPRE_IJVectorAssemble(f1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorCreate(comm_, CStartRow, CStartRow+CNRows-1, &f2hat); HYPRE_IJVectorSetObjectType(f2hat, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f2hat); ierr += HYPRE_IJVectorAssemble(f2hat); - assert(!ierr); + hypre_assert(!ierr); rowCount = CTStartRow; - for ( i = StartRow; i <= EndRow-nSchur; i++ ) + for ( i = StartRow; i <= EndRow-nSchur; i++ ) { HYPRE_IJVectorGetValues(HYb_, 1, &i, &ddata); ddata *= diagonal[rowCount-CTStartRow]; ierr = HYPRE_IJVectorSetValues(f1, 1, (const int *) &rowCount, (const double *) &ddata); - assert( !ierr ); + hypre_assert( !ierr ); rowCount++; - } + } HYPRE_IJVectorGetObject(f1, (void **) &f1_csr); HYPRE_IJVectorGetObject(f2hat, (void **) &f2hat_csr); 
HYPRE_ParCSRMatrixMatvec( 1.0, C_csr, f1_csr, 0.0, f2hat_csr ); delete [] diagonal; - HYPRE_IJVectorDestroy(f1); + HYPRE_IJVectorDestroy(f1); //------------------------------------------------------------------ // form f2 = f2 - f2hat (and negate) @@ -1940,10 +1939,10 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() HYPRE_IJVectorSetObjectType(f2, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f2); ierr += HYPRE_IJVectorAssemble(f2); - assert(!ierr); + hypre_assert(!ierr); rowCount = CStartRow; - for ( i = 0; i < nSchur; i++ ) + for ( i = 0; i < nSchur; i++ ) { rowIndex = EndRow - nSchur + i + 1; HYPRE_IJVectorGetValues(HYb_, 1, &rowIndex, &ddata); @@ -1954,9 +1953,9 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() HYPRE_IJVectorAddToValues(f2, 1, (const int *) &rowCount, (const double *) &ddata); HYPRE_IJVectorGetValues(f2, 1, &rowCount, &ddata); - assert( !ierr ); + hypre_assert( !ierr ); rowCount++; - } + } HYPRE_IJVectorDestroy(f2hat); //****************************************************************** @@ -1966,7 +1965,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() ierr = HYPRE_IJMatrixCreate(comm_, CStartRow, CStartRow+CNRows-1, CStartRow, CStartRow+CNRows-1, &reducedA_); ierr += HYPRE_IJMatrixSetObjectType(reducedA_, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute row sizes for the Schur complement @@ -1974,7 +1973,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() CMatSize = new int[CNRows]; maxRowSize = 0; - for ( i = CStartRow; i < CStartRow+CNRows; i++ ) + for ( i = CStartRow; i < CStartRow+CNRows; i++ ) { HYPRE_ParCSRMatrixGetRow(S_csr,i,&rowSize,&colInd,NULL); rowIndex = EndRow - nSchur + i + 1; @@ -1983,7 +1982,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() newColInd = new int[newRowSize]; for (j = 0; j < rowSize; j++) newColInd[j] = colInd[j]; ncnt = 0; - for (j = 0; j < rowSize2; j++) + for (j = 0; j < rowSize2; j++) { colIndex = 
colInd2[j]; searchIndex = HYPRE_Schur_Search(colIndex, numProcs_, ProcNRows, @@ -2014,14 +2013,14 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() } ierr = HYPRE_IJMatrixSetRowSizes(reducedA_, CMatSize); ierr += HYPRE_IJMatrixInitialize(reducedA_); - assert(!ierr); + hypre_assert(!ierr); delete [] CMatSize; //------------------------------------------------------------------ // load and assemble the Schur complement matrix //------------------------------------------------------------------ - for ( i = CStartRow; i < CStartRow+CNRows; i++ ) + for ( i = CStartRow; i < CStartRow+CNRows; i++ ) { HYPRE_ParCSRMatrixGetRow(S_csr,i,&rowSize,&colInd,&colVal); rowIndex = EndRow - nSchur + i + 1; @@ -2029,13 +2028,13 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() newRowSize = rowSize + rowSize2; newColInd = new int[newRowSize]; newColVal = new double[newRowSize]; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { newColInd[j] = colInd[j]; newColVal[j] = colVal[j]; } ncnt = 0; - for (j = 0; j < rowSize2; j++) + for (j = 0; j < rowSize2; j++) { colIndex = colInd2[j]; searchIndex = HYPRE_Schur_Search(colIndex, numProcs_, ProcNRows, @@ -2056,7 +2055,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() { newColVal[ncnt] += newColVal[j]; } - else + else { ncnt++; newColInd[ncnt] = newColInd[j]; @@ -2067,10 +2066,10 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() ncnt = 0; rowmax = 0.0; for ( j = 0; j < newRowSize; j++ ) - if ( habs(newColVal[j]) > rowmax ) rowmax = habs(newColVal[j]); + if ( habs(newColVal[j]) > rowmax ) rowmax = habs(newColVal[j]); for ( j = 0; j < newRowSize; j++ ) { - if ( habs(newColVal[j]) > rowmax*1.0e-14 ) + if ( habs(newColVal[j]) > rowmax*1.0e-14 ) { newColInd[ncnt] = newColInd[j]; newColVal[ncnt++] = newColVal[j]; @@ -2096,12 +2095,12 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() ierr = HYPRE_IJVectorSetObjectType(reducedX_, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(reducedX_); ierr = 
HYPRE_IJVectorAssemble(reducedX_); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJVectorCreate(comm_,CStartRow,CStartRow+CNRows-1,&reducedR_); ierr = HYPRE_IJVectorSetObjectType(reducedR_, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(reducedR_); ierr = HYPRE_IJVectorAssemble(reducedR_); - assert(!ierr); + hypre_assert(!ierr); reducedB_ = f2; currA_ = reducedA_; @@ -2113,9 +2112,9 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() // save A21 and invA22 for solution recovery //------------------------------------------------------------------ - HYA21_ = CTmat; - HYA12_ = Cmat; - HYinvA22_ = Mmat; + HYA21_ = CTmat; + HYA12_ = Cmat; + HYinvA22_ = Mmat; A21NRows_ = CTNRows; A21NCols_ = CTNCols; @@ -2140,7 +2139,7 @@ void HYPRE_LinSysCore::buildSchurReducedSystem2() // search to see if the key is in the range //----------------------------------------------------------------------------- -int HYPRE_LinSysCore::HYPRE_Schur_Search(int key, int nprocs, int *Barray, +int HYPRE_LinSysCore::HYPRE_Schur_Search(int key, int nprocs, int *Barray, int *Sarray, int globalNrows, int globalNSchur) { int i, index1, index2, search_index, out_of_range, not_found; @@ -2159,9 +2158,9 @@ int HYPRE_LinSysCore::HYPRE_Schur_Search(int key, int nprocs, int *Barray, else { index1 = Barray[i+1]; - index2 = index1 - Sarray[i+1]; + index2 = index1 - Sarray[i+1]; } - if ( key >= index2 && key < index1 ) + if ( key >= index2 && key < index1 ) { search_index += ( key - index2 ); break; @@ -2176,7 +2175,7 @@ int HYPRE_LinSysCore::HYPRE_Schur_Search(int key, int nprocs, int *Barray, out_of_range += (key - Barray[i]); not_found = 1; break; - } + } if ( i == (nprocs-1) ) out_of_range += (index1 - index2); } if ( not_found ) return (-out_of_range-1); diff --git a/src/FEI_mv/fei-hypre/hypre_slide_reduce.cxx b/src/FEI_mv/fei-hypre/hypre_slide_reduce.cxx index 0202f35fc..ed2ad10c5 100644 --- a/src/FEI_mv/fei-hypre/hypre_slide_reduce.cxx +++ b/src/FEI_mv/fei-hypre/hypre_slide_reduce.cxx @@ -12,7 
+12,6 @@ #include #include #include -#include //*************************************************************************** // HYPRE includes @@ -33,7 +32,7 @@ #define habs(x) (((x) > 0.0) ? x : -(x)) -extern "C" +extern "C" { int hypre_BoomerAMGBuildCoarseOperator(hypre_ParCSRMatrix*, hypre_ParCSRMatrix*, @@ -68,7 +67,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem() HYPRE_ParCSRMatrix A_csr, RAP_csr; //****************************************************************** - // initial set up + // initial set up //------------------------------------------------------------------ if ( mypid_ == 0 && (HYOutputLevel_ & HYFEI_SLIDEREDUCE1) ) @@ -94,15 +93,15 @@ void HYPRE_LinSysCore::buildSlideReducedSystem() // search the entire local matrix to find where the constraint // equations are, if not already given //------------------------------------------------------------------ - + MPI_Allreduce(&nConstraints_,&globalNConstr,1,MPI_INT,MPI_SUM,comm_); if ( globalNConstr == 0 ) { - for ( i = EndRow; i >= StartRow; i-- ) + for ( i = EndRow; i >= StartRow; i-- ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); isAConstr = 1; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) if ( colInd[j] == i && colVal[j] != 0.0 ) {isAConstr = 0; break;} HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); if ( isAConstr ) nConstraints_++; @@ -118,7 +117,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem() //------------------------------------------------------------------ // get information about nRows from all processors //------------------------------------------------------------------ - + nRows = localEndRow_ - localStartRow_ + 1; ProcNRows = new int[numProcs_]; tempList = new int[numProcs_]; @@ -136,7 +135,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem() globalNRows = 0; ncnt = 0; - for ( i = 0; i < numProcs_; i++ ) + for ( i = 0; i < numProcs_; i++ ) { globalNRows += ProcNRows[i]; ncnt2 = ProcNRows[i]; @@ -147,7 +146,7 @@ void 
HYPRE_LinSysCore::buildSlideReducedSystem() //------------------------------------------------------------------ // compose a global array marking where the constraint equations are //------------------------------------------------------------------ - + globalNConstr = 0; tempList = new int[numProcs_]; ProcNConstr = new int[numProcs_]; @@ -162,14 +161,14 @@ void HYPRE_LinSysCore::buildSlideReducedSystem() //------------------------------------------------------------------ ncnt = 0; - for ( i = 0; i < numProcs_; i++ ) + for ( i = 0; i < numProcs_; i++ ) { globalNConstr += ProcNConstr[i]; ncnt2 = ProcNConstr[i]; ProcNConstr[i] = ncnt; ncnt += ncnt2; } - + //****************************************************************** // compose the local and global selected node lists //------------------------------------------------------------------ @@ -179,7 +178,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem() //------------------------------------------------------------------ globalNSelected = globalNConstr; - if (globalNSelected > 0) + if (globalNSelected > 0) { globalSelectedList = new int[globalNSelected]; globalSelectedListAux = new int[globalNSelected]; @@ -187,29 +186,29 @@ void HYPRE_LinSysCore::buildSlideReducedSystem() else globalSelectedList = globalSelectedListAux = NULL; if ( selectedList_ != NULL ) delete [] selectedList_; if ( selectedListAux_ != NULL ) delete [] selectedListAux_; - if ( nConstraints_ > 0 ) + if ( nConstraints_ > 0 ) { selectedList_ = new int[nConstraints_]; selectedListAux_ = new int[nConstraints_]; } else selectedList_ = selectedListAux_ = NULL; - + //------------------------------------------------------------------ // call the three parts //------------------------------------------------------------------ - buildSlideReducedSystemPartA(ProcNRows,ProcNConstr,globalNRows, - globalNConstr,globalSelectedList, + buildSlideReducedSystemPartA(ProcNRows,ProcNConstr,globalNRows, + globalNConstr,globalSelectedList, globalSelectedListAux); 
buildSlideReducedSystemPartB(ProcNRows,ProcNConstr,globalNRows, - globalNConstr,globalSelectedList, + globalNConstr,globalSelectedList, globalSelectedListAux, &RAP_csr); buildSlideReducedSystemPartC(ProcNRows,ProcNConstr,globalNRows, - globalNConstr,globalSelectedList, + globalNConstr,globalSelectedList, globalSelectedListAux, RAP_csr); //------------------------------------------------------------------ - // initialize global variables and clean up + // initialize global variables and clean up //------------------------------------------------------------------ currA_ = reducedA_; @@ -235,7 +234,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem() if ( colValues_[j] != NULL ) delete [] colValues_[j]; delete [] colValues_; colValues_ = NULL; - if ( rowLengths_ != NULL ) + if ( rowLengths_ != NULL ) { delete [] rowLengths_; rowLengths_ = NULL; @@ -247,9 +246,9 @@ void HYPRE_LinSysCore::buildSlideReducedSystem() // Part A of buildSlideReducedSystem : generate a selected equation list //----------------------------------------------------------------------------- -void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, +void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, int *ProcNConstr, int globalNRows, - int globalNConstr, int *globalSelectedList, + int globalNConstr, int *globalSelectedList, int *globalSelectedListAux) { int i, ncnt2, StartRow, EndRow, ncnt;; @@ -286,26 +285,26 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, // candidates are those with 1 link to the constraint list //------------------------------------------------------------------ - for ( i = StartRow; i <= EndRow-nConstraints_; i++ ) + for ( i = StartRow; i <= EndRow-nConstraints_; i++ ) { ierr = HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); - assert(!ierr); + hypre_assert(!ierr); ncnt = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; for (procIndex=0; procIndex < numProcs_; procIndex++ 
) if ( colIndex < ProcNRows[procIndex] ) break; - if ( procIndex == numProcs_ ) - ubound = globalNRows - + if ( procIndex == numProcs_ ) + ubound = globalNRows - (globalNConstr-ProcNConstr[procIndex-1]); - else - ubound = ProcNRows[procIndex] - (ProcNConstr[procIndex] - - ProcNConstr[procIndex-1]); + else + ubound = ProcNRows[procIndex] - (ProcNConstr[procIndex] - + ProcNConstr[procIndex-1]); //Note : include structural zeros by not checking for nonzero - //if ( colIndex >= ubound && colVal[j] != 0.0 ) - if ( colIndex >= ubound ) + //if ( colIndex >= ubound && colVal[j] != 0.0 ) + if ( colIndex >= ubound ) { ncnt++; searchIndex = colIndex; @@ -313,15 +312,15 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, if ( ncnt > 1 ) break; } HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); - if ( j == rowSize && ncnt == 1 ) + if ( j == rowSize && ncnt == 1 ) { constrListAux[nSlaves] = searchIndex; constrList_[nSlaves++] = i; } if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE2 ) { - if ( j == rowSize && ncnt == 1 ) - printf("%4d : SlideReductionA - slave candidate %d = %d(%d)\n", + if ( j == rowSize && ncnt == 1 ) + printf("%4d : SlideReductionA - slave candidate %d = %d(%d)\n", mypid_, nSlaves-1, i, constrListAux[nSlaves-1]); } } @@ -336,34 +335,34 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, if ( mypid_ == 0 && ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) ) printf("%4d : SlideReductionA WARNING - constraint list not empty\n", mypid_); - } + } //--------------------------------------------------------------------- // search the constraint equations for the selected nodes // (search for candidates column index with maximum magnitude) //--------------------------------------------------------------------- - + nSelected = 0; rowIndex = -1; searchIndex = 0; - for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) + for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) { ierr = 
HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); - assert(!ierr); + hypre_assert(!ierr); searchIndex = -1; searchValue = -1.0E10; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { - if (colVal[j] != 0.0 && colInd[j] >= StartRow - && colInd[j] <= (EndRow-nConstraints_)) + if (colVal[j] != 0.0 && colInd[j] >= StartRow + && colInd[j] <= (EndRow-nConstraints_)) { colIndex = hypre_BinarySearch(constrList_,colInd[j],nSlaves); - if ( colIndex >= 0 && constrListAux[colIndex] != -1) + if ( colIndex >= 0 && constrListAux[colIndex] != -1) { if ( habs(colVal[j]) > searchValue ) { - if (i != constrListAux[colIndex]) + if (i != constrListAux[colIndex]) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { @@ -377,7 +376,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, } } } - } + } if ( searchIndex >= 0 ) { selectedList[nSelected++] = searchIndex; @@ -386,15 +385,15 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, printf("%4d : SlideReductionA - constraint %4d <=> slave %d \n", mypid_,i,searchIndex); } - } - else + } + else { // get ready for error processing colInd2 = new int[rowSize]; colVal2 = new double[rowSize]; for ( j = 0; j < rowSize; j++ ) - { + { colInd2[j] = colInd[j]; colVal2[j] = colVal[j]; } @@ -418,23 +417,23 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, { ncnt2 = 0; while ( ncnt2 < numProcs_ ) - { + { if ( ncnt2 == mypid_ && rowIndex >= 0 ) { printf("%4d : SlideReductionA ERROR - constraint number",mypid_); printf(" cannot be found for row %d\n", rowIndex); - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { printf("ROW %4d COL = %d VAL = %e\n",rowIndex,colInd[j], colVal[j]); - if (colVal[j] != 0.0 && colInd[j] >= StartRow - && colInd[j] <= (EndRow-nConstraints_)) + if (colVal[j] != 0.0 && colInd[j] >= StartRow + && colInd[j] <= (EndRow-nConstraints_)) { colIndex = colInd[j]; HYPRE_ParCSRMatrixGetRow(A_csr,colIndex,&rowSize2,&colInd2, &colVal2); printf(" row 
%4d (%d) : \n",colIndex, rowSize2); - for (k = 0; k < rowSize2; k++) + for (k = 0; k < rowSize2; k++) printf(" row %4d col = %d val = %e\n",colIndex, colInd2[k],colVal2[k]); HYPRE_ParCSRMatrixRestoreRow(A_csr,colIndex,&rowSize2, @@ -458,34 +457,34 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, //------------------------------------------------------------------ dble_array = new double[nSelected]; - for ( i = 0; i < nSelected; i++ ) dble_array[i] = (double) i; + for ( i = 0; i < nSelected; i++ ) dble_array[i] = (double) i; if ( nSelected > 1 ) hypre_qsort1(selectedList, dble_array, 0, nSelected-1); - for (i = 1; i < nSelected; i++) + for (i = 1; i < nSelected; i++) { if ( selectedList[i] == selectedList[i-1] ) { - printf("%4d : SlideReductionA ERROR - repeated selected nodes %d \n", + printf("%4d : SlideReductionA ERROR - repeated selected nodes %d \n", mypid_, selectedList[i]); exit(1); } } for (i = 0; i < nSelected; i++) selectedListAux[i] = (int) dble_array[i]; delete [] dble_array; - + recvCntArray = new int[numProcs_]; displArray = new int[numProcs_]; MPI_Allgather(&nSelected, 1, MPI_INT, recvCntArray, 1,MPI_INT, comm_); displArray[0] = 0; - for ( i = 1; i < numProcs_; i++ ) + for ( i = 1; i < numProcs_; i++ ) displArray[i] = displArray[i-1] + recvCntArray[i-1]; - for ( i = 0; i < nSelected; i++ ) - selectedListAux[i] += displArray[mypid_]; + for ( i = 0; i < nSelected; i++ ) + selectedListAux[i] += displArray[mypid_]; MPI_Allgatherv(selectedList, nSelected, MPI_INT, globalSelectedList, recvCntArray, displArray, MPI_INT, comm_); MPI_Allgatherv(selectedListAux, nSelected, MPI_INT, globalSelectedListAux, recvCntArray, displArray, MPI_INT, comm_); - for ( i = 0; i < nSelected; i++ ) - selectedListAux[i] -= displArray[mypid_]; + for ( i = 0; i < nSelected; i++ ) + selectedListAux[i] -= displArray[mypid_]; delete [] recvCntArray; delete [] displArray; @@ -496,15 +495,15 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartA(int *ProcNRows, 
i,selectedList[i],selectedListAux[i]); } } - + //***************************************************************************** // Part B of buildSlideReducedSystem : create submatrices //----------------------------------------------------------------------------- -void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, - int *ProcNConstr, int globalNRows, - int globalNConstr, int *globalSelectedList, - int *globalSelectedListAux, +void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, + int *ProcNConstr, int globalNRows, + int globalNConstr, int *globalSelectedList, + int *globalSelectedListAux, HYPRE_ParCSRMatrix *rap_csr) { int A21NRows, A21GlobalNRows, A21NCols, A21GlobalNCols, A21StartRow; @@ -531,7 +530,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, nSelected = nConstraints_; selectedList = selectedList_; selectedListAux = selectedListAux_; - + //------------------------------------------------------------------ // calculate the dimension of A21 //------------------------------------------------------------------ @@ -547,7 +546,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, { printf("%4d : SlideReductionB - A21StartRow = %d\n", mypid_, A21StartRow); - printf("%4d : SlideReductionB - A21GlobalDim = %d %d\n", mypid_, + printf("%4d : SlideReductionB - A21GlobalDim = %d %d\n", mypid_, A21GlobalNRows, A21GlobalNCols); printf("%4d : SlideReductionB - A21LocalDim = %d %d\n",mypid_, A21NRows, A21NCols); @@ -560,7 +559,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, ierr = HYPRE_IJMatrixCreate(comm_, A21StartRow, A21StartRow+A21NRows-1, A21StartCol, A21StartCol+A21NCols-1, &A21); ierr += HYPRE_IJMatrixSetObjectType(A21, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros in the first nConstraint row of A21 @@ -573,27 +572,27 @@ void 
HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, newEndRow = EndRow - nConstraints_; A21MatSize = new int[A21NRows]; - for ( i = 0; i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux[j] == i ) + if ( selectedListAux[j] == i ) { - rowIndex = selectedList[j]; + rowIndex = selectedList[j]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); rowSize2 = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - if ( colVal[j] != 0.0 ) + if ( colVal[j] != 0.0 ) { - searchIndex = hypre_BinarySearch(globalSelectedList,colIndex, + searchIndex = hypre_BinarySearch(globalSelectedList,colIndex, globalNSelected); - if (searchIndex < 0 && - (colIndex <= newEndRow || colIndex >= localEndRow_)) + if (searchIndex < 0 && + (colIndex <= newEndRow || colIndex >= localEndRow_)) rowSize2++; } } @@ -608,22 +607,22 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, // (which consists of the rows in constraint equations), the nnz will // be reduced by excluding the selected slave columns only (since the // entries corresponding to the constraint columns are 0, and since - // the selected matrix is a diagonal matrix, there is no need to + // the selected matrix is a diagonal matrix, there is no need to // search for slave equations in the off-processor list) //------------------------------------------------------------------ rowCount = nSelected; - for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) + for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); rowSize2 = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { if ( colVal[j] != 0.0 ) { colIndex = colInd[j]; searchIndex = hypre_BinarySearch(globalSelectedList,colIndex, - globalNSelected); + globalNSelected); if ( searchIndex < 0 ) rowSize2++; } } @@ -641,7 
+640,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, ierr = HYPRE_IJMatrixSetRowSizes(A21, A21MatSize); ierr += HYPRE_IJMatrixInitialize(A21); - assert(!ierr); + hypre_assert(!ierr); delete [] A21MatSize; //------------------------------------------------------------------ @@ -658,26 +657,26 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, diagCount = 0; for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux[j] == i ) + if ( selectedListAux[j] == i ) { - rowIndex = selectedList[j]; + rowIndex = selectedList[j]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { if ( colVal[j] != 0.0 ) { colIndex = colInd[j]; - if (colIndex <= newEndRow || colIndex >= localEndRow_) + if (colIndex <= newEndRow || colIndex >= localEndRow_) { - searchIndex = HYPRE_LSI_Search(globalSelectedList,colIndex, - globalNSelected); - if ( searchIndex < 0 ) + searchIndex = HYPRE_LSI_Search(globalSelectedList,colIndex, + globalNSelected); + if ( searchIndex < 0 ) { searchIndex = - searchIndex - 1; for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) @@ -691,11 +690,11 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { printf("%4d : SlideReductionB WARNING - A21 ",mypid_); - printf("out of range (%d,%d (%d))\n", rowCount, + printf("out of range (%d,%d (%d))\n", rowCount, colIndex, A21GlobalNCols); - } - } - if ( newRowSize > maxRowSize+1 ) + } + } + if ( newRowSize > maxRowSize+1 ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { @@ -705,7 +704,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, } } } - else if ( colIndex > newEndRow && colIndex <= EndRow ) + else if ( colIndex > newEndRow && colIndex <= EndRow ) { if ( colVal[j] != 0.0 ) diagonal[diagCount++] = colVal[j]; if ( 
habs(colVal[j]) < 1.0E-8 ) @@ -718,7 +717,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, } } } - } + } } HYPRE_IJMatrixSetValues(A21, 1, &newRowSize, (const int *) &rowCount, @@ -741,7 +740,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, displArray = new int[numProcs_]; MPI_Allgather(&diagCount, 1, MPI_INT, recvCntArray, 1, MPI_INT, comm_); displArray[0] = 0; - for ( i = 1; i < numProcs_; i++ ) + for ( i = 1; i < numProcs_; i++ ) displArray[i] = displArray[i-1] + recvCntArray[i-1]; ncnt = displArray[numProcs_-1] + recvCntArray[numProcs_-1]; if ( ncnt > 0 ) extDiagonal = new double[ncnt]; @@ -757,16 +756,16 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, // next load the second nConstraint rows to A21 extracted from A //------------------------------------------------------------------ - for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) + for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; searchIndex = HYPRE_LSI_Search(globalSelectedList,colIndex, - globalNSelected); - if ( searchIndex < 0 && colVal[j] != 0.0 ) + globalNSelected); + if ( searchIndex < 0 && colVal[j] != 0.0 ) { searchIndex = - searchIndex - 1; for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) @@ -780,8 +779,8 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) printf("%4d : SlideReductionB WARNING - A21(%d,%d(%d))\n", mypid_, rowCount, colIndex, A21GlobalNCols); - } - if ( newRowSize > maxRowSize+1 ) + } + if ( newRowSize > maxRowSize+1 ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { @@ -789,7 +788,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, printf("passing array boundary(2).\n"); } } - } + } } HYPRE_IJMatrixSetValues(A21, 1, &newRowSize, (const int *) 
&rowCount, (const int *) newColInd, (const double *) newColVal); @@ -811,15 +810,15 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, { ncnt = 0; MPI_Barrier(comm_); - while ( ncnt < numProcs_ ) + while ( ncnt < numProcs_ ) { - if ( mypid_ == ncnt ) + if ( mypid_ == ncnt ) { printf("====================================================\n"); printf("%4d : SlideReductionB - matrix A21 assembled %d.\n", mypid_,A21StartRow); fflush(stdout); - for ( i = A21StartRow; i < A21StartRow+2*nConstraints_; i++ ) + for ( i = A21StartRow; i < A21StartRow+2*nConstraints_; i++ ) { HYPRE_ParCSRMatrixGetRow(A21_csr,i,&rowSize,&colInd,&colVal); printf("A21 ROW = %6d (%d)\n", i, rowSize); @@ -849,9 +848,9 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, invA22GlobalNCols = invA22GlobalNRows; if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { - printf("%4d : SlideReductionB - A22GlobalDim = %d %d\n", mypid_, + printf("%4d : SlideReductionB - A22GlobalDim = %d %d\n", mypid_, invA22GlobalNRows, invA22GlobalNCols); - printf("%4d : SlideReductionB - A22LocalDim = %d %d\n", mypid_, + printf("%4d : SlideReductionB - A22LocalDim = %d %d\n", mypid_, invA22NRows, invA22NCols); } @@ -862,7 +861,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, ierr = HYPRE_IJMatrixCreate(comm_, A21StartRow, A21StartRow+invA22NRows-1, A21StartRow, A21StartRow+invA22NCols-1, &invA22); ierr += HYPRE_IJMatrixSetObjectType(invA22, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the no. 
of nonzeros in the first nConstraint row of invA22 @@ -873,37 +872,37 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, for ( i = 0; i < nConstraints_; i++ ) invA22MatSize[i] = 1; //------------------------------------------------------------------ - // compute the number of nonzeros in the second nConstraints row of + // compute the number of nonzeros in the second nConstraints row of // invA22 (consisting of [D and A22 block]) //------------------------------------------------------------------ - for ( i = 0; i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux[j] == i ) + if ( selectedListAux[j] == i ) { - rowIndex = selectedList[j]; + rowIndex = selectedList[j]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); rowSize2 = 1; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - if ( colVal[j] != 0.0 ) + if ( colVal[j] != 0.0 ) { - if ( colIndex >= StartRow && colIndex <= newEndRow ) + if ( colIndex >= StartRow && colIndex <= newEndRow ) { - searchIndex = hypre_BinarySearch(selectedList, colIndex, - nSelected); + searchIndex = hypre_BinarySearch(selectedList, colIndex, + nSelected); if ( searchIndex >= 0 ) rowSize2++; - } - else if ( colIndex < StartRow || colIndex > EndRow ) + } + else if ( colIndex < StartRow || colIndex > EndRow ) { - searchIndex = hypre_BinarySearch(globalSelectedList,colIndex, - globalNSelected); + searchIndex = hypre_BinarySearch(globalSelectedList,colIndex, + globalNSelected); if ( searchIndex >= 0 ) rowSize2++; } } @@ -919,7 +918,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, ierr = HYPRE_IJMatrixSetRowSizes(invA22, invA22MatSize); ierr += HYPRE_IJMatrixInitialize(invA22); - assert(!ierr); + hypre_assert(!ierr); delete [] invA22MatSize; //------------------------------------------------------------------ @@ -931,74 +930,74 @@ 
void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, newColInd = new int[maxRowSize]; newColVal = new double[maxRowSize]; - for ( i = 0; i < diagCount; i++ ) + for ( i = 0; i < diagCount; i++ ) { extDiagonal[i] = 1.0 / extDiagonal[i]; } - for ( i = 0; i < nConstraints_; i++ ) + for ( i = 0; i < nConstraints_; i++ ) { - newColInd[0] = A21StartRow + nConstraints_ + i; + newColInd[0] = A21StartRow + nConstraints_ + i; rowIndex = A21StartRow + i; if ( newColInd[0] < 0 || newColInd[0] >= invA22GlobalNCols ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) - printf("%4d : SlideReductionB WARNING - A22 (%d, %d (%d))\n", + printf("%4d : SlideReductionB WARNING - A22 (%d, %d (%d))\n", mypid_, rowIndex, newColInd[0], invA22GlobalNCols); - } + } newColVal[0] = extDiagonal[A21StartRow/2+i]; ierr = HYPRE_IJMatrixSetValues(invA22, 1, &one, (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } //------------------------------------------------------------------ // next load the second nConstraints_ rows to A22 extracted from A //------------------------------------------------------------------ - for ( i = 0; i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux[j] == i ) + if ( selectedListAux[j] == i ) { - rowIndex = selectedList[j]; + rowIndex = selectedList[j]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 1; newColInd[0] = A21StartRow + i; - newColVal[0] = extDiagonal[A21StartRow/2+i]; - for (j = 0; j < rowSize; j++) + newColVal[0] = extDiagonal[A21StartRow/2+i]; + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; if ( colVal[j] != 0.0 ) { - searchIndex = hypre_BinarySearch(globalSelectedList,colIndex, - globalNSelected); - if ( searchIndex >= 0 ) + searchIndex = hypre_BinarySearch(globalSelectedList,colIndex, + globalNSelected); + if ( searchIndex 
>= 0 ) { searchIndex = globalSelectedListAux[searchIndex]; for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; if ( procIndex == numProcs_ ) { - newColInd[newRowSize] = searchIndex + globalNConstr; + newColInd[newRowSize] = searchIndex + globalNConstr; } else { - newColInd[newRowSize] = searchIndex + - ProcNConstr[procIndex]; + newColInd[newRowSize] = searchIndex + + ProcNConstr[procIndex]; } - if ( newColInd[newRowSize] < 0 || + if ( newColInd[newRowSize] < 0 || newColInd[newRowSize] >= invA22GlobalNCols ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) printf("%4d : SlideReductionB WARNING - A22(%d,%d,%d)\n", mypid_,rowCount,newColInd[newRowSize], invA22GlobalNCols); - } - newColVal[newRowSize++] = - extDiagonal[A21StartRow/2+i] * + } + newColVal[newRowSize++] = - extDiagonal[A21StartRow/2+i] * colVal[j] * extDiagonal[searchIndex]; if ( newRowSize > maxRowSize ) { @@ -1008,14 +1007,14 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, printf("passing array boundary(3).\n"); } } - } - } + } + } } rowCount = A21StartRow + nConstraints_ + i; - ierr = HYPRE_IJMatrixSetValues(invA22, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(invA22, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); } delete [] newColInd; @@ -1034,13 +1033,13 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, { ncnt = 0; MPI_Barrier(comm_); - while ( ncnt < numProcs_ ) + while ( ncnt < numProcs_ ) { - if ( mypid_ == ncnt ) + if ( mypid_ == ncnt ) { printf("====================================================\n"); printf("%4d : SlideReductionB - invA22 \n", mypid_); - for ( i = A21StartRow; i < A21StartRow+2*nConstraints_; i++ ) + for ( i = A21StartRow; i < A21StartRow+2*nConstraints_; i++ ) { 
HYPRE_ParCSRMatrixGetRow(invA22_csr,i,&rowSize,&colInd, &colVal); @@ -1068,7 +1067,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, hypre_BoomerAMGBuildCoarseOperator( (hypre_ParCSRMatrix *) A21_csr, (hypre_ParCSRMatrix *) invA22_csr, - (hypre_ParCSRMatrix *) A21_csr, + (hypre_ParCSRMatrix *) A21_csr, (hypre_ParCSRMatrix **) &RAP_csr); if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) @@ -1100,8 +1099,8 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, // set global objects and checking //------------------------------------------------------------------ - HYA21_ = A21; - HYinvA22_ = invA22; + HYA21_ = A21; + HYinvA22_ = invA22; (*rap_csr) = RAP_csr; MPI_Allreduce(&nnzA21,&ncnt,1,MPI_INT,MPI_SUM,comm_); @@ -1116,10 +1115,10 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartB(int *ProcNRows, void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, int *ProcNConstr, int globalNRows, int globalNConstr, int *globalSelectedList, - int *globalSelectedListAux, + int *globalSelectedListAux, HYPRE_ParCSRMatrix RAP_csr) { - int i, j, nRows, StartRow, EndRow; + int i, j, nRows, StartRow, EndRow; int newNRows, *reducedAMatSize, reducedAStartRow; int rowCount, rowIndex, newRowSize, rowSize, rowSize2, *newColInd; int *colInd, *colInd2, colIndex, searchIndex, ubound, ncnt, ierr; @@ -1147,7 +1146,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, nSelected = nConstraints_; selectedList = selectedList_; selectedListAux = selectedListAux_; - + //------------------------------------------------------------------ // first calculate the dimension of the reduced matrix //------------------------------------------------------------------ @@ -1157,7 +1156,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, ierr = HYPRE_IJMatrixCreate(comm_, A21StartCol, A21StartCol+newNRows-1, A21StartCol, A21StartCol+newNRows-1, &reducedA); ierr += HYPRE_IJMatrixSetObjectType(reducedA, HYPRE_PARCSR); - 
assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // set up reducedA with proper sizes @@ -1168,37 +1167,37 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, rowCount = reducedAStartRow; rowIndex = 0; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - searchIndex = hypre_BinarySearch(selectedList, i, nSelected); - if ( searchIndex < 0 ) + searchIndex = hypre_BinarySearch(selectedList, i, nSelected); + if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); ierr = HYPRE_ParCSRMatrixGetRow(RAP_csr,rowCount,&rowSize2, &colInd2, &colVal2); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize = rowSize + rowSize2; newColInd = new int[newRowSize]; - for (j = 0; j < rowSize; j++) newColInd[j] = colInd[j]; + for (j = 0; j < rowSize; j++) newColInd[j] = colInd[j]; for (j = 0; j < rowSize2; j++) newColInd[rowSize+j] = colInd2[j]; hypre_qsort0(newColInd, 0, newRowSize-1); ncnt = 0; - for ( j = 1; j < newRowSize; j++ ) + for ( j = 1; j < newRowSize; j++ ) { - if ( newColInd[j] != newColInd[ncnt] ) + if ( newColInd[j] != newColInd[ncnt] ) { ncnt++; newColInd[ncnt] = newColInd[j]; - } + } } if ( newRowSize > 0 ) ncnt++; reducedAMatSize[rowIndex++] = ncnt; - + HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); ierr = HYPRE_ParCSRMatrixRestoreRow(RAP_csr,rowCount,&rowSize2, &colInd2,&colVal2); delete [] newColInd; - assert( !ierr ); + hypre_assert( !ierr ); rowCount++; } } @@ -1209,17 +1208,17 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, ierr = HYPRE_IJMatrixSetRowSizes(reducedA, reducedAMatSize); ierr += HYPRE_IJMatrixInitialize(reducedA); - assert(!ierr); + hypre_assert(!ierr); delete [] reducedAMatSize; //------------------------------------------------------------------ - // load the reducedA matrix + // load the reducedA matrix 
//------------------------------------------------------------------ rowCount = reducedAStartRow; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - searchIndex = hypre_BinarySearch(selectedList, i, nSelected); + searchIndex = hypre_BinarySearch(selectedList, i, nSelected); if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr, i, &rowSize, &colInd, &colVal); @@ -1229,55 +1228,55 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, newColInd = new int[newRowSize]; newColVal = new double[newRowSize]; ncnt = 0; - - for ( j = 0; j < rowSize; j++ ) + + for ( j = 0; j < rowSize; j++ ) { colIndex = colInd[j]; for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; - if ( procIndex == numProcs_ ) + if ( procIndex == numProcs_ ) ubound = globalNRows-(globalNConstr-ProcNConstr[numProcs_-1]); else - ubound = ProcNRows[procIndex] - + ubound = ProcNRows[procIndex] - (ProcNConstr[procIndex]-ProcNConstr[procIndex-1]); procIndex--; - if ( colIndex < ubound ) + if ( colIndex < ubound ) { - searchIndex = HYPRE_LSI_Search(globalSelectedList,colIndex, - globalNSelected); - if ( searchIndex < 0 ) + searchIndex = HYPRE_LSI_Search(globalSelectedList,colIndex, + globalNSelected); + if ( searchIndex < 0 ) { searchIndex = - searchIndex - 1; - newColInd[ncnt] = colIndex - ProcNConstr[procIndex] - + newColInd[ncnt] = colIndex - ProcNConstr[procIndex] - searchIndex; - newColVal[ncnt++] = colVal[j]; + newColVal[ncnt++] = colVal[j]; } } } - for ( j = 0; j < rowSize2; j++ ) + for ( j = 0; j < rowSize2; j++ ) { - newColInd[ncnt+j] = colInd2[j]; - newColVal[ncnt+j] = - colVal2[j]; + newColInd[ncnt+j] = colInd2[j]; + newColVal[ncnt+j] = - colVal2[j]; } newRowSize = ncnt + rowSize2; hypre_qsort1(newColInd, newColVal, 0, newRowSize-1); ncnt = 0; - for ( j = 0; j < newRowSize; j++ ) + for ( j = 0; j < newRowSize; j++ ) { - if ( j != ncnt && newColInd[j] == newColInd[ncnt] ) + if ( j != ncnt && 
newColInd[j] == newColInd[ncnt] ) newColVal[ncnt] += newColVal[j]; - else if ( newColInd[j] != newColInd[ncnt] ) + else if ( newColInd[j] != newColInd[ncnt] ) { ncnt++; newColVal[ncnt] = newColVal[j]; newColInd[ncnt] = newColInd[j]; - } - } + } + } newRowSize = ncnt + 1; - ierr = HYPRE_IJMatrixSetValues(reducedA, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(reducedA, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); HYPRE_ParCSRMatrixRestoreRow(RAP_csr,rowCount,&rowSize2,&colInd2, &colVal2); @@ -1293,7 +1292,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, HYPRE_IJMatrixAssemble(reducedA); if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) - printf("%4d : SlideReductionC - reducedAStartRow = %d\n", mypid_, + printf("%4d : SlideReductionC - reducedAStartRow = %d\n", mypid_, reducedAStartRow); HYPRE_IJMatrixGetObject(reducedA, (void **) &reducedA_csr); @@ -1307,7 +1306,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, if ( mypid_ == ncnt ) { printf("====================================================\n"); - for ( i = reducedAStartRow; + for ( i = reducedAStartRow; i < reducedAStartRow+nRows-2*nConstraints_; i++ ) { printf("%d : reducedA ROW %d\n", mypid_, i); @@ -1345,22 +1344,22 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, HYPRE_IJVectorSetObjectType(f2, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f2); ierr += HYPRE_IJVectorAssemble(f2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorCreate(comm_, A21StartRow, A21StartRow+A21NRows-1, &f2hat); HYPRE_IJVectorSetObjectType(f2hat, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f2hat); ierr += HYPRE_IJVectorAssemble(f2hat); - assert(!ierr); + hypre_assert(!ierr); colInd = new int[nSelected*2]; colVal = new double[nSelected*2]; - for ( i = 0; 
i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux[j] == i ) + if ( selectedListAux[j] == i ) { colInd[i] = selectedList[j]; break; @@ -1373,7 +1372,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, exit(1); } } - for ( i = 0; i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) { colInd[nSelected+i] = EndRow - nConstraints_ + i + 1; } @@ -1381,14 +1380,14 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, for ( i = 0; i < nSelected*2; i++ ) colInd[i] = A21StartRow + i; ierr = HYPRE_IJVectorSetValues(f2,2*nSelected,(const int *) colInd, (const double *) colVal); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJVectorGetObject(f2, (void **) &f2_csr); HYPRE_IJVectorGetObject(f2hat, (void **) &f2hat_csr); HYPRE_IJMatrixGetObject(HYinvA22_, (void **) &invA22_csr); HYPRE_ParCSRMatrixMatvec( 1.0, invA22_csr, f2_csr, 0.0, f2hat_csr ); delete [] colVal; delete [] colInd; - HYPRE_IJVectorDestroy(f2); + HYPRE_IJVectorDestroy(f2); // ***************************************************************** // set up A12 with proper sizes before forming f2til = A12 * f2hat @@ -1406,9 +1405,9 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, A12StartRow = ProcNRows[mypid_] - 2 * ProcNConstr[mypid_]; if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { - printf("%4d : SlideReductionC - A12GlobalDim = %d %d\n", mypid_, + printf("%4d : SlideReductionC - A12GlobalDim = %d %d\n", mypid_, A12GlobalNRows, A12GlobalNCols); - printf("%4d : SlideReductionC - A12LocalDim = %d %d\n", mypid_, + printf("%4d : SlideReductionC - A12LocalDim = %d %d\n", mypid_, A12NRows, A12NCols); } @@ -1419,7 +1418,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, ierr = HYPRE_IJMatrixCreate(comm_, A21StartCol, A21StartCol+A12NRows-1, A21StartRow, A21StartRow+A12NCols-1, &A12); ierr += HYPRE_IJMatrixSetObjectType(A12, 
HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros in each row of A12 @@ -1429,31 +1428,31 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, rowCount = A12StartRow; rowIndex = 0; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - searchIndex = hypre_BinarySearch(selectedList, i, nSelected); - if ( searchIndex < 0 ) + searchIndex = hypre_BinarySearch(selectedList, i, nSelected); + if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { if ( colVal[j] != 0.0 ) { colIndex = colInd[j]; for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; - if ( procIndex == numProcs_ ) - ubound = globalNRows - + if ( procIndex == numProcs_ ) + ubound = globalNRows - (globalNConstr - ProcNConstr[numProcs_-1]); else - ubound = ProcNRows[procIndex] - + ubound = ProcNRows[procIndex] - (ProcNConstr[procIndex]-ProcNConstr[procIndex-1]); procIndex--; - if ( colIndex >= ubound ) newRowSize++; + if ( colIndex >= ubound ) newRowSize++; else { - if (hypre_BinarySearch(globalSelectedList,colIndex, + if (hypre_BinarySearch(globalSelectedList,colIndex, globalNSelected) >= 0) newRowSize++; } @@ -1464,7 +1463,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, rowCount++; } } - + //------------------------------------------------------------------ // after fetching the row sizes, set up A12 with such sizes //------------------------------------------------------------------ @@ -1473,55 +1472,55 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, for ( i = 0; i < A12NRows; i++ ) nnzA12 += A12MatSize[i]; ierr = HYPRE_IJMatrixSetRowSizes(A12, A12MatSize); ierr += HYPRE_IJMatrixInitialize(A12); - assert(!ierr); + hypre_assert(!ierr); delete 
[] A12MatSize; //------------------------------------------------------------------ - // load the A12 matrix + // load the A12 matrix //------------------------------------------------------------------ rowCount = A12StartRow; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - searchIndex = hypre_BinarySearch(selectedList, i, nSelected); + searchIndex = hypre_BinarySearch(selectedList, i, nSelected); if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr, i, &rowSize, &colInd, &colVal); newRowSize = 0; newColInd = new int[rowSize]; newColVal = new double[rowSize]; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; - if ( procIndex == numProcs_ ) + if ( procIndex == numProcs_ ) ubound = globalNRows-(globalNConstr-ProcNConstr[numProcs_-1]); else - ubound = ProcNRows[procIndex] - + ubound = ProcNRows[procIndex] - (ProcNConstr[procIndex]-ProcNConstr[procIndex-1]); procIndex--; - if ( colIndex >= ubound ) { - if ( procIndex != numProcs_ - 1 ) + if ( colIndex >= ubound ) { + if ( procIndex != numProcs_ - 1 ) { - newColInd[newRowSize] = colInd[j] - ubound + + newColInd[newRowSize] = colInd[j] - ubound + ProcNConstr[procIndex] + ProcNConstr[procIndex+1]; } - else + else { - newColInd[newRowSize] = colInd[j] - ubound + + newColInd[newRowSize] = colInd[j] - ubound + ProcNConstr[procIndex] + globalNConstr; } - if ( newColInd[newRowSize] < 0 || + if ( newColInd[newRowSize] < 0 || newColInd[newRowSize] >= A12GlobalNCols ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { printf("%4d : SlideReductionC WARNING - A12 col index ", mypid_); - printf("out of range %d %d(%d)\n", i, + printf("out of range %d %d(%d)\n", i, newColInd[newRowSize], A12GlobalNCols); } } @@ -1530,18 +1529,18 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, { searchIndex = 
HYPRE_LSI_Search(globalSelectedList,colInd[j], globalNSelected); - if ( searchIndex >= 0) + if ( searchIndex >= 0) { searchIndex = globalSelectedListAux[searchIndex]; - newColInd[newRowSize] = searchIndex + - ProcNConstr[procIndex]; - if ( newColInd[newRowSize] < 0 || + newColInd[newRowSize] = searchIndex + + ProcNConstr[procIndex]; + if ( newColInd[newRowSize] < 0 || newColInd[newRowSize] >= A12GlobalNCols ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { printf("%4d : SlideReductionC WARNING - \n",mypid_); - printf(" A12(%d,%d,%d))\n", i, + printf(" A12(%d,%d,%d))\n", i, newColInd[newRowSize], A12GlobalNCols); } } @@ -1549,10 +1548,10 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, } } } - ierr = HYPRE_IJMatrixSetValues(A12, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(A12, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); rowCount++; @@ -1565,11 +1564,11 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, printf(" SlideReductionC : NNZ of A12 = %d\n", ncnt); //------------------------------------------------------------------ - // assemble the A12 matrix + // assemble the A12 matrix //------------------------------------------------------------------ ierr = HYPRE_IJMatrixAssemble(A12); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(A12, (void **) &A12_csr); if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE3 ) @@ -1603,29 +1602,29 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, // form reducedB_ = A12 * f2hat //------------------------------------------------------------------ - ierr = HYPRE_IJVectorCreate(comm_, reducedAStartRow, + ierr = HYPRE_IJVectorCreate(comm_, reducedAStartRow, reducedAStartRow+newNRows-1, &reducedB_); ierr += HYPRE_IJVectorSetObjectType(reducedB_, HYPRE_PARCSR); 
ierr += HYPRE_IJVectorInitialize(reducedB_); ierr += HYPRE_IJVectorAssemble(reducedB_); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJVectorGetObject(reducedB_, (void **) &reducedB_csr); HYPRE_ParCSRMatrixMatvec( -1.0, A12_csr, f2hat_csr, 0.0, reducedB_csr ); - HYPRE_IJMatrixDestroy(A12); - HYPRE_IJVectorDestroy(f2hat); + HYPRE_IJMatrixDestroy(A12); + HYPRE_IJVectorDestroy(f2hat); //------------------------------------------------------------------ // finally form reducedB = f1 - f2til //------------------------------------------------------------------ rowCount = reducedAStartRow; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - if ( hypre_BinarySearch(selectedList, i, nSelected) < 0 ) + if ( hypre_BinarySearch(selectedList, i, nSelected) < 0 ) { HYPRE_IJVectorGetValues(HYb_, 1, &i, &ddata); - HYPRE_IJVectorAddToValues(reducedB_, 1, (const int *) &rowCount, + HYPRE_IJVectorAddToValues(reducedB_, 1, (const int *) &rowCount, (const double *) &ddata); rowCount++; } @@ -1641,14 +1640,14 @@ void HYPRE_LinSysCore::buildSlideReducedSystemPartC(int *ProcNRows, ierr = HYPRE_IJVectorSetObjectType(reducedX_, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(reducedX_); ierr = HYPRE_IJVectorAssemble(reducedX_); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJVectorCreate(comm_, reducedAStartRow, reducedAStartRow+newNRows-1, &reducedR_); ierr = HYPRE_IJVectorSetObjectType(reducedR_, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(reducedR_); ierr = HYPRE_IJVectorAssemble(reducedR_); - assert(!ierr); + hypre_assert(!ierr); } //***************************************************************************** @@ -1720,15 +1719,15 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() // (The constraint equations are assumed to be at the end of the // matrix) ==> nConstraints, globalNConstr //------------------------------------------------------------------ - + MPI_Allreduce(&nConstraints_,&globalNConstr,1,MPI_INT,MPI_SUM,comm_); if ( 
globalNConstr == 0 ) { - for ( i = EndRow; i >= StartRow; i-- ) + for ( i = EndRow; i >= StartRow; i-- ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); isAConstr = 1; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) if (colInd[j] == i && colVal[j] != 0.0) {isAConstr = 0; break;} HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); if ( isAConstr ) nConstraints_++; @@ -1748,7 +1747,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() // (This is needed later on for column index conversion) // ==> ProcNRows, globalNRows //------------------------------------------------------------------ - + ProcNRows = new int[numProcs_]; tempList = new int[numProcs_]; for ( i = 0; i < numProcs_; i++ ) tempList[i] = 0; @@ -1757,7 +1756,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() delete [] tempList; globalNRows = 0; ncnt = 0; - for ( i = 0; i < numProcs_; i++ ) + for ( i = 0; i < numProcs_; i++ ) { globalNRows += ProcNRows[i]; ncnt2 = ProcNRows[i]; @@ -1773,7 +1772,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() // (This is needed later on for column index conversion) // ==> ProcNConstr, globalNConstr //------------------------------------------------------------------ - + globalNConstr = 0; tempList = new int[numProcs_]; ProcNConstr = new int[numProcs_]; @@ -1782,7 +1781,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() MPI_Allreduce(tempList,ProcNConstr,numProcs_,MPI_INT,MPI_SUM,comm_); delete [] tempList; ncnt = 0; - for ( i = 0; i < numProcs_; i++ ) + for ( i = 0; i < numProcs_; i++ ) { globalNConstr += ProcNConstr[i]; ncnt2 = ProcNConstr[i]; @@ -1793,7 +1792,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() if ( HYPreconID_ == HYMLI ) HYPRE_LSI_MLIAdjustNodeEqnMap(HYPrecon_, ProcNRows, ProcNConstr); #endif - + //****************************************************************** // compose the local and global selected node lists //------------------------------------------------------------------ @@ -1801,22 
+1800,22 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() if ( selectedList_ != NULL ) delete [] selectedList_; if ( selectedListAux_ != NULL ) delete [] selectedListAux_; nSelected = nConstraints_; - if ( nConstraints_ > 0 ) + if ( nConstraints_ > 0 ) { selectedList_ = new int[nConstraints_]; selectedListAux_ = new int[nConstraints_]; } else selectedList_ = selectedListAux_ = NULL; globalNSelected = globalNConstr; - if (globalNSelected > 0) + if (globalNSelected > 0) { globalSelectedList = new int[globalNSelected]; globalSelectedListAux = new int[globalNSelected]; } else globalSelectedList = globalSelectedListAux = NULL; - - buildSlideReducedSystemPartA(ProcNRows,ProcNConstr,globalNRows, - globalNSelected,globalSelectedList, + + buildSlideReducedSystemPartA(ProcNRows,ProcNConstr,globalNRows, + globalNSelected,globalSelectedList, globalSelectedListAux); //****************************************************************** @@ -1838,7 +1837,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() { printf("%4d : SlideReduction2 : A21StartRow = %d\n", mypid_, A21StartRow); - printf("%4d : SlideReduction2 : A21GlobalDim = %d %d\n", mypid_, + printf("%4d : SlideReduction2 : A21GlobalDim = %d %d\n", mypid_, A21GlobalNRows, A21GlobalNCols); printf("%4d : SlideReduction2 : A21LocalDim = %d %d\n",mypid_, A21NRows, A21NCols); @@ -1851,7 +1850,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() ierr = HYPRE_IJMatrixCreate(comm_, A21StartRow, A21StartRow+A21NRows-1, A21StartCol, A21StartCol+A21NCols-1, &A21); ierr += HYPRE_IJMatrixSetObjectType(A21, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros in the first nConstraint row of A21 @@ -1864,30 +1863,30 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() newEndRow = EndRow - nConstraints_; A21MatSize = new int[A21NRows]; - for ( i = 0; i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < 
nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux_[j] == i ) + if ( selectedListAux_[j] == i ) { - rowIndex = selectedList_[j]; + rowIndex = selectedList_[j]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - if ( colVal[j] != 0.0 ) + if ( colVal[j] != 0.0 ) { if (colIndex <= newEndRow || colIndex >= localEndRow_) { if (colIndex >= StartRow && colIndex <= newEndRow ) - searchIndex = hypre_BinarySearch(selectedList_,colIndex, + searchIndex = hypre_BinarySearch(selectedList_,colIndex, nSelected); - else + else searchIndex = hypre_BinarySearch(globalSelectedList, colIndex, globalNSelected); if (searchIndex < 0 ) newRowSize++; @@ -1905,16 +1904,16 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() // (which consists of the rows in constraint equations), the nnz will // be reduced by excluding the selected slave columns only (since the // entries corresponding to the constraint columns are 0, and since - // the selected matrix is a diagonal matrix, there is no need to + // the selected matrix is a diagonal matrix, there is no need to // search for slave equations in the off-processor list) //------------------------------------------------------------------ rowCount = nSelected; - for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) + for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { if ( colVal[j] != 0.0 ) { @@ -1922,9 +1921,9 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() if (colIndex <= newEndRow || colIndex >= localEndRow_) { if (colIndex >= StartRow && colIndex <= newEndRow ) - searchIndex = hypre_BinarySearch(selectedList_,colIndex, + searchIndex = hypre_BinarySearch(selectedList_,colIndex, nSelected); - else + else searchIndex = 
hypre_BinarySearch(globalSelectedList, colIndex, globalNSelected); if ( searchIndex < 0 ) newRowSize++; @@ -1945,7 +1944,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() ierr = HYPRE_IJMatrixSetRowSizes(A21, A21MatSize); ierr += HYPRE_IJMatrixInitialize(A21); - assert(!ierr); + hypre_assert(!ierr); delete [] A21MatSize; //------------------------------------------------------------------ @@ -1962,27 +1961,27 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() diagCount = 0; for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux_[j] == i ) + if ( selectedListAux_[j] == i ) { - rowIndex = selectedList_[j]; + rowIndex = selectedList_[j]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { if ( colVal[j] != 0.0 ) { colIndex = colInd[j]; - if (colIndex <= newEndRow || colIndex >= localEndRow_) + if (colIndex <= newEndRow || colIndex >= localEndRow_) { searchIndex = hypre_BinarySearch(globalSelectedList, colIndex, globalNSelected); - if ( searchIndex < 0 ) + if ( searchIndex < 0 ) { for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; @@ -1995,11 +1994,11 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() if (HYOutputLevel_ & HYFEI_SLIDEREDUCE1) { printf("%4d : SlideReduction2 WARNING - ",mypid_); - printf(" A21(%d,%d(%d))\n", rowCount, + printf(" A21(%d,%d(%d))\n", rowCount, colIndex, A21GlobalNCols); - } - } - if ( newRowSize > maxRowSize+1 ) + } + } + if ( newRowSize > maxRowSize+1 ) { if (HYOutputLevel_ & HYFEI_SLIDEREDUCE1) { @@ -2011,11 +2010,11 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() } //--------------------------------------------------------- - // slave equations should only have one nonzeros + // slave equations should only have one nonzeros // corresponding to the D in A22 
//--------------------------------------------------------- - else if ( colIndex > newEndRow && colIndex <= EndRow ) + else if ( colIndex > newEndRow && colIndex <= EndRow ) { if ( colVal[j] != 0.0 ) diagonal[diagCount++] = colVal[j]; if ( habs(colVal[j]) < 1.0E-8 ) @@ -2027,7 +2026,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() } } } - } + } } HYPRE_IJMatrixSetValues(A21, 1, &newRowSize, (const int *) &rowCount, @@ -2050,7 +2049,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() displArray = new int[numProcs_]; MPI_Allgather(&diagCount, 1, MPI_INT, recvCntArray, 1, MPI_INT, comm_); displArray[0] = 0; - for ( i = 1; i < numProcs_; i++ ) + for ( i = 1; i < numProcs_; i++ ) displArray[i] = displArray[i-1] + recvCntArray[i-1]; ncnt = displArray[numProcs_-1] + recvCntArray[numProcs_-1]; if ( ncnt > 0 ) extDiagonal = new double[ncnt]; @@ -2067,11 +2066,11 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() // (assume the constraint-constraint block is 0 ) //------------------------------------------------------------------ - for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) + for ( i = EndRow-nConstraints_+1; i <= EndRow; i++ ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; if ( colVal[j] != 0.0 ) @@ -2079,13 +2078,13 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() if (colIndex <= newEndRow || colIndex >= localEndRow_) { if (colIndex >= StartRow && colIndex <= newEndRow ) - searchIndex = hypre_BinarySearch(selectedList_,colIndex, + searchIndex = hypre_BinarySearch(selectedList_,colIndex, nSelected); - else + else searchIndex = hypre_BinarySearch(globalSelectedList, colIndex, globalNSelected); - if ( searchIndex < 0 ) + if ( searchIndex < 0 ) { for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; @@ -2098,11 +2097,11 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() if 
(HYOutputLevel_ & HYFEI_SLIDEREDUCE1) { printf("%4d : SlideReduction2 WARNING - ",mypid_); - printf(" A21(%d,%d(%d))\n", rowCount, + printf(" A21(%d,%d(%d))\n", rowCount, colIndex, A21GlobalNCols); - } - } - if ( newRowSize > maxRowSize+1 ) + } + } + if ( newRowSize > maxRowSize+1 ) { if (HYOutputLevel_ & HYFEI_SLIDEREDUCE1) { @@ -2112,7 +2111,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() } } } - } + } } if ( newRowSize == 0 && (HYOutputLevel_ & HYFEI_SLIDEREDUCE1)) printf("%4d : SlideReduction2 WARNING : loading all 0 to A21\n", @@ -2138,15 +2137,15 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() { ncnt = 0; MPI_Barrier(comm_); - while ( ncnt < numProcs_ ) + while ( ncnt < numProcs_ ) { - if ( mypid_ == ncnt ) + if ( mypid_ == ncnt ) { printf("====================================================\n"); printf("%4d : SlideReduction2 : matrix A21 assembled %d.\n", mypid_,A21StartRow); fflush(stdout); - for ( i = A21StartRow; i < A21StartRow+2*nConstraints_; i++ ) + for ( i = A21StartRow; i < A21StartRow+2*nConstraints_; i++ ) { HYPRE_ParCSRMatrixGetRow(A21_csr,i,&rowSize,&colInd,&colVal); printf("A21 ROW = %6d (%d)\n", i, rowSize); @@ -2176,9 +2175,9 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() invA22GlobalNCols = invA22GlobalNRows; if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { - printf("%4d : SlideReduction2 - A22GlobalDim = %d %d\n", mypid_, + printf("%4d : SlideReduction2 - A22GlobalDim = %d %d\n", mypid_, invA22GlobalNRows, invA22GlobalNCols); - printf("%4d : SlideReduction2 - A22LocalDim = %d %d\n", mypid_, + printf("%4d : SlideReduction2 - A22LocalDim = %d %d\n", mypid_, invA22NRows, invA22NCols); } @@ -2189,7 +2188,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() ierr = HYPRE_IJMatrixCreate(comm_, A21StartRow, A21StartRow+invA22NRows-1, A21StartRow, A21StartRow+invA22NCols-1, &invA22); ierr += HYPRE_IJMatrixSetObjectType(invA22, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); 
//------------------------------------------------------------------ // compute the no. of nonzeros in the first nConstraint row of invA22 @@ -2200,37 +2199,37 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() for ( i = 0; i < nConstraints_; i++ ) invA22MatSize[i] = 1; //------------------------------------------------------------------ - // compute the number of nonzeros in the second nConstraints row of + // compute the number of nonzeros in the second nConstraints row of // invA22 (consisting of [D and A22 block]) //------------------------------------------------------------------ - for ( i = 0; i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux_[j] == i ) + if ( selectedListAux_[j] == i ) { - rowIndex = selectedList_[j]; + rowIndex = selectedList_[j]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); rowSize2 = 1; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; - if ( colVal[j] != 0.0 ) + if ( colVal[j] != 0.0 ) { - if ( colIndex >= StartRow && colIndex <= newEndRow ) + if ( colIndex >= StartRow && colIndex <= newEndRow ) { - searchIndex = hypre_BinarySearch(selectedList_, colIndex, - nSelected); + searchIndex = hypre_BinarySearch(selectedList_, colIndex, + nSelected); if ( searchIndex >= 0 ) rowSize2++; - } - else if ( colIndex < StartRow || colIndex > EndRow ) + } + else if ( colIndex < StartRow || colIndex > EndRow ) { - searchIndex = hypre_BinarySearch(globalSelectedList,colIndex, - globalNSelected); + searchIndex = hypre_BinarySearch(globalSelectedList,colIndex, + globalNSelected); if ( searchIndex >= 0 ) rowSize2++; } } @@ -2246,7 +2245,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() ierr = HYPRE_IJMatrixSetRowSizes(invA22, invA22MatSize); ierr += HYPRE_IJMatrixInitialize(invA22); - assert(!ierr); + hypre_assert(!ierr); delete [] invA22MatSize; 
//------------------------------------------------------------------ @@ -2259,65 +2258,65 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() newColVal = new double[maxRowSize]; for ( i = 0; i < diagCount; i++ ) extDiagonal[i] = 1.0 / extDiagonal[i]; - for ( i = 0; i < nConstraints_; i++ ) + for ( i = 0; i < nConstraints_; i++ ) { - newColInd[0] = A21StartRow + nConstraints_ + i; + newColInd[0] = A21StartRow + nConstraints_ + i; rowIndex = A21StartRow + i; if ( newColInd[0] < 0 || newColInd[0] >= invA22GlobalNCols ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) - printf("%4d : SlideReduction2 WARNING - A22(%d,%d(%d))\n", + printf("%4d : SlideReduction2 WARNING - A22(%d,%d(%d))\n", mypid_, rowIndex, newColInd[0], invA22GlobalNCols); - } + } newColVal[0] = extDiagonal[A21StartRow/2+i]; ierr = HYPRE_IJMatrixSetValues(invA22, 1, &one, (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } //------------------------------------------------------------------ // next load the second nConstraints_ rows to A22 extracted from A //------------------------------------------------------------------ - for ( i = 0; i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux_[j] == i ) + if ( selectedListAux_[j] == i ) { - rowIndex = selectedList_[j]; + rowIndex = selectedList_[j]; break; } } HYPRE_ParCSRMatrixGetRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); newRowSize = 1; newColInd[0] = A21StartRow + i; - newColVal[0] = extDiagonal[A21StartRow/2+i]; - for (j = 0; j < rowSize; j++) + newColVal[0] = extDiagonal[A21StartRow/2+i]; + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; if ( colVal[j] != 0.0 ) { searchIndex = hypre_BinarySearch(globalSelectedList, colIndex,globalNSelected); - if ( searchIndex >= 0 ) + if ( searchIndex >= 0 ) { searchIndex = globalSelectedListAux[searchIndex]; for ( procIndex = 0; procIndex < 
numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; if ( procIndex == numProcs_ ) - newColInd[newRowSize] = searchIndex + globalNConstr; + newColInd[newRowSize] = searchIndex + globalNConstr; else - newColInd[newRowSize] = searchIndex+ProcNConstr[procIndex]; - if ( newColInd[newRowSize] < 0 || + newColInd[newRowSize] = searchIndex+ProcNConstr[procIndex]; + if ( newColInd[newRowSize] < 0 || newColInd[newRowSize] >= invA22GlobalNCols ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) printf("%4d : SlideReduction2 WARNING - A22(%d,%d,%d)\n", - mypid_, rowCount, newColInd[newRowSize], + mypid_, rowCount, newColInd[newRowSize], invA22GlobalNCols); - } - newColVal[newRowSize++] = - extDiagonal[A21StartRow/2+i] * + } + newColVal[newRowSize++] = - extDiagonal[A21StartRow/2+i] * colVal[j] * extDiagonal[searchIndex]; if ( newRowSize > maxRowSize ) { @@ -2327,14 +2326,14 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() printf("passing array boundary(3).\n"); } } - } - } + } + } } rowCount = A21StartRow + nConstraints_ + i; - ierr = HYPRE_IJMatrixSetValues(invA22, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(invA22, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); HYPRE_ParCSRMatrixRestoreRow(A_csr,rowIndex,&rowSize,&colInd,&colVal); } delete [] newColInd; @@ -2353,13 +2352,13 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() { ncnt = 0; MPI_Barrier(comm_); - while ( ncnt < numProcs_ ) + while ( ncnt < numProcs_ ) { - if ( mypid_ == ncnt ) + if ( mypid_ == ncnt ) { printf("====================================================\n"); printf("%4d : SlideReduction - invA22 \n", mypid_); - for ( i = A21StartRow; i < A21StartRow+2*nConstraints_; i++ ) + for ( i = A21StartRow; i < A21StartRow+2*nConstraints_; i++ ) { HYPRE_ParCSRMatrixGetRow(invA22_csr,i,&rowSize,&colInd, &colVal); @@ -2387,7 +2386,7 @@ void 
HYPRE_LinSysCore::buildSlideReducedSystem2() hypre_BoomerAMGBuildCoarseOperator( (hypre_ParCSRMatrix *) A21_csr, (hypre_ParCSRMatrix *) invA22_csr, - (hypre_ParCSRMatrix *) A21_csr, + (hypre_ParCSRMatrix *) A21_csr, (hypre_ParCSRMatrix **) &RAP_csr); if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) @@ -2401,7 +2400,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() { if ( mypid_ == ncnt ) { - for ( i = A21StartRow; i < A21StartRow+A21NCols; i++ ) + for ( i = A21StartRow; i < A21StartRow+A21NCols; i++ ) { HYPRE_ParCSRMatrixGetRow(RAP_csr,i,&rowSize,&colInd, &colVal); printf("RAP ROW = %6d (%d)\n", i, rowSize); @@ -2429,7 +2428,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() ierr = HYPRE_IJMatrixCreate(comm_, A21StartCol, A21StartCol+newNRows-1, A21StartCol, A21StartCol+newNRows-1, &reducedA); ierr += HYPRE_IJMatrixSetObjectType(reducedA, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // set up reducedA with proper sizes @@ -2440,36 +2439,36 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() rowCount = reducedAStartRow; rowIndex = 0; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - searchIndex = hypre_BinarySearch(selectedList_, i, nSelected); - if ( searchIndex < 0 ) + searchIndex = hypre_BinarySearch(selectedList_, i, nSelected); + if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); ierr = HYPRE_ParCSRMatrixGetRow(RAP_csr,rowCount,&rowSize2, &colInd2, &colVal2); - assert( !ierr ); + hypre_assert( !ierr ); newRowSize = rowSize + rowSize2; newColInd = new int[newRowSize]; - for (j = 0; j < rowSize; j++) newColInd[j] = colInd[j]; + for (j = 0; j < rowSize; j++) newColInd[j] = colInd[j]; for (j = 0; j < rowSize2; j++) newColInd[rowSize+j] = colInd2[j]; hypre_qsort0(newColInd, 0, newRowSize-1); ncnt = 0; - for ( j = 0; j < newRowSize; j++ ) + for ( j = 0; j < newRowSize; j++ ) { - if ( newColInd[j] != 
newColInd[ncnt] ) + if ( newColInd[j] != newColInd[ncnt] ) { ncnt++; newColInd[ncnt] = newColInd[j]; - } + } } reducedAMatSize[rowIndex++] = ncnt; - + HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); ierr = HYPRE_ParCSRMatrixRestoreRow(RAP_csr,rowCount,&rowSize2, &colInd2,&colVal2); delete [] newColInd; - assert( !ierr ); + hypre_assert( !ierr ); rowCount++; } else @@ -2485,17 +2484,17 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() ierr = HYPRE_IJMatrixSetRowSizes(reducedA, reducedAMatSize); ierr += HYPRE_IJMatrixInitialize(reducedA); - assert(!ierr); + hypre_assert(!ierr); delete [] reducedAMatSize; //------------------------------------------------------------------ - // load the reducedA matrix + // load the reducedA matrix //------------------------------------------------------------------ rowCount = reducedAStartRow; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - searchIndex = hypre_BinarySearch(selectedList_, i, nSelected); + searchIndex = hypre_BinarySearch(selectedList_, i, nSelected); if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr, i, &rowSize, &colInd, &colVal); @@ -2505,58 +2504,58 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() newColInd = new int[newRowSize]; newColVal = new double[newRowSize]; ncnt = 0; - - for ( j = 0; j < rowSize; j++ ) + + for ( j = 0; j < rowSize; j++ ) { colIndex = colInd[j]; for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; - if ( procIndex == numProcs_ ) + if ( procIndex == numProcs_ ) ubound = globalNRows-(globalNConstr-ProcNConstr[numProcs_-1]); else - ubound = ProcNRows[procIndex] - + ubound = ProcNRows[procIndex] - (ProcNConstr[procIndex]-ProcNConstr[procIndex-1]); procIndex--; - if ( colIndex < ubound ) + if ( colIndex < ubound ) { if ( colIndex >= StartRow && colIndex <= EndRow ) - searchIndex = HYPRE_LSI_Search(selectedList_,colIndex, - nSelected); + searchIndex = 
HYPRE_LSI_Search(selectedList_,colIndex, + nSelected); else - searchIndex = HYPRE_LSI_Search(globalSelectedList,colIndex, - globalNSelected); + searchIndex = HYPRE_LSI_Search(globalSelectedList,colIndex, + globalNSelected); - if ( searchIndex < 0 ) + if ( searchIndex < 0 ) { newColInd[ncnt] = colIndex - ProcNConstr[procIndex]; - newColVal[ncnt++] = colVal[j]; + newColVal[ncnt++] = colVal[j]; } } } - for ( j = 0; j < rowSize2; j++ ) + for ( j = 0; j < rowSize2; j++ ) { - newColInd[ncnt+j] = colInd2[j]; - newColVal[ncnt+j] = - colVal2[j]; + newColInd[ncnt+j] = colInd2[j]; + newColVal[ncnt+j] = - colVal2[j]; } newRowSize = ncnt + rowSize2; hypre_qsort1(newColInd, newColVal, 0, newRowSize-1); ncnt = 0; - for ( j = 0; j < newRowSize; j++ ) + for ( j = 0; j < newRowSize; j++ ) { - if ( j != ncnt && newColInd[j] == newColInd[ncnt] ) + if ( j != ncnt && newColInd[j] == newColInd[ncnt] ) newColVal[ncnt] += newColVal[j]; - else if ( newColInd[j] != newColInd[ncnt] ) + else if ( newColInd[j] != newColInd[ncnt] ) { ncnt++; newColVal[ncnt] = newColVal[j]; newColInd[ncnt] = newColInd[j]; - } - } + } + } newRowSize = ncnt + 1; - ierr = HYPRE_IJMatrixSetValues(reducedA, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(reducedA, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); HYPRE_ParCSRMatrixRestoreRow(RAP_csr,rowCount,&rowSize2,&colInd2, &colVal2); @@ -2571,10 +2570,10 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() newColVal = new double[newRowSize]; newColInd[0] = rowCount; newColVal[0] = 1.0; - ierr = HYPRE_IJMatrixSetValues(reducedA, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(reducedA, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + 
hypre_assert(!ierr); rowCount++; delete [] newColInd; delete [] newColVal; @@ -2587,7 +2586,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() HYPRE_IJMatrixAssemble(reducedA); if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) - printf("%4d : SlideReduction2 - reducedA - StartRow = %d\n", + printf("%4d : SlideReduction2 - reducedA - StartRow = %d\n", mypid_, reducedAStartRow); HYPRE_IJMatrixGetObject(reducedA, (void **) &reducedA_csr); @@ -2601,7 +2600,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() if ( mypid_ == ncnt ) { printf("====================================================\n"); - for ( i = reducedAStartRow; + for ( i = reducedAStartRow; i < reducedAStartRow+nRows-2*nConstraints_; i++ ) { printf("%d : reducedA ROW %d\n", mypid_, i); @@ -2632,26 +2631,26 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() HYPRE_IJVectorCreate(comm_, A21StartRow, A21StartRow+A21NRows-1, &f2); HYPRE_IJVectorSetObjectType(f2, HYPRE_PARCSR); if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) - printf("%4d : SlideReduction2 - A21 dims = %d %d %d\n", mypid_, + printf("%4d : SlideReduction2 - A21 dims = %d %d %d\n", mypid_, A21StartRow, A21NRows, A21GlobalNRows); ierr += HYPRE_IJVectorInitialize(f2); ierr += HYPRE_IJVectorAssemble(f2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJVectorCreate(comm_, A21StartRow, A21StartRow+A21NRows-1, &f2hat); HYPRE_IJVectorSetObjectType(f2hat, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(f2hat); ierr += HYPRE_IJVectorAssemble(f2hat); - assert(!ierr); + hypre_assert(!ierr); colInd = new int[nSelected*2]; colVal = new double[nSelected*2]; - for ( i = 0; i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) { - for ( j = 0; j < nSelected; j++ ) + for ( j = 0; j < nSelected; j++ ) { - if ( selectedListAux_[j] == i ) + if ( selectedListAux_[j] == i ) { colInd[i] = selectedList_[j]; break; @@ -2664,21 +2663,21 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() exit(1); } } - for ( i = 0; i < nSelected; i++ ) + for ( i = 0; i < nSelected; i++ ) 
{ colInd[nSelected+i] = EndRow - nConstraints_ + i + 1; } HYPRE_IJVectorGetValues(HYb_, 2*nSelected, colInd, colVal); for ( i = 0; i < nSelected*2; i++ ) colInd[i] = A21StartRow + i; - ierr = HYPRE_IJVectorSetValues(f2, 2*nSelected, (const int *) colInd, + ierr = HYPRE_IJVectorSetValues(f2, 2*nSelected, (const int *) colInd, (const double *) colVal); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJVectorGetObject(f2, (void **) &f2_csr); HYPRE_IJVectorGetObject(f2hat, (void **) &f2hat_csr); HYPRE_ParCSRMatrixMatvec( 1.0, invA22_csr, f2_csr, 0.0, f2hat_csr ); delete [] colVal; delete [] colInd; - HYPRE_IJVectorDestroy(f2); + HYPRE_IJVectorDestroy(f2); // ***************************************************************** // set up A12 with proper sizes before forming f2til = A12 * f2hat @@ -2696,9 +2695,9 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() A12StartRow = ProcNRows[mypid_] - ProcNConstr[mypid_]; if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { - printf("%4d : SlideReduction2 - A12GlobalDim = %d %d\n", mypid_, + printf("%4d : SlideReduction2 - A12GlobalDim = %d %d\n", mypid_, A12GlobalNRows, A12GlobalNCols); - printf("%4d : SlideReduction2 - A12LocalDim = %d %d\n", mypid_, + printf("%4d : SlideReduction2 - A12LocalDim = %d %d\n", mypid_, A12NRows, A12NCols); } @@ -2709,7 +2708,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() ierr = HYPRE_IJMatrixCreate(comm_, A21StartCol, A21StartCol+A12NRows-1, A21StartRow, A21StartRow+A12NCols-1, &A12); ierr += HYPRE_IJMatrixSetObjectType(A12, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); //------------------------------------------------------------------ // compute the number of nonzeros in each row of A12 @@ -2720,28 +2719,28 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() rowIndex = 0; nnzA12 = 0; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - searchIndex = hypre_BinarySearch(selectedList_, i, nSelected); - if ( searchIndex < 0 ) + searchIndex = 
hypre_BinarySearch(selectedList_, i, nSelected); + if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr,i,&rowSize,&colInd,&colVal); newRowSize = 0; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { if ( colVal[j] != 0.0 ) { colIndex = colInd[j]; for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; - if ( procIndex == numProcs_ ) + if ( procIndex == numProcs_ ) ubound = globalNRows - (globalNConstr-ProcNConstr[numProcs_-1]); else - ubound = ProcNRows[procIndex] - + ubound = ProcNRows[procIndex] - (ProcNConstr[procIndex]-ProcNConstr[procIndex-1]); procIndex--; - if ( colIndex >= ubound ) newRowSize++; + if ( colIndex >= ubound ) newRowSize++; else if (colIndex >= StartRow && colIndex <= EndRow) { if (hypre_BinarySearch(selectedList_,colIndex,nSelected)>=0) @@ -2749,7 +2748,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() } else { - if (hypre_BinarySearch(globalSelectedList,colIndex, + if (hypre_BinarySearch(globalSelectedList,colIndex, globalNSelected) >= 0) newRowSize++; } @@ -2766,7 +2765,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() nnzA12--; } } - + //------------------------------------------------------------------ // after fetching the row sizes, set up A12 with such sizes //------------------------------------------------------------------ @@ -2774,83 +2773,83 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() for ( i = 0; i < A12NRows; i++ ) nnzA12 += A12MatSize[i]; ierr = HYPRE_IJMatrixSetRowSizes(A12, A12MatSize); ierr += HYPRE_IJMatrixInitialize(A12); - assert(!ierr); + hypre_assert(!ierr); delete [] A12MatSize; //------------------------------------------------------------------ - // load the A12 matrix + // load the A12 matrix //------------------------------------------------------------------ rowCount = A12StartRow; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - searchIndex = hypre_BinarySearch(selectedList_, i, 
nSelected); + searchIndex = hypre_BinarySearch(selectedList_, i, nSelected); if ( searchIndex < 0 ) { HYPRE_ParCSRMatrixGetRow(A_csr, i, &rowSize, &colInd, &colVal); newRowSize = 0; newColInd = new int[rowSize]; newColVal = new double[rowSize]; - for (j = 0; j < rowSize; j++) + for (j = 0; j < rowSize; j++) { colIndex = colInd[j]; if ( colVal[j] != 0.0 ) { for ( procIndex = 0; procIndex < numProcs_; procIndex++ ) if ( ProcNRows[procIndex] > colIndex ) break; - if ( procIndex == numProcs_ ) + if ( procIndex == numProcs_ ) ubound = globalNRows - (globalNConstr - ProcNConstr[numProcs_-1]); else - ubound = ProcNRows[procIndex] - + ubound = ProcNRows[procIndex] - (ProcNConstr[procIndex]-ProcNConstr[procIndex-1]); procIndex--; - if ( colIndex >= ubound ) { - if ( procIndex != numProcs_ - 1 ) + if ( colIndex >= ubound ) { + if ( procIndex != numProcs_ - 1 ) { - newColInd[newRowSize] = colInd[j] - ubound + + newColInd[newRowSize] = colInd[j] - ubound + ProcNConstr[procIndex] + ProcNConstr[procIndex+1]; } - else + else { - newColInd[newRowSize] = colInd[j] - ubound + + newColInd[newRowSize] = colInd[j] - ubound + ProcNConstr[procIndex] + globalNConstr; } - if ( newColInd[newRowSize] < 0 || + if ( newColInd[newRowSize] < 0 || newColInd[newRowSize] >= A12GlobalNCols ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) { printf("%4d : SlideReduction WARNING - A12 col index", mypid_); - printf(" out of range %d %d(%d)\n", i, + printf(" out of range %d %d(%d)\n", i, newColInd[newRowSize], A12GlobalNCols); } } newColVal[newRowSize++] = colVal[j]; - } + } else { - if ( colInd[j] >= StartRow && colInd[j] <= EndRow ) + if ( colInd[j] >= StartRow && colInd[j] <= EndRow ) { searchIndex = HYPRE_LSI_Search(selectedList_,colInd[j], nSelected); - if ( searchIndex >= 0 ) - searchIndex = selectedListAux_[searchIndex] + + if ( searchIndex >= 0 ) + searchIndex = selectedListAux_[searchIndex] + ProcNConstr[mypid_]; } else { searchIndex = HYPRE_LSI_Search(globalSelectedList, colInd[j], 
globalNSelected); - if ( searchIndex >= 0 ) - searchIndex = globalSelectedListAux[searchIndex]; + if ( searchIndex >= 0 ) + searchIndex = globalSelectedListAux[searchIndex]; } - if ( searchIndex >= 0) + if ( searchIndex >= 0) { - newColInd[newRowSize] = searchIndex + - ProcNConstr[procIndex]; - if ( newColInd[newRowSize] < 0 || + newColInd[newRowSize] = searchIndex + + ProcNConstr[procIndex]; + if ( newColInd[newRowSize] < 0 || newColInd[newRowSize] >= A12GlobalNCols ) { if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 ) @@ -2866,10 +2865,10 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() } } } - ierr = HYPRE_IJMatrixSetValues(A12, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(A12, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); HYPRE_ParCSRMatrixRestoreRow(A_csr,i,&rowSize,&colInd,&colVal); rowCount++; delete [] newColInd; @@ -2882,10 +2881,10 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() newColVal = new double[newRowSize]; newColInd[0] = A21StartRow; newColVal[0] = 0.0; - ierr = HYPRE_IJMatrixSetValues(A12, 1, &newRowSize, - (const int *) &rowCount, (const int *) newColInd, + ierr = HYPRE_IJMatrixSetValues(A12, 1, &newRowSize, + (const int *) &rowCount, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); rowCount++; delete [] newColInd; delete [] newColVal; @@ -2893,11 +2892,11 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() } //------------------------------------------------------------------ - // assemble the A12 matrix + // assemble the A12 matrix //------------------------------------------------------------------ ierr = HYPRE_IJMatrixAssemble(A12); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(A12, (void **) &A12_csr); if ( HYOutputLevel_ & HYFEI_SLIDEREDUCE3 ) @@ -2938,21 +2937,21 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() ierr += 
HYPRE_IJVectorSetObjectType(reducedB_, HYPRE_PARCSR); ierr += HYPRE_IJVectorInitialize(reducedB_); ierr += HYPRE_IJVectorAssemble(reducedB_); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJVectorGetObject(reducedB_, (void **) &reducedB_csr); HYPRE_ParCSRMatrixMatvec( -1.0, A12_csr, f2hat_csr, 0.0, reducedB_csr ); - HYPRE_IJMatrixDestroy(A12); - HYPRE_IJVectorDestroy(f2hat); + HYPRE_IJMatrixDestroy(A12); + HYPRE_IJVectorDestroy(f2hat); //------------------------------------------------------------------ // finally form reducedB = f1 - f2til //------------------------------------------------------------------ rowCount = reducedAStartRow; - for ( i = StartRow; i <= newEndRow; i++ ) + for ( i = StartRow; i <= newEndRow; i++ ) { - if ( hypre_BinarySearch(selectedList_, i, nSelected) < 0 ) + if ( hypre_BinarySearch(selectedList_, i, nSelected) < 0 ) { HYPRE_IJVectorGetValues(HYb_, 1, &i, &ddata); HYPRE_IJVectorAddToValues(reducedB_, 1, (const int *) &rowCount, @@ -2978,14 +2977,14 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() ierr = HYPRE_IJVectorSetObjectType(reducedX_, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(reducedX_); ierr = HYPRE_IJVectorAssemble(reducedX_); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJVectorCreate(comm_, reducedAStartRow, reducedAStartRow+newNRows-1, &reducedR_); ierr = HYPRE_IJVectorSetObjectType(reducedR_, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(reducedR_); ierr = HYPRE_IJVectorAssemble(reducedR_); - assert(!ierr); + hypre_assert(!ierr); currA_ = reducedA_; currB_ = reducedB_; @@ -2996,8 +2995,8 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() // save A21 and invA22 for solution recovery //------------------------------------------------------------------ - HYA21_ = A21; - HYinvA22_ = invA22; + HYA21_ = A21; + HYinvA22_ = invA22; //------------------------------------------------------------------ // final clean up @@ -3022,7 +3021,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() if ( colValues_[j] != 
NULL ) delete [] colValues_[j]; delete [] colValues_; colValues_ = NULL; - if ( rowLengths_ != NULL ) + if ( rowLengths_ != NULL ) { delete [] rowLengths_; rowLengths_ = NULL; @@ -3030,7 +3029,7 @@ void HYPRE_LinSysCore::buildSlideReducedSystem2() } //------------------------------------------------------------------ - // checking + // checking //------------------------------------------------------------------ MPI_Allreduce(&nnzA12,&ncnt,1,MPI_INT,MPI_SUM,comm_); @@ -3053,8 +3052,8 @@ double HYPRE_LinSysCore::buildSlideReducedSoln() double ddata, rnorm; HYPRE_ParCSRMatrix A_csr, A21_csr, A22_csr; HYPRE_ParVector x_csr, x2_csr, r_csr, b_csr; - HYPRE_IJVector R1, x2; - + HYPRE_IJVector R1, x2; + if ( HYA21_ == NULL || HYinvA22_ == NULL ) { printf("buildSlideReducedSoln WARNING : A21 or A22 absent.\n"); @@ -3084,7 +3083,7 @@ double HYPRE_LinSysCore::buildSlideReducedSoln() ierr = HYPRE_IJVectorSetObjectType(R1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(R1); ierr = HYPRE_IJVectorAssemble(R1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJMatrixGetObject(HYA21_, (void **) &A21_csr); HYPRE_IJVectorGetObject(currX_, (void **) &x_csr); HYPRE_IJVectorGetObject(R1, (void **) &r_csr); @@ -3096,11 +3095,11 @@ double HYPRE_LinSysCore::buildSlideReducedSoln() for ( i = 0; i < nConstraints_; i++ ) { - for ( j = 0; j < nConstraints_; j++ ) + for ( j = 0; j < nConstraints_; j++ ) { - if ( selectedListAux_[j] == i ) + if ( selectedListAux_[j] == i ) { - index = selectedList_[j]; + index = selectedList_[j]; break; } } @@ -3115,7 +3114,7 @@ double HYPRE_LinSysCore::buildSlideReducedSoln() HYPRE_IJVectorAddToValues(R1, 1, (const int *) &rowNum, (const double *) &ddata); rowNum++; - } + } //------------------------------------------------------------- // inv(A22) * (f2 - A21 * sol) @@ -3125,7 +3124,7 @@ double HYPRE_LinSysCore::buildSlideReducedSoln() ierr = HYPRE_IJVectorSetObjectType(x2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(x2); ierr = HYPRE_IJVectorAssemble(x2); - 
assert(!ierr); + hypre_assert(!ierr); HYPRE_IJMatrixGetObject(HYinvA22_, (void **) &A22_csr); HYPRE_IJVectorGetObject(R1, (void **) &r_csr); HYPRE_IJVectorGetObject(x2, (void **) &x2_csr); @@ -3148,15 +3147,15 @@ double HYPRE_LinSysCore::buildSlideReducedSoln() } for ( i = 0; i < nConstraints_; i++ ) { - for ( j = 0; j < nConstraints_; j++ ) + for ( j = 0; j < nConstraints_; j++ ) { - if ( selectedListAux_[j] == i ) + if ( selectedListAux_[j] == i ) { - index = selectedList_[j]; + index = selectedList_[j]; break; } } - j = i + startRow; + j = i + startRow; HYPRE_IJVectorGetValues(x2, 1, &j, &ddata); HYPRE_IJVectorSetValues(HYx_, 1, (const int *) &index, (const double *) &ddata); @@ -3168,10 +3167,10 @@ double HYPRE_LinSysCore::buildSlideReducedSoln() index = localEndRow_ - 2 * nConstraints_ + i; HYPRE_IJVectorSetValues(HYx_, 1, (const int *) &index, (const double *) &ddata); - } + } //------------------------------------------------------------- - // residual norm check + // residual norm check //------------------------------------------------------------- HYPRE_IJMatrixGetObject(HYA_, (void **) &A_csr); @@ -3184,15 +3183,15 @@ double HYPRE_LinSysCore::buildSlideReducedSoln() rnorm = sqrt( rnorm ); if ( mypid_ == 0 && ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 )) printf("buildSlideReducedSoln::final residual norm = %e\n", rnorm); - } + } currX_ = HYx_; //**************************************************************** // clean up //---------------------------------------------------------------- - HYPRE_IJVectorDestroy(R1); - HYPRE_IJVectorDestroy(x2); + HYPRE_IJVectorDestroy(R1); + HYPRE_IJVectorDestroy(x2); return rnorm; } @@ -3209,8 +3208,8 @@ double HYPRE_LinSysCore::buildSlideReducedSoln2() double ddata, rnorm; HYPRE_ParCSRMatrix A_csr, A21_csr, A22_csr; HYPRE_ParVector x_csr, x2_csr, r_csr, b_csr; - HYPRE_IJVector R1, x2; - + HYPRE_IJVector R1, x2; + if ( HYA21_ == NULL || HYinvA22_ == NULL ) { printf("buildSlideReducedSoln2 WARNING : A21 or A22 absent.\n"); @@ 
-3240,7 +3239,7 @@ double HYPRE_LinSysCore::buildSlideReducedSoln2() ierr = HYPRE_IJVectorSetObjectType(R1, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(R1); ierr = HYPRE_IJVectorAssemble(R1); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJMatrixGetObject(HYA21_, (void **) &A21_csr); HYPRE_IJVectorGetObject(currX_, (void **) &x_csr); HYPRE_IJVectorGetObject(R1, (void **) &r_csr); @@ -3252,11 +3251,11 @@ double HYPRE_LinSysCore::buildSlideReducedSoln2() for ( i = 0; i < nConstraints_; i++ ) { - for ( j = 0; j < nConstraints_; j++ ) + for ( j = 0; j < nConstraints_; j++ ) { - if ( selectedListAux_[j] == i ) + if ( selectedListAux_[j] == i ) { - index = selectedList_[j]; + index = selectedList_[j]; break; } } @@ -3271,7 +3270,7 @@ double HYPRE_LinSysCore::buildSlideReducedSoln2() HYPRE_IJVectorAddToValues(R1, 1, (const int *) &rowNum, (const double *) &ddata); rowNum++; - } + } //------------------------------------------------------------- // inv(A22) * (f2 - A21 * sol) @@ -3281,7 +3280,7 @@ double HYPRE_LinSysCore::buildSlideReducedSoln2() ierr = HYPRE_IJVectorSetObjectType(x2, HYPRE_PARCSR); ierr = HYPRE_IJVectorInitialize(x2); ierr = HYPRE_IJVectorAssemble(x2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJMatrixGetObject(HYinvA22_, (void **) &A22_csr ); HYPRE_IJVectorGetObject(R1, (void **) &r_csr ); HYPRE_IJVectorGetObject(x2, (void **) &x2_csr ); @@ -3302,15 +3301,15 @@ double HYPRE_LinSysCore::buildSlideReducedSoln2() } for ( i = 0; i < nConstraints_; i++ ) { - for ( j = 0; j < nConstraints_; j++ ) + for ( j = 0; j < nConstraints_; j++ ) { - if ( selectedListAux_[j] == i ) + if ( selectedListAux_[j] == i ) { - index = selectedList_[j]; + index = selectedList_[j]; break; } } - j = i + startRow; + j = i + startRow; HYPRE_IJVectorGetValues(x2, 1, &j, &ddata); HYPRE_IJVectorSetValues(HYx_, 1, (const int *) &index, (const double *) &ddata); @@ -3322,10 +3321,10 @@ double HYPRE_LinSysCore::buildSlideReducedSoln2() index = localEndRow_ - 2 * nConstraints_ + i; 
HYPRE_IJVectorSetValues(HYx_, 1, (const int *) &index, (const double *) &ddata); - } + } //------------------------------------------------------------- - // residual norm check + // residual norm check //------------------------------------------------------------- HYPRE_IJMatrixGetObject(HYA_, (void **) &A_csr); @@ -3338,15 +3337,15 @@ double HYPRE_LinSysCore::buildSlideReducedSoln2() rnorm = sqrt( rnorm ); if ( mypid_ == 0 && ( HYOutputLevel_ & HYFEI_SLIDEREDUCE1 )) printf("buildSlideReducedSoln::final residual norm = %e\n", rnorm); - } + } currX_ = HYx_; //**************************************************************** // clean up //---------------------------------------------------------------- - HYPRE_IJVectorDestroy(R1); - HYPRE_IJVectorDestroy(x2); + HYPRE_IJVectorDestroy(R1); + HYPRE_IJVectorDestroy(x2); return rnorm; } diff --git a/src/FEI_mv/fei-hypre/ml_maxwell.cxx b/src/FEI_mv/fei-hypre/ml_maxwell.cxx index 9abd41339..18cd0e345 100644 --- a/src/FEI_mv/fei-hypre/ml_maxwell.cxx +++ b/src/FEI_mv/fei-hypre/ml_maxwell.cxx @@ -16,7 +16,6 @@ #include #include #include -#include //************************************************************************** // HYPRE includes @@ -36,12 +35,12 @@ //--------------------------------------------------------------------------- void fei_hypre_test(int, char **); -void hypre_read_matrix(double **val, int **ia, int **ja, int *N, int *M, +void hypre_read_matrix(double **val, int **ia, int **ja, int *N, int *M, char *matfile); void hypre_read_rhs(double **val, int *N, char *rhsfile); //*************************************************************************** -// main program +// main program //*************************************************************************** int main(int argc, char *argv[]) @@ -50,7 +49,7 @@ int main(int argc, char *argv[]) } //*************************************************************************** -// a test program +// a test program 
//*************************************************************************** void fei_hypre_test(int argc, char *argv[]) @@ -81,7 +80,7 @@ void fei_hypre_test(int argc, char *argv[]) H.createMatricesAndVectors(nrows, 1, nrows); rowLengths = new int[nrows]; colIndices = new int*[nrows]; - for (i = 0; i < nrows; i++) + for (i = 0; i < nrows; i++) { ncnt = ia[i+1] - ia[i]; rowLengths[i] = ncnt; @@ -114,7 +113,7 @@ void fei_hypre_test(int argc, char *argv[]) G.createMatricesAndVectors(nrows, 1, nrows); rowLengths = new int[nrows]; colIndices = new int*[nrows]; - for (i = 0; i < nrows; i++) + for (i = 0; i < nrows; i++) { ncnt = ia[i+1] - ia[i]; rowLengths[i] = ncnt; @@ -142,9 +141,9 @@ void fei_hypre_test(int argc, char *argv[]) data.setTypeName(tname); H.copyInMatrix(1.0, data); G.HYA_ = NULL; - + //------------------------------------------------------------------ - // load the right hand side + // load the right hand side //------------------------------------------------------------------ hypre_read_rhs(&rhs, &i, "rhs.ij"); @@ -191,7 +190,7 @@ void fei_hypre_test(int argc, char *argv[]) delete [] sol; //------------------------------------------------------------------ - // clean up + // clean up //------------------------------------------------------------------ MPI_Finalize(); @@ -204,7 +203,7 @@ void fei_hypre_test(int argc, char *argv[]) #endif //*************************************************************************** -// read a matrix +// read a matrix //*************************************************************************** void hypre_read_matrix(double **val, int **ia, int **ja, int *N, int *M, @@ -214,7 +213,7 @@ void hypre_read_matrix(double **val, int **ia, int **ja, int *N, int *M, int *mat_ia, *mat_ja; double *mat_a, value; FILE *fp; - + /*------------------------------------------------------------------*/ /* read matrix file */ /*------------------------------------------------------------------*/ @@ -224,14 +223,14 @@ void 
hypre_read_matrix(double **val, int **ia, int **ja, int *N, int *M, if (fp == NULL) { printf("File not found = %s \n", matfile); - exit(1); + exit(1); } fscanf(fp, "%d %d %d", &nnz, &nrows, &ncols); mat_ia = new int[nrows+1]; mat_ja = new int[nnz]; mat_a = new double[nnz]; mat_ia[0] = 0; - + curr_row = 0; icount = 0; for (i = 0; i < nnz; i++) @@ -268,7 +267,7 @@ void hypre_read_rhs(double **val, int *N, char *rhsfile) int i, nrows, rowindex; double *rhs, value; FILE *fp; - + /*------------------------------------------------------------------*/ /* read matrix file */ /*------------------------------------------------------------------*/ @@ -283,7 +282,7 @@ void hypre_read_rhs(double **val, int *N, char *rhsfile) } fscanf(fp, "%d", &nrows); rhs = new double[nrows]; - + for (i = 0; i < nrows; i++) { fscanf(fp, "%d %lg", &rowindex, &value); diff --git a/src/FEI_mv/femli/driver_util.c b/src/FEI_mv/femli/driver_util.c index 7f20d7f9e..d4ba5edf9 100644 --- a/src/FEI_mv/femli/driver_util.c +++ b/src/FEI_mv/femli/driver_util.c @@ -15,7 +15,7 @@ #include #include "mli_utils.h" -extern int mli_computespectrum_(int *,int *,double *, double *, int *, +extern int mli_computespectrum_(int *,int *,double *, double *, int *, double *, double *, double *, int *); void testEigen(); void testMergeSort(); @@ -52,7 +52,7 @@ void testEigen() printf("testEigen ERROR : file not found.\n"); exit(1); } - for ( i = 0; i < mDim*mDim; i++ ) fscanf(fp, "%lg", &(matrix[i])); + for ( i = 0; i < mDim*mDim; i++ ) fscanf(fp, "%lg", &(matrix[i])); evectors = hypre_TAlloc(double, mDim * mDim , HYPRE_MEMORY_HOST); evalues = hypre_TAlloc(double, mDim , HYPRE_MEMORY_HOST); daux1 = hypre_TAlloc(double, mDim , HYPRE_MEMORY_HOST); @@ -60,11 +60,11 @@ void testEigen() mli_computespectrum_(&mDim, &mDim, matrix, evalues, &matz, evectors, daux1, daux2, &ierr); for ( i = 0; i < mDim; i++ ) printf("eigenvalue = %e\n", evalues[i]); - free(matrix); - free(evectors); - free(evalues); - free(daux1); - free(daux2); + 
hypre_TFree(matrix, HYPRE_MEMORY_HOST); + hypre_TFree(evectors, HYPRE_MEMORY_HOST); + hypre_TFree(evalues, HYPRE_MEMORY_HOST); + hypre_TFree(daux1, HYPRE_MEMORY_HOST); + hypre_TFree(daux2, HYPRE_MEMORY_HOST); } /****************************************************************************** @@ -79,7 +79,7 @@ void testMergeSort() listLengs = hypre_TAlloc(int, nlist , HYPRE_MEMORY_HOST); list = hypre_TAlloc(int*, nlist , HYPRE_MEMORY_HOST); list2 = hypre_TAlloc(int*, nlist , HYPRE_MEMORY_HOST); - for ( i = 0; i < nlist; i++ ) + for ( i = 0; i < nlist; i++ ) { list[i] = hypre_TAlloc(int, maxLeng , HYPRE_MEMORY_HOST); list2[i] = hypre_TAlloc(int, maxLeng , HYPRE_MEMORY_HOST); @@ -113,38 +113,38 @@ void testMergeSort() list[6][1] = 8; list[6][2] = 9; checkN = 0; - for ( i = 0; i < nlist; i++ ) + for ( i = 0; i < nlist; i++ ) for ( j = 0; j < listLengs[i]; j++ ) list2[i][j] = checkN++; - for ( i = 0; i < nlist; i++ ) + for ( i = 0; i < nlist; i++ ) MLI_Utils_IntQSort2(list[i], NULL, 0, listLengs[i]-1); - for ( i = 0; i < nlist; i++ ) - for ( j = 0; j < listLengs[i]; j++ ) + for ( i = 0; i < nlist; i++ ) + for ( j = 0; j < listLengs[i]; j++ ) printf("original %5d %5d = %d\n", i, j, list[i][j]); printf("MergeSort begins...\n"); MLI_Utils_IntMergeSort(nlist, listLengs, list, list2, &newNList, &newList); - for ( i = 0; i < newNList; i++ ) + for ( i = 0; i < newNList; i++ ) printf("after %5d = %d\n", i, newList[i]); printf("MergeSort ends.\n"); /* - for ( i = 0; i < newNList; i++ ) + for ( i = 0; i < newNList; i++ ) printf("Merge List %5d = %d\n", i, newList[i]); checkList = hypre_TAlloc(int, nlist * maxLeng , HYPRE_MEMORY_HOST); - for ( i = 0; i < nlist; i++ ) + for ( i = 0; i < nlist; i++ ) for ( j = 0; j < maxLeng; j++ ) checkList[i*maxLeng+j] = list[i][j]; printf("QSort begins...\n"); MLI_Utils_IntQSort2(checkList, NULL, 0, nlist*maxLeng-1); printf("QSort ends.\n"); checkN = 1; - for ( i = 1; i < nlist*maxLeng; i++ ) + for ( i = 1; i < nlist*maxLeng; i++ ) if ( 
checkList[checkN-1] != checkList[i] ) checkList[checkN++] = checkList[i]; if ( checkN != newNList ) printf("MergeSort and QSort lengths = %d %d\n", newNList, checkN); checkFlag = 0; - for ( i = 0; i < newNList; i++ ) + for ( i = 0; i < newNList; i++ ) { - if ( checkList[i] != newList[i] ) + if ( checkList[i] != newList[i] ) { printf("MergeSort and QSort discrepancy %5d = %5d %5d\n", i, newList[i], checkList[i]); @@ -154,17 +154,17 @@ void testMergeSort() printf("MergeSort and QSort lengths = %d %d\n", newNList, checkN); if ( checkFlag == 0 ) printf("MergeSort and QSort gives same result.\n"); - - for ( i = 0; i < nlist; i++ ) + + for ( i = 0; i < nlist; i++ ) { - free( list[i] ); - free( list2[i] ); + hypre_TFree(list[i], HYPRE_MEMORY_HOST); + hypre_TFree(list2[i], HYPRE_MEMORY_HOST); } - free( checkList ); + hypre_TFree(checkList , HYPRE_MEMORY_HOST); */ - free( listLengs ); - free( list ); - free( list2 ); - free( newList ); + hypre_TFree(listLengs, HYPRE_MEMORY_HOST); + hypre_TFree(list, HYPRE_MEMORY_HOST); + hypre_TFree(list2, HYPRE_MEMORY_HOST); + hypre_TFree(newList, HYPRE_MEMORY_HOST); } diff --git a/src/FEI_mv/femli/mli_amgsa_calib.cxx b/src/FEI_mv/femli/mli_amgsa_calib.cxx index 228415be5..7c1737ca3 100644 --- a/src/FEI_mv/femli/mli_amgsa_calib.cxx +++ b/src/FEI_mv/femli/mli_amgsa_calib.cxx @@ -21,7 +21,6 @@ // --------------------------------------------------------------------- #include -#include #include "HYPRE.h" #include "_hypre_utilities.h" @@ -31,12 +30,12 @@ #include "mli_method_amgsa.h" #include "mli_utils.h" - + /*********************************************************************** * generate multilevel structure using an adaptive method * --------------------------------------------------------------------- */ -int MLI_Method_AMGSA::setupCalibration( MLI *mli ) +int MLI_Method_AMGSA::setupCalibration( MLI *mli ) { int mypid, nprocs, *partition, ndofs, nrows, n_null; int i, j, k, level, local_nrows, relax_num, targc, calib_size_tmp; @@ -71,11 
+70,11 @@ int MLI_Method_AMGSA::setupCalibration( MLI *mli ) /* create trial vectors for calibration (trial_sol, zero_rhs) */ /* --------------------------------------------------------------- */ - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &partition); trial_sol = hypre_ParVectorCreate(comm, partition[nprocs], partition); hypre_ParVectorInitialize( trial_sol ); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &partition); local_nrows = partition[mypid+1] - partition[mypid]; zero_rhs = hypre_ParVectorCreate(comm, partition[nprocs], partition); @@ -93,17 +92,17 @@ int MLI_Method_AMGSA::setupCalibration( MLI *mli ) { dble_array = nullspace_store; nullspace_store = new double[nrows*(n_null+calibrationSize_)]; - for (i = 0; i < nrows*n_null; i++) nullspace_store[i] = dble_array[i]; + for (i = 0; i < nrows*n_null; i++) nullspace_store[i] = dble_array[i]; delete [] dble_array; } else { nrows = local_nrows; nullspace_store = new double[nrows*(n_null+calibrationSize_)]; - for ( j = 0; j < n_null; j++ ) + for ( j = 0; j < n_null; j++ ) { - for ( k = 0; k < nrows; k++ ) - if ( k % n_null == j ) nullspace_store[j*nrows+k] = 1.0; + for ( k = 0; k < nrows; k++ ) + if ( k % n_null == j ) nullspace_store[j*nrows+k] = 1.0; else nullspace_store[j*nrows+k] = 0.0; } } @@ -144,10 +143,10 @@ int MLI_Method_AMGSA::setupCalibration( MLI *mli ) sprintf( param_string, "setNullSpace" ); targc = 4; - targv[0] = (char *) &ndofs; - targv[1] = (char *) &n_null; - targv[2] = (char *) nullspace_store; - targv[3] = (char *) &nrows; + targv[0] = (char *) &ndofs; + targv[1] = (char *) &n_null; + targv[2] = (char *) nullspace_store; + targv[3] = (char *) &nrows; new_amgsa->setParams( param_string, targc, targv ); dtime = time_getWallclockSeconds(); @@ -179,7 +178,7 @@ int MLI_Method_AMGSA::setupCalibration( MLI *mli ) for ( j 
= 0; j < nrows*n_null; j++ ) Q_array[j] = nullspace_store[j]; #if 0 MLI_Utils_QR( Q_array, R_array, nrows, n_null ); - for ( j = 0; j < n_null; j++ ) + for ( j = 0; j < n_null; j++ ) printf("P%d : Norm of Null %d = %e\n", mypid,j,R_array[j*n_null+j]); #endif } @@ -220,7 +219,7 @@ int MLI_Method_AMGSA::setupCalibration( MLI *mli ) * generate multilevel structure using an adaptive method (not done yet) * --------------------------------------------------------------------- */ #if 0 -int MLI_Method_AMGSA::setupCalibration( MLI *mli ) +int MLI_Method_AMGSA::setupCalibration( MLI *mli ) { int mypid, nprocs, *partition, ndofs, nrows, n_null; int i, j, k, level, local_nrows, relax_num, targc, calib_size_tmp; @@ -250,7 +249,7 @@ int MLI_Method_AMGSA::setupCalibration( MLI *mli ) mli_Amat = mli->getSystemMatrix( 0 ); hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix(); targv = new char*[4]; - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &partition); local_nrows = partition[mypid+1] - partition[mypid]; free( partition ); @@ -268,13 +267,13 @@ int MLI_Method_AMGSA::setupCalibration( MLI *mli ) /* create trial vectors for calibration (trial_sol, zero_rhs) */ /* --------------------------------------------------------------- */ - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &partition); trial_sol = hypre_ParVectorCreate(comm, partition[nprocs], partition); hypre_ParVectorInitialize( trial_sol ); hypre_ParVectorSetRandomValues( trial_sol, (int) dtime ); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &partition); zero_rhs = hypre_ParVectorCreate(comm, partition[nprocs], partition); hypre_ParVectorInitialize( zero_rhs ); @@ -299,7 +298,7 @@ int MLI_Method_AMGSA::setupCalibration( MLI *mli ) dtime = 
time_getWallclockSeconds(); hypre_ParVectorSetRandomValues( trial_sol, (int) dtime ); - if ( i == 0 ) + if ( i == 0 ) { smoother_ptr->solve(mli_rhs, mli_sol); } @@ -329,16 +328,16 @@ int MLI_Method_AMGSA::setupCalibration( MLI *mli ) /* ------------------------------------------------------------ */ offset = local_nrows * n_null; - for (i = offset; i < offset+local_nrows; i++) - nullspace_store[i] = sol_data[i-offset]; + for (i = offset; i < offset+local_nrows; i++) + nullspace_store[i] = sol_data[i-offset]; n_null++; sprintf( param_string, "setNullSpace" ); targc = 4; - targv[0] = (char *) &ndofs; - targv[1] = (char *) &n_null; - targv[2] = (char *) nullspace_store; - targv[3] = (char *) &nrows; + targv[0] = (char *) &ndofs; + targv[1] = (char *) &n_null; + targv[2] = (char *) nullspace_store; + targv[3] = (char *) &nrows; new_amgsa->setParams( param_string, targc, targv ); if ( i < calibrationSize_-1 ) new_mli->setup(); diff --git a/src/FEI_mv/femli/mli_amgsa_coarsen1.cxx b/src/FEI_mv/femli/mli_amgsa_coarsen1.cxx index 8900f2cc1..8a4237f64 100644 --- a/src/FEI_mv/femli/mli_amgsa_coarsen1.cxx +++ b/src/FEI_mv/femli/mli_amgsa_coarsen1.cxx @@ -14,7 +14,6 @@ // --------------------------------------------------------------------- #include -#include #include #include "HYPRE.h" #include "_hypre_utilities.h" @@ -27,7 +26,7 @@ #include "mli_utils.h" #include "mli_solver.h" #include "mli_solver_sgs.h" - + // ********************************************************************* // local defines // --------------------------------------------------------------------- @@ -39,9 +38,9 @@ #define habs(x) ((x > 0 ) ? 
x : -(x)) -// ********************************************************************* -// Purpose : Given Amat and aggregation information, create the -// corresponding Pmat using the local aggregation scheme +// ********************************************************************* +// Purpose : Given Amat and aggregation information, create the +// corresponding Pmat using the local aggregation scheme // --------------------------------------------------------------------- double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, @@ -90,7 +89,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, AGlobalNRows = partition[numProcs]; ALocalNRows = AEndRow - AStartRow + 1; free( partition ); - if ( AGlobalNRows < minCoarseSize_ ) + if ( AGlobalNRows < minCoarseSize_ ) { if ( mypid == 0 && outputLevel_ > 2 ) { @@ -133,7 +132,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, if ( initAggr == NULL ) { blkSize = currNodeDofs_; - if (blkSize > 1 && scalar_ == 0) + if (blkSize > 1 && scalar_ == 0) { MLI_Matrix_Compress(mli_Amat, blkSize, &mli_A2mat); if ( saLabels_ != NULL && saLabels_[currLevel_] != NULL ) @@ -144,7 +143,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, } else localLabels = NULL; } - else + else { mli_A2mat = mli_Amat; if ( saLabels_ != NULL && saLabels_[currLevel_] != NULL ) @@ -172,32 +171,32 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, * perform coarsening (If aggregate information is not given, then * if dimension of A is small enough and hybrid is on, switch to * global coarsening. Otherwise if the scheme is local, do local - * coarsening). + * coarsening). 
*-----------------------------------------------------------------*/ - - if ( initAggr == NULL ) + + if ( initAggr == NULL ) { GGlobalNRows = hypre_ParCSRMatrixGlobalNumRows(A2mat); - if ( GGlobalNRows <= minAggrSize_*numProcs ) + if ( GGlobalNRows <= minAggrSize_*numProcs ) { formGlobalGraph(A2mat, &Gmat); coarsenGlobal(Gmat, &naggr, &node2aggr); hypre_ParCSRMatrixDestroy(Gmat); } - else if ( GGlobalNRows > minAggrSize_*numProcs ) + else if ( GGlobalNRows > minAggrSize_*numProcs ) { formLocalGraph(A2mat, &Gmat, localLabels); coarsenLocal(Gmat, &naggr, &node2aggr); hypre_ParCSRMatrixDestroy(Gmat); } - if ( blkSize > 1 && scalar_ == 0 ) + if ( blkSize > 1 && scalar_ == 0 ) { if ( saLabels_ != NULL && saLabels_[currLevel_] != NULL ) if (localLabels != NULL) delete [] localLabels; if (mli_A2mat != NULL) delete mli_A2mat; } } - else + else { blkSize = currNodeDofs_; naggr = initCount; @@ -206,12 +205,12 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, } /*----------------------------------------------------------------- - * create global P + * create global P *-----------------------------------------------------------------*/ - if ( (initAggr == NULL) & (numSmoothVec_ == 0) ) + if ( (initAggr == NULL) & (numSmoothVec_ == 0) ) { - if ( GGlobalNRows <= minAggrSize_*numProcs ) + if ( GGlobalNRows <= minAggrSize_*numProcs ) { genPGlobal(Amat, Pmat_out, naggr, node2aggr); if (node2aggr != NULL) delete [] node2aggr; @@ -244,7 +243,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1, PStartCol,PStartCol+PLocalNCols-1,&IJPmat); ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); /*----------------------------------------------------------------- * expand the aggregation information if block size > 1 ==> eqn2aggr @@ -258,7 +257,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, delete [] node2aggr; } else eqn2aggr = node2aggr; - + 
/*----------------------------------------------------------------- * construct the next set of labels for the next level *-----------------------------------------------------------------*/ @@ -267,7 +266,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, { if ( (currLevel_+1) < maxLevels_ ) { - if ( saLabels_[currLevel_+1] != NULL ) + if ( saLabels_[currLevel_+1] != NULL ) delete [] saLabels_[currLevel_+1]; saLabels_[currLevel_+1] = new int[PLocalNCols]; for ( i = 0; i < PLocalNCols; i++ ) saLabels_[currLevel_+1][i] = -1; @@ -276,12 +275,12 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, for ( j = 0; j < ALocalNRows; j++ ) if ( eqn2aggr[j] == i ) break; for ( k = 0; k < nullspaceDim_; k++ ) - saLabels_[currLevel_+1][i*nullspaceDim_+k] = + saLabels_[currLevel_+1][i*nullspaceDim_+k] = saLabels_[currLevel_][j]; } - for ( i = 0; i < PLocalNCols; i++ ) + for ( i = 0; i < PLocalNCols; i++ ) if ( saLabels_[currLevel_+1][i] < 0 || - saLabels_[currLevel_+1][i] >= naggr ) + saLabels_[currLevel_+1][i] >= naggr ) printf("saLabels[%d][%d] = %d (%d)\n",currLevel_+1,i, saLabels_[currLevel_+1][i], naggr); } @@ -291,7 +290,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, * compute smoothing factor for the prolongation smoother *-----------------------------------------------------------------*/ - if ( (currLevel_ >= SPLevel_ && Pweight_ != 0.0) || + if ( (currLevel_ >= SPLevel_ && Pweight_ != 0.0) || !strcmp(preSmoother_, "MLS") || !strcmp(postSmoother_, "MLS")) { @@ -299,7 +298,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, maxEigen = ritzValues[0]; if ( mypid == 0 && outputLevel_ > 1 ) printf("\tEstimated spectral radius of A = %e\n", maxEigen); - assert ( maxEigen > 0.0 ); + hypre_assert ( maxEigen > 0.0 ); alpha = Pweight_ / maxEigen; } @@ -315,7 +314,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, smoothTwice(mli_Amat); /*----------------------------------------------------------------- - * create a compact form for the null space 
vectors + * create a compact form for the null space vectors * (get ready to perform QR on them) *-----------------------------------------------------------------*/ @@ -362,13 +361,13 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, for ( irow = 0; irow < PLocalNRows; irow++ ) if ( eqn2aggr[irow] >= 0 ) aggCntArray[eqn2aggr[irow]]++; maxAggSize = 0; - for ( i = 0; i < naggr; i++ ) + for ( i = 0; i < naggr; i++ ) if (aggCntArray[i] > maxAggSize) maxAggSize = aggCntArray[i]; /* ------ register which equation is in which aggregate ------ */ aggIndArray = new int*[naggr]; - for ( i = 0; i < naggr; i++ ) + for ( i = 0; i < naggr; i++ ) { aggIndArray[i] = new int[aggCntArray[i]]; aggCntArray[i] = 0; @@ -384,11 +383,11 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, qArray = new double[maxAggSize * nullspaceDim_]; rArray = new double[nullspaceDim_ * nullspaceDim_]; - newNull = new double[naggr*nullspaceDim_*nullspaceDim_]; + newNull = new double[naggr*nullspaceDim_*nullspaceDim_]; /* ------ perform QR on each aggregate ------ */ - for ( i = 0; i < naggr; i++ ) + for ( i = 0; i < naggr; i++ ) { aggSize = aggCntArray[i]; @@ -400,13 +399,13 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, printf(" aggr size is %d\n", aggSize); exit(1); } - + /* ------ put data into the temporary array ------ */ - for ( j = 0; j < aggSize; j++ ) + for ( j = 0; j < aggSize; j++ ) { - for ( k = 0; k < nullspaceDim_; k++ ) - qArray[aggSize*k+j] = P_vecs[k][aggIndArray[i][j]]; + for ( k = 0; k < nullspaceDim_; k++ ) + qArray[aggSize*k+j] = P_vecs[k][aggIndArray[i][j]]; } /* ------ call QR function ------ */ @@ -414,10 +413,10 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, #if 0 if ( mypid == 0 && i == 0) { - for ( j = 0; j < aggSize; j++ ) + for ( j = 0; j < aggSize; j++ ) { printf("%5d : (size=%d)\n", aggIndArray[i][j], aggSize); - for ( k = 0; k < nullspaceDim_; k++ ) + for ( k = 0; k < nullspaceDim_; k++ ) printf("%10.3e ", qArray[aggSize*k+j]); printf("\n"); } @@ 
-425,7 +424,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, #endif if ( currLevel_ < (numLevels_-1) ) { - info = MLI_Utils_QR(qArray, rArray, aggSize, nullspaceDim_); + info = MLI_Utils_QR(qArray, rArray, aggSize, nullspaceDim_); if (info != 0) { printf("%4d : Aggregation WARNING : QR returns non-zero for\n", @@ -433,18 +432,18 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, printf(" aggregate %d, size = %d, info = %d\n",i,aggSize,info); #if 0 /* - for ( j = 0; j < aggSize; j++ ) + for ( j = 0; j < aggSize; j++ ) { - for ( k = 0; k < nullspaceDim_; k++ ) - qArray[aggSize*k+j] = P_vecs[k][aggIndArray[i][j]]; + for ( k = 0; k < nullspaceDim_; k++ ) + qArray[aggSize*k+j] = P_vecs[k][aggIndArray[i][j]]; } */ printf("PArray : \n"); - for ( j = 0; j < aggSize; j++ ) + for ( j = 0; j < aggSize; j++ ) { index = aggIndArray[i][j];; printf("%5d : ", index); - for ( k = 0; k < nullspaceDim_; k++ ) + for ( k = 0; k < nullspaceDim_; k++ ) printf("%16.8e ", P_vecs[k][index]); printf("\n"); } @@ -460,13 +459,13 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, } else { - for ( k = 0; k < nullspaceDim_; k++ ) + for ( k = 0; k < nullspaceDim_; k++ ) { dtemp = 0.0; - for ( j = 0; j < aggSize; j++ ) + for ( j = 0; j < aggSize; j++ ) dtemp += qArray[aggSize*k+j] * qArray[aggSize*k+j]; dtemp = 1.0 / sqrt(dtemp); - for ( j = 0; j < aggSize; j++ ) + for ( j = 0; j < aggSize; j++ ) qArray[aggSize*k+j] *= dtemp; } } @@ -475,7 +474,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, for ( j = 0; j < nullspaceDim_; j++ ) for ( k = 0; k < nullspaceDim_; k++ ) - newNull[i*nullspaceDim_+j+k*naggr*nullspaceDim_] = + newNull[i*nullspaceDim_+j+k*naggr*nullspaceDim_] = rArray[j+nullspaceDim_*k]; /* ------ put the P to P_vecs ------ */ @@ -487,7 +486,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, index = aggIndArray[i][j]; P_vecs[k][index] = qArray[ k*aggSize + j ]; } - } + } } for ( i = 0; i < naggr; i++ ) delete [] aggIndArray[i]; delete [] aggIndArray; @@ 
-508,13 +507,13 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, for ( irow = 0; irow < PLocalNRows; irow++ ) if ( eqn2aggr[irow] >= 0 ) aggCntArray[eqn2aggr[irow]]++; maxAggSize = 0; - for ( i = 0; i < naggr; i++ ) + for ( i = 0; i < naggr; i++ ) if (aggCntArray[i] > maxAggSize) maxAggSize = aggCntArray[i]; /* ------ register which equation is in which aggregate ------ */ aggIndArray = new int*[naggr]; - for ( i = 0; i < naggr; i++ ) + for ( i = 0; i < naggr; i++ ) { aggIndArray[i] = new int[aggCntArray[i]]; aggCntArray[i] = 0; @@ -535,7 +534,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, sArray = new double[MIN(maxAggSize, numSmoothVec_)]; vtArray = new double[MIN(maxAggSize, numSmoothVec_) * numSmoothVec_]; workArray = new double[5*(maxAggSize + numSmoothVec_)]; - newNull = new double[naggr*nullspaceDim_*numSmoothVec_]; + newNull = new double[naggr*nullspaceDim_*numSmoothVec_]; #if 0 // print members of each aggregate @@ -551,7 +550,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, /* ------ perform SVD on each aggregate ------ */ - for ( i = 0; i < naggr; i++ ) + for ( i = 0; i < naggr; i++ ) { aggSize = aggCntArray[i]; @@ -563,13 +562,13 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, printf(" aggr size is %d\n", aggSize); exit(1); } - + /* ------ put data into the temporary array ------ */ - for ( k = 0; k < numSmoothVec_; k++ ) + for ( k = 0; k < numSmoothVec_; k++ ) { - for ( j = 0; j < aggSize; j++ ) - uArray[aggSize*k+j] = + for ( j = 0; j < aggSize; j++ ) + uArray[aggSize*k+j] = nullspaceVec_[PLocalNRows*k+aggIndArray[i][j]]; } @@ -582,8 +581,8 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, /* ------ call SVD function ------ */ - info = MLI_Utils_SVD(uArray, sArray, vtArray, workArray, - aggSize, numSmoothVec_, 5*(maxAggSize + numSmoothVec_)); + info = MLI_Utils_SVD(uArray, sArray, vtArray, workArray, + aggSize, numSmoothVec_, 5*(maxAggSize + numSmoothVec_)); if (info != 0) { @@ -603,7 +602,7 @@ double 
MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, for ( k = 0; k < numSmoothVec_; k++ ) for ( j = 0; j < nullspaceDim_; j++ ) - newNull[i*nullspaceDim_ + j + k*naggr*nullspaceDim_] = + newNull[i*nullspaceDim_ + j + k*naggr*nullspaceDim_] = sArray[j] * vtArray[j+MIN(aggSize, numSmoothVec_)*k]; /* ------ store into P_vecs ------ */ @@ -615,7 +614,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, index = aggIndArray[i][j]; P_vecs[k][index] = uArray[ k*aggSize + j ]; } - } + } } for ( i = 0; i < naggr; i++ ) delete [] aggIndArray[i]; delete [] aggIndArray; @@ -644,18 +643,18 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, if ( currLevel_ < SPLevel_ || Pweight_ == 0.0 ) { /*-------------------------------------------------------------- - * create and initialize Pmat + * create and initialize Pmat *--------------------------------------------------------------*/ rowLengths = new int[PLocalNRows]; for ( i = 0; i < PLocalNRows; i++ ) rowLengths[i] = nullspaceDim_; ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJPmat); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengths; /*----------------------------------------------------------------- - * load and assemble Pmat + * load and assemble Pmat *-----------------------------------------------------------------*/ colInd = new int[nullspaceDim_]; @@ -674,13 +673,13 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, } } rowNum = PStartRow + irow; - HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, - (const int *) &rowNum, (const int *) colInd, + HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, + (const int *) &rowNum, (const int *) colInd, (const double *) colVal); } } ierr = HYPRE_IJMatrixAssemble(IJPmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat); comm_pkg = hypre_ParCSRMatrixCommPkg(Amat); @@ -703,7 +702,7 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, for ( i = 0; i 
< PLocalNRows; i++ ) rowLengths[i] = nullspaceDim_; ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJPmat); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengths; colInd = new int[nullspaceDim_]; colVal = new double[nullspaceDim_]; @@ -721,20 +720,20 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, } } rowNum = PStartRow + irow; - HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, - (const int *) &rowNum, (const int *) colInd, + HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, + (const int *) &rowNum, (const int *) colInd, (const double *) colVal); } } ierr = HYPRE_IJMatrixAssemble(IJPmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat2); HYPRE_IJMatrixSetObjectType(IJPmat, -1); HYPRE_IJMatrixDestroy( IJPmat ); delete [] colInd; delete [] colVal; Pmat = hypre_ParMatmul( Jmat, Pmat2); - hypre_ParCSRMatrixOwnsRowStarts(Jmat) = 0; + hypre_ParCSRMatrixOwnsRowStarts(Jmat) = 0; hypre_ParCSRMatrixOwnsColStarts(Pmat2) = 0; hypre_ParCSRMatrixDestroy(Pmat2); delete mli_Jmat; @@ -749,30 +748,30 @@ double MLI_Method_AMGSA::genP(MLI_Matrix *mli_Amat, *-----------------------------------------------------------------*/ if ( P_cols != NULL ) delete [] P_cols; - if ( P_vecs != NULL ) + if ( P_vecs != NULL ) { - for (i = 0; i < nullspaceDim_; i++) + for (i = 0; i < nullspaceDim_; i++) if ( P_vecs[i] != NULL ) delete [] P_vecs[i]; delete [] P_vecs; } delete [] eqn2aggr; /*----------------------------------------------------------------- - * set up and return the Pmat + * set up and return the Pmat *-----------------------------------------------------------------*/ funcPtr = new MLI_Function(); MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - sprintf(paramString, "HYPRE_ParCSR" ); + sprintf(paramString, "HYPRE_ParCSR" ); mli_Pmat = new MLI_Matrix( Pmat, paramString, funcPtr ); (*Pmat_out) = mli_Pmat; delete funcPtr; return maxEigen; } -// 
********************************************************************* -// Purpose : Given Amat and aggregation information, create the -// corresponding Pmat using the global aggregation scheme +// ********************************************************************* +// Purpose : Given Amat and aggregation information, create the +// corresponding Pmat using the global aggregation scheme // --------------------------------------------------------------------- double MLI_Method_AMGSA::genPGlobal(hypre_ParCSRMatrix *Amat, @@ -798,7 +797,7 @@ double MLI_Method_AMGSA::genPGlobal(hypre_ParCSRMatrix *Amat, comm = hypre_ParCSRMatrixComm(Amat); MPI_Comm_rank(comm,&mypid); MPI_Comm_size(comm,&nprocs); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) Amat, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) Amat, &partition); PStartRow = partition[mypid]; PLocalNRows = partition[mypid+1] - PStartRow; @@ -806,14 +805,14 @@ double MLI_Method_AMGSA::genPGlobal(hypre_ParCSRMatrix *Amat, if ( nAggr > 0 ) aggrCnt = new int[nAggr]; for ( irow = 0; irow < nAggr; irow++ ) aggrCnt[irow] = -1; for ( irow = 0; irow < nprocs; irow++ ) - if ( aggrCnt[aggrMap[irow]] == -1 ) aggrCnt[aggrMap[irow]] = irow; + if ( aggrCnt[aggrMap[irow]] == -1 ) aggrCnt[aggrMap[irow]] = irow; PStartCol = 0; for ( irow = 0; irow < mypid; irow++ ) if ( aggrCnt[aggrMap[irow]] == irow ) PStartCol += nullspaceDim_; if ( aggrCnt[aggrMap[mypid]] == mypid ) PLocalNCols = nullspaceDim_; else PLocalNCols = 0; if ( nAggr > 0 ) delete [] aggrCnt; - + /*----------------------------------------------------------------- * initialize P matrix *-----------------------------------------------------------------*/ @@ -821,13 +820,13 @@ double MLI_Method_AMGSA::genPGlobal(hypre_ParCSRMatrix *Amat, ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1, PStartCol,PStartCol+PLocalNCols-1,&IJPmat); ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); 
rowLengths = new int[PLocalNRows]; for (irow = 0; irow < PLocalNRows; irow++) rowLengths[irow] = nullspaceDim_; ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJPmat); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengths; /*----------------------------------------------------------------- @@ -878,7 +877,7 @@ double MLI_Method_AMGSA::genPGlobal(hypre_ParCSRMatrix *Amat, } } rowInd = PStartRow + irow; - HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, (const int *) &rowInd, + HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, (const int *) &rowInd, (const int *) colInd, (const double *) colVal); } delete [] colInd; @@ -891,7 +890,7 @@ double MLI_Method_AMGSA::genPGlobal(hypre_ParCSRMatrix *Amat, *-----------------------------------------------------------------*/ ierr = HYPRE_IJMatrixAssemble(IJPmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat); commPkg = hypre_ParCSRMatrixCommPkg(Amat); @@ -931,7 +930,7 @@ int MLI_Method_AMGSA::coarsenLocal(hypre_ParCSRMatrix *hypre_graph, comm = hypre_ParCSRMatrixComm(hypre_graph); MPI_Comm_rank(comm,&mypid); MPI_Comm_size(comm,&numProcs); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypre_graph, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypre_graph, &partition); startRow = partition[mypid]; endRow = partition[mypid+1] - 1; @@ -953,14 +952,14 @@ int MLI_Method_AMGSA::coarsenLocal(hypre_ParCSRMatrix *hypre_graph, node2aggr = new int[localNRows]; aggrSizes = new int[localNRows]; nodeStat = new int[localNRows]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { aggrSizes[irow] = 0; node2aggr[irow] = -1; nodeStat[irow] = MLI_METHOD_AMGSA_READY; rowNum = startRow + irow; hypre_ParCSRMatrixGetRow(hypre_graph,rowNum,&rowLeng,NULL,NULL); - if (rowLeng <= 0) + if (rowLeng <= 0) { nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED; 
nNotSelected++; @@ -1072,7 +1071,7 @@ int MLI_Method_AMGSA::coarsenLocal(hypre_ParCSRMatrix *hypre_graph, nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED; nSelected++; } - } + } } itmp[0] = naggr; itmp[1] = nSelected; @@ -1104,7 +1103,7 @@ int MLI_Method_AMGSA::coarsenLocal(hypre_ParCSRMatrix *hypre_graph, if ( nodeStat[colNum] == MLI_METHOD_AMGSA_READY ) count++; } } - if ( count > 1 && count >= minAggrSize_ ) + if ( count > 1 && count >= minAggrSize_ ) { node2aggr[irow] = naggr; nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED; @@ -1197,7 +1196,7 @@ int MLI_Method_AMGSA::coarsenLocal(hypre_ParCSRMatrix *hypre_graph, nUndone--; nSelected++; icol++; - if ( icol >= minAggrSize_ && naggr < count-1 ) + if ( icol >= minAggrSize_ && naggr < count-1 ) { icol = 0; naggr++; @@ -1233,7 +1232,7 @@ int MLI_Method_AMGSA::coarsenLocal(hypre_ParCSRMatrix *hypre_graph, for ( icol = 0; icol < rowLeng; icol++ ) { colNum = cols[icol]; - printf("ERROR : neighbor of unselected node %9d = %9d\n", + printf("ERROR : neighbor of unselected node %9d = %9d\n", rowNum, colNum); } } @@ -1242,11 +1241,11 @@ int MLI_Method_AMGSA::coarsenLocal(hypre_ParCSRMatrix *hypre_graph, } /*----------------------------------------------------------------- - * clean up and initialize the output arrays + * clean up and initialize the output arrays *-----------------------------------------------------------------*/ - if ( localNRows > 0 ) delete [] aggrSizes; - if ( localNRows > 0 ) delete [] nodeStat; + if ( localNRows > 0 ) delete [] aggrSizes; + if ( localNRows > 0 ) delete [] nodeStat; if ( localNRows == 1 && naggr == 0 ) { node2aggr[0] = 0; @@ -1283,13 +1282,13 @@ int MLI_Method_AMGSA::coarsenGlobal(hypre_ParCSRMatrix *Gmat, MPI_Comm_rank(comm, &mypid); MPI_Comm_size(comm, &nprocs); - commGraphI = new int[nprocs+1]; + commGraphI = new int[nprocs+1]; recvCounts = new int[nprocs]; MPI_Allgather(&nRecvs, 1, MPI_INT, recvCounts, 1, MPI_INT, comm); commGraphI[0] = 0; for ( i = 1; i <= nprocs; i++ ) commGraphI[i] = 
commGraphI[i-1] + recvCounts[i-1]; - commGraphJ = new int[commGraphI[nprocs]]; + commGraphJ = new int[commGraphI[nprocs]]; MPI_Allgatherv(recvProcs, nRecvs, MPI_INT, commGraphJ, recvCounts, commGraphI, MPI_INT, comm); delete [] recvCounts; @@ -1315,7 +1314,7 @@ int MLI_Method_AMGSA::coarsenGlobal(hypre_ParCSRMatrix *Gmat, } if ( aggrCnts[nAggr] >= minAggrSize_ ) { - aggrInds[i] = nAggr; + aggrInds[i] = nAggr; for ( j = commGraphI[i]; j < commGraphI[i+1]; j++ ) { pIndex = commGraphJ[j]; @@ -1341,7 +1340,7 @@ int MLI_Method_AMGSA::coarsenGlobal(hypre_ParCSRMatrix *Gmat, } if ( outputLevel_ > 2 && mypid == 0 ) { - printf("\tMETHOD_AMGSA::coarsenGlobal - nAggr = %d\n", nAggr); + printf("\tMETHOD_AMGSA::coarsenGlobal - nAggr = %d\n", nAggr); } if ( mypid == 0 && outputLevel_ > 1 ) { @@ -1379,7 +1378,7 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, * fetch machine and matrix parameters *-----------------------------------------------------------------*/ - assert( Amat != NULL ); + hypre_assert( Amat != NULL ); comm = hypre_ParCSRMatrixComm(Amat); MPI_Comm_rank(comm,&mypid); @@ -1406,7 +1405,7 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, jj = 1; for ( i = 1; i < AdiagNRows/currNodeDofs_; i++ ) if (partition[i] != partition[i-1]) jj++; - if ( jj * currNodeDofs_ < AdiagNRows/2 ) + if ( jj * currNodeDofs_ < AdiagNRows/2 ) { for ( i = 0; i < AdiagNRows; i++ ) saLabels_[currLevel_][i] = 0; @@ -1418,9 +1417,9 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, } } #endif - + /*----------------------------------------------------------------- - * construct the diagonal array (diagData) + * construct the diagonal array (diagData) *-----------------------------------------------------------------*/ if ( threshold_ > 0.0 ) @@ -1450,7 +1449,7 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, ierr = HYPRE_IJMatrixCreate(comm, startRow, endRow, startRow, endRow, &IJGraph); ierr = 
HYPRE_IJMatrixSetObjectType(IJGraph, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); /*----------------------------------------------------------------- * find and initialize the length of each row in the graph @@ -1489,7 +1488,7 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, } } } - else + else { for (j = AdiagRPtr[irow]; j < AdiagRPtr[irow+1]; j++) { @@ -1508,7 +1507,7 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, } ierr = HYPRE_IJMatrixSetRowSizes(IJGraph, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJGraph); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengths; /*----------------------------------------------------------------- @@ -1536,7 +1535,7 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, if ( dcomp1 > 0.0 ) { dcomp2 = habs(diagData[irow] * diagData[jj]); - if ( (dcomp1 >= epsilon * dcomp2) && (labeli == labelj) ) + if ( (dcomp1 >= epsilon * dcomp2) && (labeli == labelj) ) { colVal[length] = dcomp1 / dcomp2; colInd[length++] = jj + startRow; @@ -1545,7 +1544,7 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, } } } - else + else { for (j = AdiagRPtr[irow]; j < AdiagRPtr[irow+1]; j++) { @@ -1554,7 +1553,7 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, else labelj = 0; if ( jj != irow ) { - if (AdiagVals[j] != 0.0 && (labeli == labelj)) + if (AdiagVals[j] != 0.0 && (labeli == labelj)) { colVal[length] = AdiagVals[j]; colInd[length++] = jj + startRow; @@ -1562,11 +1561,11 @@ int MLI_Method_AMGSA::formLocalGraph( hypre_ParCSRMatrix *Amat, } } } - HYPRE_IJMatrixSetValues(IJGraph, 1, &length, (const int *) &index, + HYPRE_IJMatrixSetValues(IJGraph, 1, &length, (const int *) &index, (const int *) colInd, (const double *) colVal); } ierr = HYPRE_IJMatrixAssemble(IJGraph); - assert(!ierr); + hypre_assert(!ierr); /*----------------------------------------------------------------- * return the graph and clean up @@ -1603,7 +1602,7 @@ int 
MLI_Method_AMGSA::formGlobalGraph( hypre_ParCSRMatrix *Amat, * fetch machine and matrix parameters *-----------------------------------------------------------------*/ - assert( Amat != NULL ); + hypre_assert( Amat != NULL ); comm = hypre_ParCSRMatrixComm(Amat); MPI_Comm_rank(comm,&mypid); MPI_Comm_size(comm,&nprocs); @@ -1622,7 +1621,7 @@ int MLI_Method_AMGSA::formGlobalGraph( hypre_ParCSRMatrix *Amat, AOffdI = hypre_CSRMatrixI(AOffd); AOffdJ = hypre_CSRMatrixJ(AOffd); AOffdA = hypre_CSRMatrixData(AOffd); - + /*----------------------------------------------------------------- * initialize the graph *-----------------------------------------------------------------*/ @@ -1630,7 +1629,7 @@ int MLI_Method_AMGSA::formGlobalGraph( hypre_ParCSRMatrix *Amat, ierr = HYPRE_IJMatrixCreate(comm, startRow, endRow, startRow, endRow, &IJGraph); ierr = HYPRE_IJMatrixSetObjectType(IJGraph, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); /*----------------------------------------------------------------- * find and initialize the length of each row in the graph @@ -1659,7 +1658,7 @@ int MLI_Method_AMGSA::formGlobalGraph( hypre_ParCSRMatrix *Amat, } ierr = HYPRE_IJMatrixSetRowSizes(IJGraph, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJGraph); - assert(!ierr); + hypre_assert(!ierr); if (localNRows > 0) delete [] rowLengths; /*----------------------------------------------------------------- @@ -1676,7 +1675,7 @@ int MLI_Method_AMGSA::formGlobalGraph( hypre_ParCSRMatrix *Amat, for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) { cInd = ADiagJ[jcol]; - if ( cInd != irow && ADiagA[jcol] != 0.0) + if ( cInd != irow && ADiagA[jcol] != 0.0) { colVal[length] = ADiagA[jcol]; colInd[length++] = cInd + startRow; @@ -1687,18 +1686,18 @@ int MLI_Method_AMGSA::formGlobalGraph( hypre_ParCSRMatrix *Amat, for (jcol = AOffdI[irow]; jcol < AOffdI[irow+1]; jcol++) { cInd = AOffdJ[jcol]; - if ( AOffdA[jcol] != 0.0) + if ( AOffdA[jcol] != 0.0) { colVal[length] = AOffdA[jcol]; 
colInd[length++] = colMapOffd[cInd]; } } } - HYPRE_IJMatrixSetValues(IJGraph, 1, &length, (const int *) &index, + HYPRE_IJMatrixSetValues(IJGraph, 1, &length, (const int *) &index, (const int *) colInd, (const double *) colVal); } ierr = HYPRE_IJMatrixAssemble(IJGraph); - assert(!ierr); + hypre_assert(!ierr); /*----------------------------------------------------------------- * return the graph and clean up diff --git a/src/FEI_mv/femli/mli_amgsa_dd_fedata.cxx b/src/FEI_mv/femli/mli_amgsa_dd_fedata.cxx index f174999e9..a41678186 100644 --- a/src/FEI_mv/femli/mli_amgsa_dd_fedata.cxx +++ b/src/FEI_mv/femli/mli_amgsa_dd_fedata.cxx @@ -14,7 +14,6 @@ // --------------------------------------------------------------------- #include -#include // ********************************************************************* // HYPRE includes external to MLI @@ -177,9 +176,9 @@ int MLI_Method_AMGSA::setupFEDataBasedNullSpaces( MLI *mli ) csrNrows = newNNodes * blockSize; csrIA = new int[csrNrows+1]; csrJA = new int[csrNrows*rowSize]; - assert( ((long) csrJA) ); + hypre_assert( ((long) csrJA) ); csrAA = new double[csrNrows*rowSize]; - assert( ((long) csrAA) ); + hypre_assert( ((long) csrAA) ); csrIA[0] = 0; for ( i = 1; i < csrNrows; i++ ) csrIA[i] = csrIA[i-1] + rowSize; @@ -298,7 +297,7 @@ int MLI_Method_AMGSA::setupFEDataBasedNullSpaces( MLI *mli ) eigenR = new double[nullspaceDim_+1]; eigenI = new double[nullspaceDim_+1]; eigenV = new double[csrNrows*(nullspaceDim_+1)]; - assert((long) eigenV); + hypre_assert((long) eigenV); #ifdef MLI_ARPACK sigmaR = 1.0e-5; diff --git a/src/FEI_mv/femli/mli_amgsa_dd_sfei.cxx b/src/FEI_mv/femli/mli_amgsa_dd_sfei.cxx index 28717b7f7..6dbd2f07e 100644 --- a/src/FEI_mv/femli/mli_amgsa_dd_sfei.cxx +++ b/src/FEI_mv/femli/mli_amgsa_dd_sfei.cxx @@ -14,7 +14,6 @@ // --------------------------------------------------------------------- #include -#include #define MABS(x) ((x) > 0 ? 
(x) : (-(x))) @@ -51,17 +50,17 @@ #include "mli_matrix.h" #include "mli_matrix_misc.h" #include "mli_solver.h" - + // ********************************************************************* -// functions external to MLI +// functions external to MLI // --------------------------------------------------------------------- extern "C" { /* ARPACK function to compute eigenvalues/eigenvectors */ - void dnstev_(int *n, int *nev, char *which, double *sigmar, - double *sigmai, int *colptr, int *rowind, double *nzvals, + void dnstev_(int *n, int *nev, char *which, double *sigmar, + double *sigmai, int *colptr, int *rowind, double *nzvals, double *dr, double *di, double *z, int *ldz, int *info, double *tol); } @@ -72,7 +71,7 @@ extern "C" * compute initial null spaces (for the subdomain only) using FEData * --------------------------------------------------------------------- */ -int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) +int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) { int k, iN, iD, iR, level, mypid, nElems, elemNNodes; int iE, iN2, **elemNodeLists, *elemNodeList1D, totalNNodes; @@ -126,7 +125,7 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) MPI_Comm_rank(comm, &mypid); mliAmat = mli->getSystemMatrix(level); hypreA = (hypre_ParCSRMatrix *) mliAmat->getMatrix(); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &partition); localStartRow = partition[mypid]; localNRows = partition[mypid+1] - localStartRow; @@ -153,15 +152,15 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) /* --------------------------------------------------------------- */ /* initialize null space vector and aggregation label */ /* --------------------------------------------------------------- */ - + //if ( nullspaceVec_ != NULL ) delete [] nullspaceVec_; - if (nullspaceVec_ != NULL) assert( nullspaceLen_ == localNRows ); - if (nullspaceVec_ == NULL) + if (nullspaceVec_ != 
NULL) hypre_assert( nullspaceLen_ == localNRows ); + if (nullspaceVec_ == NULL) { nullspaceLen_ = localNRows; nullspaceVec_ = new double[localNRows*nullspaceDim_]; } - if (saLabels_ == NULL) + if (saLabels_ == NULL) { saLabels_ = new int*[maxLevels_]; for (k = 0; k < maxLevels_; k++) saLabels_[k] = NULL; @@ -186,14 +185,14 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) if (fp != NULL) fprintf(fp, "%d\n", nElems); elemNNodes = sfei->getBlockElemNEqns(iD); elemNodeLists = sfei->getBlockElemEqnLists(iD); - elemMatrices = sfei->getBlockElemStiffness(iD); + elemMatrices = sfei->getBlockElemStiffness(iD); totalNNodes = nElems * elemNNodes; elemNodeList1D = new int[totalNNodes]; count = 0; - for (iE = 0; iE < nElems; iE++) - for (iN = 0; iN < elemNNodes; iN++) + for (iE = 0; iE < nElems; iE++) + for (iN = 0; iN < elemNNodes; iN++) elemNodeList1D[count++] = elemNodeLists[iE][iN]; - + /* ------------------------------------------------------ */ /* find the number of nodes in local subdomain (including */ /* external nodes) */ @@ -201,7 +200,7 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) orderArray = new int[totalNNodes]; newElemNodeList = new int[totalNNodes]; - for ( iN = 0; iN < totalNNodes; iN++ ) + for ( iN = 0; iN < totalNNodes; iN++ ) { orderArray[iN] = iN; newElemNodeList[iN] = elemNodeList1D[iN]; @@ -211,9 +210,9 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) newNNodes = 0; for (iN = 1; iN < totalNNodes; iN++) { - if (newElemNodeList[iN] == newElemNodeList[newNNodes]) + if (newElemNodeList[iN] == newElemNodeList[newNNodes]) elemNodeList1D[orderArray[iN]] = newNNodes; - else + else { newNNodes++; elemNodeList1D[orderArray[iN]] = newNNodes; @@ -233,9 +232,9 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) csrNrows = newNNodes; csrIA = new int[csrNrows+1]; csrJA = new int[csrNrows*rowSize]; - assert( csrJA != NULL ); + hypre_assert( csrJA != NULL ); csrAA = new double[csrNrows*rowSize]; - assert(csrAA != NULL); + 
hypre_assert(csrAA != NULL); for (iR = 0; iR < csrNrows; iR++) csrIA[iR] = iR * rowSize; /* -------------------------------------------------------- */ @@ -274,7 +273,7 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) { printf("MLI_Method_AMGSA::setupSFEIBasedNullSpaces "); printf("ERROR : rowSize too large (increase it). \n"); - printf(" => allowed = %d, actual = %d\n",rowSize, + printf(" => allowed = %d, actual = %d\n",rowSize, csrIA[iR]-rowSize*iR); exit(1); } @@ -286,7 +285,7 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) count = start; for (k = start+1; k < start+rowLeng; k++) { - if (csrJA[k] == csrJA[count]) csrAA[count] += csrAA[k]; + if (csrJA[k] == csrJA[count]) csrAA[count] += csrAA[k]; else { count++; @@ -320,12 +319,12 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) eigenR = new double[nullspaceDim_+1]; eigenI = new double[nullspaceDim_+1]; eigenV = new double[csrNrows*(nullspaceDim_+1)]; - assert((long) eigenV); + hypre_assert((long) eigenV); #ifdef MLI_ARPACK sigmaR = 1.0e-6; sigmaI = 0.0e0; - dnstev_(&csrNrows, &nullspaceDim_, which, &sigmaR, &sigmaI, + dnstev_(&csrNrows, &nullspaceDim_, which, &sigmaR, &sigmaI, csrIA, csrJA, csrAA, eigenR, eigenI, eigenV, &csrNrows, &info, &arpackTol_); if (outputLevel_ > 2) @@ -343,7 +342,7 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) // strcpy( which, "destroy" ); #ifdef MLI_ARPACK -// dnstev_(&csrNrows, &nullspaceDim_, which, &sigmaR, &sigmaI, +// dnstev_(&csrNrows, &nullspaceDim_, which, &sigmaR, &sigmaI, // csrIA, csrJA, csrAA, eigenR, eigenI, eigenV, &csrNrows, &info, // &arpackTol_); #else @@ -362,7 +361,7 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) /* -------------------------------------------------------- */ if (nullspaceLen_ == 0) nullspaceLen_ = localNRows; - if (nullspaceVec_ == NULL) + if (nullspaceVec_ == NULL) nullspaceVec_ = new double[nullspaceLen_ * nullspaceDim_]; for (iE = 0; iE < nElems; iE++) { @@ -376,7 +375,7 @@ int 
MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) saLabels_[0][rowInd] = iD; colInd = elemNodeList1D[iE*elemNNodes+iN]; for (k = startCol; k < nullspaceDim_; k++) - nullspaceVec_[rowInd+k*nullspaceLen_] = + nullspaceVec_[rowInd+k*nullspaceLen_] = eigenV[colInd+k*csrNrows]; } } @@ -399,7 +398,7 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) fprintf(fp," %d %d\n", nullspaceLen_, nullspaceDim_); for ( iN = 0; iN < nullspaceLen_; iN++ ) { - for ( k = 0; k < nullspaceDim_; k++ ) + for ( k = 0; k < nullspaceDim_; k++ ) fprintf(fp,"%17.9e ",nullspaceVec_[nullspaceLen_*k+iN]); fprintf(fp,"\n"); } @@ -420,7 +419,7 @@ int MLI_Method_AMGSA::setupSFEIBasedNullSpaces(MLI *mli) * the same aggregate number 0 * --------------------------------------------------------------------- */ -int MLI_Method_AMGSA::setupSFEIBasedAggregates(MLI *mli) +int MLI_Method_AMGSA::setupSFEIBasedAggregates(MLI *mli) { int iR, iD, level, mypid, *partition, localNRows, *aggrMap; int nSubdomains, nElems, elemNNodes, **elemNodeLists; @@ -465,7 +464,7 @@ int MLI_Method_AMGSA::setupSFEIBasedAggregates(MLI *mli) MPI_Comm_size(comm, &nprocs); mliAmat = mli->getSystemMatrix(level); hypreA = (hypre_ParCSRMatrix *) mliAmat->getMatrix(); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &partition); localStartRow = partition[mypid]; localNRows = partition[mypid+1] - localStartRow; @@ -495,37 +494,37 @@ int MLI_Method_AMGSA::setupSFEIBasedAggregates(MLI *mli) nElems = sfei->getBlockNumElems(iD); elemNNodes = sfei->getBlockElemNEqns(iD); elemNodeLists = sfei->getBlockElemEqnLists(iD); - for (iE = 0; iE < nElems; iE++) + for (iE = 0; iE < nElems; iE++) { - for (iN = 0; iN < elemNNodes; iN++) + for (iN = 0; iN < elemNNodes; iN++) { index = elemNodeLists[iE][iN] - localStartRow; - if (index >= 0 && index < localNRows && aggrMap[index] < 0) + if (index >= 0 && index < localNRows && aggrMap[index] < 0) aggrMap[index] = 
iD; if (index >= 0 && index < localNRows) aggrMap2[index] = iD; } } count = 0; - for (iR = 0; iR < localNRows; iR++) if (aggrMap2[iR] >= 0) count++; + for (iR = 0; iR < localNRows; iR++) if (aggrMap2[iR] >= 0) count++; saDataAux_[0][iD+1] = count; saDataAux_[iD+1] = new int[count]; count = 0; - for (iR = 0; iR < localNRows; iR++) - if (aggrMap2[iR] >= 0) saDataAux_[iD+1][count++] = iR; + for (iR = 0; iR < localNRows; iR++) + if (aggrMap2[iR] >= 0) saDataAux_[iD+1][count++] = iR; } #if 0 /* force non-overlapped aggregates */ for ( iD = 0; iD < nSubdomains; iD++ ) { count = 0; - for (iR = 0; iR < localNRows; iR++) if (aggrMap[iR] == iD) count++; + for (iR = 0; iR < localNRows; iR++) if (aggrMap[iR] == iD) count++; saDataAux_[0][iD+1] = count; if (saDataAux_[iD+1] != NULL) delete [] saDataAux_[iD+1]; saDataAux_[iD+1] = new int[count]; count = 0; - for (iR = 0; iR < localNRows; iR++) - if (aggrMap[iR] == iD) saDataAux_[iD+1][count++] = iR; + for (iR = 0; iR < localNRows; iR++) + if (aggrMap[iR] == iD) saDataAux_[iD+1][count++] = iR; } #endif delete [] aggrMap2; @@ -546,7 +545,7 @@ int MLI_Method_AMGSA::setupSFEIBasedAggregates(MLI *mli) * (based on Bank-Lu-Tong-Vassilevski method but with aggregation) * --------------------------------------------------------------------- */ -int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) +int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) { MLI_Function *funcPtr; @@ -585,7 +584,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) level = 0; mli_Amat = mli->getSystemMatrix( level ); hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix(); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &partition); ANRows = partition[mypid+1] - partition[mypid]; free( partition ); @@ -634,7 +633,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) MLI_Matrix_ComputePtAP(mli_Pmat, mli_Amat, &mli_cAmat); hypreAc = 
(hypre_ParCSRMatrix *) mli_cAmat->getMatrix(); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreAc, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreAc, &ACPartition); ACStart = ACPartition[mypid]; ACNRows = ACPartition[mypid+1] - ACStart; @@ -674,7 +673,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) recvLengs[iP] = ACPartition[proc+1] - ACPartition[proc]; } PEPartition = new int[nprocs+1]; - MPI_Allgather(&PENCols,1,MPI_INT,&(PEPartition[1]),1,MPI_INT,comm); + MPI_Allgather(&PENCols,1,MPI_INT,&(PEPartition[1]),1,MPI_INT,comm); PEPartition[0] = 0; for (iP = 2; iP <= nprocs; iP++) PEPartition[iP] += PEPartition[iP-1]; @@ -724,18 +723,18 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) ierr = HYPRE_IJMatrixCreate(comm,ACStart,ACStart+ACNRows-1, PECStart,PECStart+PENCols-1,&IJ_PE); ierr += HYPRE_IJMatrixSetObjectType(IJ_PE, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if (ACNRows > 0) rowSizes = new int[ACNRows]; for (iD = 0; iD < ACNRows; iD++) rowSizes[iD] = nSends; ierr = HYPRE_IJMatrixSetRowSizes(IJ_PE, rowSizes); ierr += HYPRE_IJMatrixInitialize(IJ_PE); - assert(!ierr); + hypre_assert(!ierr); if (ACNRows > 0) delete [] rowSizes; if (nSends > 0) { colInds = new int[nSends]; colVals = new double[nSends]; - for (iP = 0; iP < nSends; iP++) colVals[iP] = 1.0; + for (iP = 0; iP < nSends; iP++) colVals[iP] = 1.0; } for (iD = 0; iD < ACNRows; iD++) { @@ -790,7 +789,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) genP_Selective(mli_AExt, &mli_Pmat2, PENCols, bdryData); delete [] bdryData; - // compute Aco Poo Po2 + // compute Aco Poo Po2 hypre_ParCSRMatrix *hypreP2, *hypreP3, *hypreAP2; hypreP2 = (hypre_ParCSRMatrix *) mli_Pmat2->getMatrix(); hypreAP2 = hypre_ParMatmul(hypreAP, hypreP2); @@ -801,7 +800,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) MLI_Matrix_ComputePtAP(mli_Pmat2, mli_AExt, &mli_AExt2); // adjust pointers - hypre_ParCSRMatrixDestroy(hypreAP); + 
hypre_ParCSRMatrixDestroy(hypreAP); hypreAP = hypreAP2; delete mli_Pmat; funcPtr = new MLI_Function(); @@ -867,7 +866,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) index = index - ACPartition[recvProcs[iP]] + offset; break; } - offset += (ACPartition[recvProcs[iP]+1] - + offset += (ACPartition[recvProcs[iP]+1] - ACPartition[recvProcs[iP]]); } newJA[newNnz] = index; @@ -876,7 +875,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) auxIA[index-ANRows]++; } newIA[iR+1] = newNnz; - } + } // (2,2) block for ( iR = ANRows; iR < ANRows+PENCols; iR++ ) @@ -901,11 +900,11 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) newJA[offset] = iR; newAA[offset] = APoffdA[iC]; auxIA[index-ANRows]++; - } - } + } + } /* --------------------------------------------------------------- */ - + int iZero=0, *newRowSizes; MPI_Comm newMPIComm; HYPRE_IJMatrix IJnewA; @@ -916,13 +915,13 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) ierr = HYPRE_IJMatrixCreate(newMPIComm,iZero,newNrows-1,iZero, newNrows-1,&IJnewA); ierr += HYPRE_IJMatrixSetObjectType(IJnewA, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if ( newNrows > 0 ) newRowSizes = new int[newNrows]; for ( iD = 0; iD < newNrows; iD++ ) newRowSizes[iD] = newIA[iD+1] - newIA[iD]; ierr = HYPRE_IJMatrixSetRowSizes(IJnewA, newRowSizes); ierr += HYPRE_IJMatrixInitialize(IJnewA); - assert(!ierr); + hypre_assert(!ierr); for ( iD = 0; iD < newNrows; iD++ ) { offset = newIA[iD]; @@ -978,7 +977,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) rLength = ACPartition[recvProcs[iP]+1] - ACPartition[recvProcs[iP]]; for ( iD = 0; iD < nullspaceDim_; iD++ ) for ( iD2 = 0; iD2 < rLength; iD2++ ) - newNullVecs[iD*newNrows+iD2+offset] = + newNullVecs[iD*newNrows+iD2+offset] = tmpNullVecs[offset+iD*rLength+iD2]; rLength *= nullspaceDim_; offset += rLength; @@ -1050,7 +1049,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) currNodeDofs_ = nodeDofs; if ( nullspaceVec_ != 
NULL ) delete [] nullspaceVec_; nullspaceVec_ = new double[nullspaceDim_*ANRows]; - for ( iD = 0; iD < nullspaceDim_*ANRows; iD++) + for ( iD = 0; iD < nullspaceDim_*ANRows; iD++) nullspaceVec_[iD] = nullVecs[iD]; // create prolongation and coarse grid operators @@ -1078,7 +1077,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) free( ACPartition ); delete [] PEPartition; delete [] auxIA; - HYPRE_IJMatrixDestroy(IJ_PE); + HYPRE_IJMatrixDestroy(IJ_PE); delete mli_AExt; delete [] nullVecs; delete mli_PE; @@ -1091,9 +1090,9 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp(MLI *mli) return (level); } -// ************************************************************************ +// ************************************************************************ // Purpose : Given Amat, perform preferential coarsening (small aggregates -// near processor boundaries and create the corresponding Pmat +// near processor boundaries and create the corresponding Pmat // (called by setupExtendedDomainDecomp) // ------------------------------------------------------------------------ @@ -1153,7 +1152,7 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, * perform coarsening (small aggregates on processor boundaries) * 10/2005 : add bdryData for secondary aggregation *-----------------------------------------------------------------*/ - + coarsenGraded(A2mat, &naggr, &node2aggr, &bdryData); if (blkSize > 1 && mli_A2mat != NULL) delete mli_A2mat; if (blkSize > 1) @@ -1178,7 +1177,7 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1, PStartCol,PStartCol+PLocalNCols-1,&IJPmat); ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); /*----------------------------------------------------------------- * expand the aggregation information if block size > 1 ==> eqn2aggr @@ -1192,9 +1191,9 @@ double 
MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, delete [] node2aggr; } else eqn2aggr = node2aggr; - + /*----------------------------------------------------------------- - * create a compact form for the null space vectors + * create a compact form for the null space vectors * (get ready to perform QR on them) *-----------------------------------------------------------------*/ @@ -1237,13 +1236,13 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, if ( eqn2aggr[irow] >= 0 ) aggCntArray[eqn2aggr[irow]]++; else aggCntArray[(-eqn2aggr[irow]-1)]++; maxAggSize = 0; - for ( ig = 0; ig < naggr; ig++ ) + for ( ig = 0; ig < naggr; ig++ ) if (aggCntArray[ig] > maxAggSize) maxAggSize = aggCntArray[ig]; /* ------ register which equation is in which aggregate ------ */ aggIndArray = new int*[naggr]; - for ( ig = 0; ig < naggr; ig++ ) + for ( ig = 0; ig < naggr; ig++ ) { aggIndArray[ig] = new int[aggCntArray[ig]]; aggCntArray[ig] = 0; @@ -1261,11 +1260,11 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, qArray = new double[maxAggSize * nullspaceDim_]; rArray = new double[nullspaceDim_ * nullspaceDim_]; - newNull = new double[naggr*nullspaceDim_*nullspaceDim_]; + newNull = new double[naggr*nullspaceDim_*nullspaceDim_]; /* ------ perform QR on each aggregate ------ */ - for ( ig = 0; ig < naggr; ig++ ) + for ( ig = 0; ig < naggr; ig++ ) { aggSize = aggCntArray[ig]; @@ -1277,13 +1276,13 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, printf(" aggr size is %d\n", aggSize); exit(1); } - + /* ------ put data into the temporary array ------ */ - for ( jcol = 0; jcol < aggSize; jcol++ ) + for ( jcol = 0; jcol < aggSize; jcol++ ) { - for ( irow = 0; irow < nullspaceDim_; irow++ ) - qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]]; + for ( irow = 0; irow < nullspaceDim_; irow++ ) + qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]]; } /* ------ call QR 
function ------ */ @@ -1291,7 +1290,7 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, /* if ( currLevel_ < (numLevels_-1) ) { - info = MLI_Utils_QR(qArray, rArray, aggSize, nullspaceDim_); + info = MLI_Utils_QR(qArray, rArray, aggSize, nullspaceDim_); if (info != 0) { printf("%4d : Aggregation WARNING : QR returns non-zero for\n", @@ -1301,13 +1300,13 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, } else { - for ( irow = 0; irow < nullspaceDim_; irow++ ) + for ( irow = 0; irow < nullspaceDim_; irow++ ) { dtemp = 0.0; - for ( jcol = 0; jcol < aggSize; jcol++ ) + for ( jcol = 0; jcol < aggSize; jcol++ ) dtemp += qArray[aggSize*irow+jcol]*qArray[aggSize*irow+jcol]; dtemp = 1.0 / sqrt(dtemp); - for ( jcol = 0; jcol < aggSize; jcol++ ) + for ( jcol = 0; jcol < aggSize; jcol++ ) qArray[aggSize*irow+jcol] *= dtemp; } } @@ -1318,7 +1317,7 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, /* for ( jcol = 0; jcol < nullspaceDim_; jcol++ ) for ( irow = 0; irow < nullspaceDim_; irow++ ) - newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = + newNull[ig*nullspaceDim_+jcol+irow*naggr*nullspaceDim_] = rArray[jcol+nullspaceDim_*irow]; */ for ( jcol = 0; jcol < nullspaceDim_; jcol++ ) @@ -1337,7 +1336,7 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, index = aggIndArray[ig][jcol]; PVecs[irow][index] = qArray[ irow*aggSize + jcol ]; } - } + } } for ( ig = 0; ig < naggr; ig++ ) delete [] aggIndArray[ig]; delete [] aggIndArray; @@ -1349,7 +1348,7 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, nullspaceVec_ = newNull; /*----------------------------------------------------------------- - * initialize Pmat + * initialize Pmat *-----------------------------------------------------------------*/ rowLengths = new int[PLocalNRows]; @@ -1357,11 +1356,11 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, 
rowLengths[irow] = nullspaceDim_; ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJPmat); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengths; /*-------------------------------------------------------------------- - * load and assemble Pmat + * load and assemble Pmat *--------------------------------------------------------------------*/ colInd = new int[nullspaceDim_]; @@ -1380,13 +1379,13 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, } } rowNum = PStartRow + irow; - HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, - (const int *) &rowNum, (const int *) colInd, + HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, + (const int *) &rowNum, (const int *) colInd, (const double *) colVal); } } ierr = HYPRE_IJMatrixAssemble(IJPmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat); commPkg = hypre_ParCSRMatrixCommPkg(Amat); @@ -1401,21 +1400,21 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, *-----------------------------------------------------------------*/ if ( PCols != NULL ) delete [] PCols; - if ( PVecs != NULL ) + if ( PVecs != NULL ) { - for (irow = 0; irow < nullspaceDim_; irow++) + for (irow = 0; irow < nullspaceDim_; irow++) if ( PVecs[irow] != NULL ) delete [] PVecs[irow]; delete [] PVecs; } (*eqn2aggrOut) = eqn2aggr; /*----------------------------------------------------------------- - * set up and return Pmat + * set up and return Pmat *-----------------------------------------------------------------*/ funcPtr = new MLI_Function(); MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - sprintf(paramString, "HYPRE_ParCSR" ); + sprintf(paramString, "HYPRE_ParCSR" ); mli_Pmat = new MLI_Matrix( Pmat, paramString, funcPtr ); (*PmatOut) = mli_Pmat; delete funcPtr; @@ -1426,7 +1425,7 @@ double MLI_Method_AMGSA::genP_DD(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, // 
graded coarsening scheme (Given a graph, aggregate on the local subgraph // but give smaller aggregate near processor boundaries) // (called by setupExtendedDomainDecomp/genP_DD) -// --------------------------------------------------------------------- +// --------------------------------------------------------------------- int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, int *mliAggrLeng, int **mliAggrArray, int **bdryData) @@ -1451,7 +1450,7 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, comm = hypre_ParCSRMatrixComm(hypreG); MPI_Comm_rank(comm,&mypid); MPI_Comm_size(comm,&nprocs); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG, &partition); startRow = partition[mypid]; endRow = partition[mypid+1] - 1; @@ -1471,7 +1470,7 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, GOffdI = hypre_CSRMatrixI(GOffd); /*----------------------------------------------------------------- - * allocate status arrays + * allocate status arrays *-----------------------------------------------------------------*/ if (localNRows > 0) @@ -1480,7 +1479,7 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, aggrSizes = new int[localNRows]; nodeStat = new int[localNRows]; bdrySet = new int[localNRows]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { aggrSizes[irow] = 0; node2aggr[irow] = -1; @@ -1491,13 +1490,13 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, else node2aggr = aggrSizes = nodeStat = bdrySet = NULL; /*----------------------------------------------------------------- - * search for zero rows and rows near the processor boundaries + * search for zero rows and rows near the processor boundaries *-----------------------------------------------------------------*/ - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { rowLeng 
= GDiagI[irow+1] - GDiagI[irow]; - if (rowLeng <= 0) + if (rowLeng <= 0) { nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED; nNotSelected++; @@ -1513,15 +1512,15 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, *-----------------------------------------------------------------*/ localMinSize = nullspaceDim_ / currNodeDofs_ * 2; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { - if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY && bdrySet[irow] == 1 ) + if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY && bdrySet[irow] == 1 ) { nSelected++; node2aggr[irow] = - naggr - 1; aggrSizes[naggr] = 1; nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED2; - if (localMinSize > 1) + if (localMinSize > 1) { rowLeng = GDiagI[irow+1] - GDiagI[irow]; cols = &(GDiagJ[GDiagI[irow]]); @@ -1555,9 +1554,9 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, maxInd = -1; maxCount = -1; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { - if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY ) + if ( nodeStat[irow] == MLI_METHOD_AMGSA_READY ) { count = 0; rowLeng = GDiagI[irow+1] - GDiagI[irow]; @@ -1663,7 +1662,7 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED; nSelected++; } - } + } } itmp[0] = naggr; itmp[1] = nSelected; @@ -1692,7 +1691,7 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, colNum = cols[jcol]; if ( nodeStat[colNum] == MLI_METHOD_AMGSA_READY ) count++; } - if ( count > 1 && count >= minAggrSize_ ) + if ( count > 1 && count >= minAggrSize_ ) { aggrSizes[naggr] = 0; for ( jcol = 0; jcol < rowLeng; jcol++ ) @@ -1772,7 +1771,7 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, nUndone--; nSelected++; jcol++; - if ( jcol >= minAggrSize_ && naggr < count-1 ) + if ( jcol >= minAggrSize_ && naggr < count-1 ) { jcol = 0; naggr++; @@ -1808,7 +1807,7 @@ int 
MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, for ( jcol = 0; jcol < rowLeng; jcol++ ) { colNum = cols[jcol]; - printf("ERROR : neighbor of unselected node %9d = %9d\n", + printf("ERROR : neighbor of unselected node %9d = %9d\n", rowNum, colNum); } hypre_ParCSRMatrixRestoreRow(hypreG,rowNum,&rowLeng,&cols,NULL); @@ -1821,11 +1820,11 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, } /*----------------------------------------------------------------- - * clean up and initialize the output arrays + * clean up and initialize the output arrays *-----------------------------------------------------------------*/ - if (localNRows > 0) delete [] aggrSizes; - if (localNRows > 0) delete [] nodeStat; + if (localNRows > 0) delete [] aggrSizes; + if (localNRows > 0) delete [] nodeStat; if (localNRows == 1 && naggr == 0) { node2aggr[0] = 0; @@ -1837,7 +1836,7 @@ int MLI_Method_AMGSA::coarsenGraded(hypre_ParCSRMatrix *hypreG, return 0; } -// ************************************************************************ +// ************************************************************************ // Purpose : Given Amat, perform preferential coarsening (no coarsening // when the bdry flag = 1 // (called by setupExtendedDomainDecomp) @@ -1893,7 +1892,7 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, /*----------------------------------------------------------------- * perform coarsening (no aggregation on processor boundaries) *-----------------------------------------------------------------*/ - + if (blkSize > 1) { compressBdryData = new int[ALocalNRows/blkSize]; @@ -1919,7 +1918,7 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1, PStartCol,PStartCol+PLocalNCols-1,&IJPmat); ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); /*----------------------------------------------------------------- * expand the 
aggregation information if block size > 1 ==> eqn2aggr @@ -1933,9 +1932,9 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, delete [] node2aggr; } else eqn2aggr = node2aggr; - + /*----------------------------------------------------------------- - * create a compact form for the null space vectors + * create a compact form for the null space vectors * (get ready to perform QR on them) *-----------------------------------------------------------------*/ @@ -1978,13 +1977,13 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, if ( eqn2aggr[irow] >= 0 ) aggCntArray[eqn2aggr[irow]]++; else aggCntArray[(-eqn2aggr[irow]-1)]++; maxAggSize = 0; - for ( ig = 0; ig < naggr; ig++ ) + for ( ig = 0; ig < naggr; ig++ ) if (aggCntArray[ig] > maxAggSize) maxAggSize = aggCntArray[ig]; /* ------ register which equation is in which aggregate ------ */ aggIndArray = new int*[naggr]; - for ( ig = 0; ig < naggr; ig++ ) + for ( ig = 0; ig < naggr; ig++ ) { aggIndArray[ig] = new int[aggCntArray[ig]]; aggCntArray[ig] = 0; @@ -2002,11 +2001,11 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, qArray = new double[maxAggSize * nullspaceDim_]; rArray = new double[nullspaceDim_ * nullspaceDim_]; - newNull = new double[naggr*nullspaceDim_*nullspaceDim_]; + newNull = new double[naggr*nullspaceDim_*nullspaceDim_]; /* ------ perform QR on each aggregate ------ */ - for ( ig = 0; ig < naggr; ig++ ) + for ( ig = 0; ig < naggr; ig++ ) { aggSize = aggCntArray[ig]; @@ -2018,13 +2017,13 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, printf(" aggr size is %d\n", aggSize); exit(1); } - + /* ------ put data into the temporary array ------ */ - for ( jcol = 0; jcol < aggSize; jcol++ ) + for ( jcol = 0; jcol < aggSize; jcol++ ) { - for ( irow = 0; irow < nullspaceDim_; irow++ ) - qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]]; + for ( irow = 0; irow < nullspaceDim_; irow++ ) + qArray[aggSize*irow+jcol] = 
PVecs[irow][aggIndArray[ig][jcol]]; } /* ------ after QR, put the R into the next null space ------ */ @@ -2045,7 +2044,7 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, index = aggIndArray[ig][jcol]; PVecs[irow][index] = qArray[ irow*aggSize + jcol ]; } - } + } } for ( ig = 0; ig < naggr; ig++ ) delete [] aggIndArray[ig]; delete [] aggIndArray; @@ -2057,7 +2056,7 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, nullspaceVec_ = newNull; /*----------------------------------------------------------------- - * initialize Pmat + * initialize Pmat *-----------------------------------------------------------------*/ rowLengths = new int[PLocalNRows]; @@ -2065,11 +2064,11 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, rowLengths[irow] = nullspaceDim_; ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJPmat); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengths; /*-------------------------------------------------------------------- - * load and assemble Pmat + * load and assemble Pmat *--------------------------------------------------------------------*/ colInd = new int[nullspaceDim_]; @@ -2088,13 +2087,13 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, } } rowNum = PStartRow + irow; - HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, - (const int *) &rowNum, (const int *) colInd, + HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, + (const int *) &rowNum, (const int *) colInd, (const double *) colVal); } } ierr = HYPRE_IJMatrixAssemble(IJPmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat); commPkg = hypre_ParCSRMatrixCommPkg(Amat); @@ -2109,21 +2108,21 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, *-----------------------------------------------------------------*/ if (PCols != NULL) delete [] PCols; - if (PVecs != NULL) + if (PVecs != NULL) { - for 
(irow = 0; irow < nullspaceDim_; irow++) + for (irow = 0; irow < nullspaceDim_; irow++) if (PVecs[irow] != NULL) delete [] PVecs[irow]; delete [] PVecs; } delete [] eqn2aggr; /*----------------------------------------------------------------- - * set up and return Pmat + * set up and return Pmat *-----------------------------------------------------------------*/ funcPtr = new MLI_Function(); MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - sprintf(paramString, "HYPRE_ParCSR" ); + sprintf(paramString, "HYPRE_ParCSR" ); mli_Pmat = new MLI_Matrix( Pmat, paramString, funcPtr ); (*PmatOut) = mli_Pmat; delete funcPtr; @@ -2131,10 +2130,10 @@ double MLI_Method_AMGSA::genP_Selective(MLI_Matrix *mli_Amat, } // ********************************************************************* -// selective coarsening scheme (Given a graph, aggregate on the local +// selective coarsening scheme (Given a graph, aggregate on the local // subgraph but no aggregation near processor boundaries) // (called by setupExtendedDomainDecomp/genP_Selective) -// --------------------------------------------------------------------- +// --------------------------------------------------------------------- int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, int *naggrOut, int **aggrInfoOut, int *bdryData) @@ -2158,7 +2157,7 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, comm = hypre_ParCSRMatrixComm(hypreG); MPI_Comm_rank(comm,&mypid); MPI_Comm_size(comm,&nprocs); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG, &partition); startRow = partition[mypid]; endRow = partition[mypid+1] - 1; @@ -2176,7 +2175,7 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, GDiagA = hypre_CSRMatrixData(GDiag); /*----------------------------------------------------------------- - * allocate status arrays + * allocate status arrays 
*-----------------------------------------------------------------*/ if (localNRows > 0) @@ -2184,7 +2183,7 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, node2aggr = new int[localNRows]; aggrSizes = new int[localNRows]; nodeStat = new int[localNRows]; - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { if (bdryData[irow] == 1) { @@ -2204,13 +2203,13 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, else node2aggr = aggrSizes = nodeStat = NULL; /*----------------------------------------------------------------- - * search for zero rows and rows near the processor boundaries + * search for zero rows and rows near the processor boundaries *-----------------------------------------------------------------*/ - for ( irow = 0; irow < localNRows; irow++ ) + for ( irow = 0; irow < localNRows; irow++ ) { rowLeng = GDiagI[irow+1] - GDiagI[irow]; - if (rowLeng <= 0) + if (rowLeng <= 0) { nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED; nNotSelected++; @@ -2297,7 +2296,7 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED; nSelected++; } - } + } } /*----------------------------------------------------------------- @@ -2318,7 +2317,7 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, colNum = cols[jcol]; if (nodeStat[colNum] == MLI_METHOD_AMGSA_READY) count++; } - if (count > 1 && count >= minAggrSize_) + if (count > 1 && count >= minAggrSize_) { aggrSizes[naggr] = 0; for (jcol = 0; jcol < rowLeng; jcol++) @@ -2382,7 +2381,7 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, nUndone--; nSelected++; jcol++; - if ( jcol >= minAggrSize_ && naggr < count-1 ) + if ( jcol >= minAggrSize_ && naggr < count-1 ) { jcol = 0; naggr++; @@ -2410,7 +2409,7 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, for ( jcol = 0; jcol < rowLeng; jcol++ ) { colNum = cols[jcol]; - printf("ERROR : 
neighbor of unselected node %9d = %9d\n", + printf("ERROR : neighbor of unselected node %9d = %9d\n", rowNum, colNum); } hypre_ParCSRMatrixRestoreRow(hypreG,rowNum,&rowLeng,&cols,NULL); @@ -2423,11 +2422,11 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, } /*----------------------------------------------------------------- - * clean up and initialize the output arrays + * clean up and initialize the output arrays *-----------------------------------------------------------------*/ - if (localNRows > 0) delete [] aggrSizes; - if (localNRows > 0) delete [] nodeStat; + if (localNRows > 0) delete [] aggrSizes; + if (localNRows > 0) delete [] nodeStat; if (localNRows == 1 && naggr == 0) { node2aggr[0] = 0; @@ -2442,9 +2441,9 @@ int MLI_Method_AMGSA::coarsenSelective(hypre_ParCSRMatrix *hypreG, // set up domain decomposition method by extending the local problem // (A simplified version of setupExtendedDomainDecomp using inefficient // method - just for testing only) -// --------------------------------------------------------------------- +// --------------------------------------------------------------------- -int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) +int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) { MLI_Function *funcPtr; int nRecvs, *recvProcs, nSends, *sendProcs, ierr, *rowSizes; @@ -2484,7 +2483,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) level = 0; mli_Amat = mli->getSystemMatrix(level); hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix(); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &Apartition); AStart = Apartition[mypid]; ANRows = Apartition[mypid+1] - AStart; @@ -2562,12 +2561,12 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) ierr = HYPRE_IJMatrixCreate(comm,QExtRStart,QExtRStart+QExtNRows-1, QExtCStart,QExtCStart+QExtNCols-1,&IJ_QExt); ierr += HYPRE_IJMatrixSetObjectType(IJ_QExt, 
HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowSizes = new int[QExtNRows]; for (iD = 0; iD < ANRows; iD++) rowSizes[iD] = 2 * nSends; ierr = HYPRE_IJMatrixSetRowSizes(IJ_QExt, rowSizes); ierr += HYPRE_IJMatrixInitialize(IJ_QExt); - assert(!ierr); + hypre_assert(!ierr); delete [] rowSizes; /* --------------------------------------------------------------- */ @@ -2612,7 +2611,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) { colInds = new int[nSends+1]; colVals = new double[nSends+1]; - for (iP = 0; iP <= nSends; iP++) colVals[iP] = 1.0; + for (iP = 0; iP <= nSends; iP++) colVals[iP] = 1.0; } for (iD = 0; iD < QExtNRows; iD++) { @@ -2691,7 +2690,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) rLength = AExtpartition[recvProcs[iP]+1] - AExtpartition[recvProcs[iP]]; for (iD = 0; iD < nullspaceDim_; iD++) for (iD2 = 0; iD2 < rLength; iD2++) - nullspaceVec_[iD*QExtNCols+iD2+offset] = + nullspaceVec_[iD*QExtNCols+iD2+offset] = tmpNullVecs[offset+iD*rLength+iD2]; rLength *= nullspaceDim_; offset += rLength; @@ -2706,7 +2705,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) /* --------------------------------------------------------------- */ MLI_Matrix *mli_PExt; - genP_AExt(mli_AExt, &mli_PExt, ANRows); + genP_AExt(mli_AExt, &mli_PExt, ANRows); /* --------------------------------------------------------------- */ /* create the local domain decomposition matrix */ @@ -2736,7 +2735,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) ierr = HYPRE_IJMatrixCreate(newMPIComm,iZero,ACExtNRows-1,iZero, ACExtNRows-1, &IJnewA); ierr += HYPRE_IJMatrixSetObjectType(IJnewA, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); if (ACExtNRows > 0) newRowSizes = new int[ACExtNRows]; csrACExt = hypre_ParCSRMatrixDiag(hypreACExt); ACExtI = hypre_CSRMatrixI(csrACExt); @@ -2746,7 +2745,7 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) newRowSizes[iD] = ACExtI[iD+1] - ACExtI[iD]; ierr = 
HYPRE_IJMatrixSetRowSizes(IJnewA, newRowSizes); ierr += HYPRE_IJMatrixInitialize(IJnewA); - assert(!ierr); + hypre_assert(!ierr); for (iD = 0; iD < ACExtNRows; iD++) { offset = ACExtI[iD]; @@ -2857,8 +2856,8 @@ int MLI_Method_AMGSA::setupExtendedDomainDecomp2(MLI *mli) return (level); } -// ************************************************************************ -// Purpose : Given Amat, perform preferential coarsening +// ************************************************************************ +// Purpose : Given Amat, perform preferential coarsening // (setupExtendedDomainDecomp2) // ------------------------------------------------------------------------ @@ -2915,9 +2914,9 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, if (minAggrSize_ <= 1) minAggrSize_ = 2; /*----------------------------------------------------------------- - * perform coarsening + * perform coarsening *-----------------------------------------------------------------*/ - + coarsenAExt(A2mat, &naggr, &node2aggr, inANRows); if (blkSize > 1 && mli_A2mat != NULL) delete mli_A2mat; @@ -2934,7 +2933,7 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, ierr = HYPRE_IJMatrixCreate(comm,PStartRow,PStartRow+PLocalNRows-1, PStartCol,PStartCol+PLocalNCols-1,&IJPmat); ierr = HYPRE_IJMatrixSetObjectType(IJPmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); /*----------------------------------------------------------------- * expand the aggregation information if block size > 1 ==> eqn2aggr @@ -2948,9 +2947,9 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, delete [] node2aggr; } else eqn2aggr = node2aggr; - + /*----------------------------------------------------------------- - * create a compact form for the null space vectors + * create a compact form for the null space vectors * (get ready to perform QR on them) *-----------------------------------------------------------------*/ @@ -2993,13 +2992,13 
@@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, if (eqn2aggr[irow] >= 0) aggCntArray[eqn2aggr[irow]]++; else aggCntArray[(-eqn2aggr[irow]-1)]++; maxAggSize = 0; - for (ig = 0; ig < naggr; ig++) + for (ig = 0; ig < naggr; ig++) if (aggCntArray[ig] > maxAggSize) maxAggSize = aggCntArray[ig]; /* ------ register which equation is in which aggregate ------ */ aggIndArray = new int*[naggr]; - for (ig = 0; ig < naggr; ig++) + for (ig = 0; ig < naggr; ig++) { aggIndArray[ig] = new int[aggCntArray[ig]]; aggCntArray[ig] = 0; @@ -3017,11 +3016,11 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, qArray = new double[maxAggSize * nullspaceDim_]; rArray = new double[nullspaceDim_ * nullspaceDim_]; - newNull = new double[naggr*nullspaceDim_*nullspaceDim_]; + newNull = new double[naggr*nullspaceDim_*nullspaceDim_]; /* ------ perform QR on each aggregate ------ */ - for (ig = 0; ig < naggr; ig++) + for (ig = 0; ig < naggr; ig++) { aggSize = aggCntArray[ig]; @@ -3033,13 +3032,13 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, printf(" aggr size is %d\n", aggSize); exit(1); } - + /* ------ put data into the temporary array ------ */ - for (jcol = 0; jcol < aggSize; jcol++) + for (jcol = 0; jcol < aggSize; jcol++) { - for (irow = 0; irow < nullspaceDim_; irow++) - qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]]; + for (irow = 0; irow < nullspaceDim_; irow++) + qArray[aggSize*irow+jcol] = PVecs[irow][aggIndArray[ig][jcol]]; } /* ------ after QR, put the R into the next null space ------ */ @@ -3060,7 +3059,7 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, index = aggIndArray[ig][jcol]; PVecs[irow][index] = qArray[ irow*aggSize + jcol ]; } - } + } } for (ig = 0; ig < naggr; ig++) delete [] aggIndArray[ig]; delete [] aggIndArray; @@ -3072,7 +3071,7 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, nullspaceVec_ = 
newNull; /*----------------------------------------------------------------- - * initialize Pmat + * initialize Pmat *-----------------------------------------------------------------*/ rowLengths = new int[PLocalNRows]; @@ -3080,11 +3079,11 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, rowLengths[irow] = nullspaceDim_; ierr = HYPRE_IJMatrixSetRowSizes(IJPmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJPmat); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengths; /*-------------------------------------------------------------------- - * load and assemble Pmat + * load and assemble Pmat *--------------------------------------------------------------------*/ colInd = new int[nullspaceDim_]; @@ -3103,13 +3102,13 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, } } rowNum = PStartRow + irow; - HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, - (const int *) &rowNum, (const int *) colInd, + HYPRE_IJMatrixSetValues(IJPmat, 1, &nzcnt, + (const int *) &rowNum, (const int *) colInd, (const double *) colVal); } } ierr = HYPRE_IJMatrixAssemble(IJPmat); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJMatrixGetObject(IJPmat, (void **) &Pmat); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Pmat); commPkg = hypre_ParCSRMatrixCommPkg(Amat); @@ -3124,20 +3123,20 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, *-----------------------------------------------------------------*/ if (PCols != NULL) delete [] PCols; - if (PVecs != NULL) + if (PVecs != NULL) { - for (irow = 0; irow < nullspaceDim_; irow++) + for (irow = 0; irow < nullspaceDim_; irow++) if (PVecs[irow] != NULL) delete [] PVecs[irow]; delete [] PVecs; } /*----------------------------------------------------------------- - * set up and return Pmat + * set up and return Pmat *-----------------------------------------------------------------*/ funcPtr = new MLI_Function(); 
MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - sprintf(paramString, "HYPRE_ParCSR" ); + sprintf(paramString, "HYPRE_ParCSR" ); mli_Pmat = new MLI_Matrix( Pmat, paramString, funcPtr ); (*PmatOut) = mli_Pmat; delete funcPtr; @@ -3148,7 +3147,7 @@ double MLI_Method_AMGSA::genP_AExt(MLI_Matrix *mli_Amat,MLI_Matrix **PmatOut, // graded coarsening scheme (Given a graph, aggregate on the local subgraph // but give smaller aggregate near processor boundaries) // (called by setupExtendedDomainDecomp2/genP_AExt) -// --------------------------------------------------------------------- +// --------------------------------------------------------------------- int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, int *mliAggrLeng, int **mliAggrArray, int inANRows) @@ -3169,7 +3168,7 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, comm = hypre_ParCSRMatrixComm(hypreG); MPI_Comm_rank(comm,&mypid); MPI_Comm_size(comm,&nprocs); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreG, &partition); startRow = partition[mypid]; endRow = partition[mypid+1] - 1; @@ -3187,7 +3186,7 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, GDiagA = hypre_CSRMatrixData(GDiag); /*----------------------------------------------------------------- - * allocate status arrays + * allocate status arrays *-----------------------------------------------------------------*/ if (localNRows > 0) @@ -3195,13 +3194,13 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, node2aggr = new int[localNRows]; aggrSizes = new int[localNRows]; nodeStat = new int[localNRows]; - for (irow = 0; irow < inANRows; irow++) + for (irow = 0; irow < inANRows; irow++) { aggrSizes[irow] = 1; node2aggr[irow] = -1; nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED; } - for (irow = inANRows; irow < localNRows; irow++) + for (irow = inANRows; irow < localNRows; irow++) { aggrSizes[irow] = 0; node2aggr[irow] 
= -1; @@ -3212,13 +3211,13 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, else node2aggr = aggrSizes = nodeStat = NULL; /*----------------------------------------------------------------- - * search for zero rows and rows near the processor boundaries + * search for zero rows and rows near the processor boundaries *-----------------------------------------------------------------*/ - for (irow = inANRows; irow < localNRows; irow++) + for (irow = inANRows; irow < localNRows; irow++) { rowLeng = GDiagI[irow+1] - GDiagI[irow]; - if (rowLeng <= 0) + if (rowLeng <= 0) { nodeStat[irow] = MLI_METHOD_AMGSA_NOTSELECTED; nNotSelected++; @@ -3229,9 +3228,9 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, * Phase 0 : 1 node per aggregate for the immediate neighbors *-----------------------------------------------------------------*/ - for (irow = 0; irow < inANRows; irow++) + for (irow = 0; irow < inANRows; irow++) { - for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++) + for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++) { index = GDiagJ[jcol]; if (index >= inANRows && nodeStat[index]==MLI_METHOD_AMGSA_READY) @@ -3249,12 +3248,12 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, * Phase 1 : small aggregates for the next level *-----------------------------------------------------------------*/ - for (irow = inANRows; irow < localNRows; irow++) + for (irow = inANRows; irow < localNRows; irow++) { - if (nodeStat[irow] == MLI_METHOD_AMGSA_READY) + if (nodeStat[irow] == MLI_METHOD_AMGSA_READY) { selectFlag = 0; - for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++) + for (jcol = GDiagI[irow]; jcol < GDiagI[irow+1]; jcol++) { index = GDiagJ[jcol]; if (nodeStat[index] == MLI_METHOD_AMGSA_SELECTED) @@ -3284,7 +3283,7 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, } } } - for (irow = inANRows; irow < localNRows; irow++) + for (irow = inANRows; irow < localNRows; irow++) if (nodeStat[index] == 
MLI_METHOD_AMGSA_SELECTED2) nodeStat[index] = MLI_METHOD_AMGSA_SELECTED; @@ -3364,7 +3363,7 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, nodeStat[irow] = MLI_METHOD_AMGSA_SELECTED; nSelected++; } - } + } } /*----------------------------------------------------------------- @@ -3387,7 +3386,7 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, nUndone--; nSelected++; jcol++; - if (jcol >= minAggrSize_ && naggr < count-1) + if (jcol >= minAggrSize_ && naggr < count-1) { jcol = 0; naggr++; @@ -3399,11 +3398,11 @@ int MLI_Method_AMGSA::coarsenAExt(hypre_ParCSRMatrix *hypreG, } /*----------------------------------------------------------------- - * clean up and initialize the output arrays + * clean up and initialize the output arrays *-----------------------------------------------------------------*/ - if (localNRows > 0) delete [] aggrSizes; - if (localNRows > 0) delete [] nodeStat; + if (localNRows > 0) delete [] aggrSizes; + if (localNRows > 0) delete [] nodeStat; if (localNRows == 1 && naggr == 0) { node2aggr[0] = 0; diff --git a/src/FEI_mv/femli/mli_amgsa_elem.cxx b/src/FEI_mv/femli/mli_amgsa_elem.cxx index c579ba969..e08b6ede9 100644 --- a/src/FEI_mv/femli/mli_amgsa_elem.cxx +++ b/src/FEI_mv/femli/mli_amgsa_elem.cxx @@ -14,7 +14,6 @@ // --------------------------------------------------------------------- #include -#include #include "HYPRE.h" #include "_hypre_utilities.h" diff --git a/src/FEI_mv/femli/mli_fedata.cxx b/src/FEI_mv/femli/mli_fedata.cxx index 5e35e6d44..fb5f76394 100644 --- a/src/FEI_mv/femli/mli_fedata.cxx +++ b/src/FEI_mv/femli/mli_fedata.cxx @@ -14,7 +14,6 @@ #include #include #include -#include #if 0 /* RDF: Not sure this is really needed */ #ifdef WIN32 @@ -27,7 +26,7 @@ #include "mli_utils.h" /************************************************************************** - * constructor + * constructor *-----------------------------------------------------------------------*/ MLI_FEData::MLI_FEData(MPI_Comm 
mpi_comm) @@ -48,7 +47,7 @@ MLI_FEData::MLI_FEData(MPI_Comm mpi_comm) } //************************************************************************* -// destructor +// destructor //------------------------------------------------------------------------- MLI_FEData::~MLI_FEData() @@ -65,7 +64,7 @@ MLI_FEData::~MLI_FEData() int MLI_FEData::setOutputLevel(int level) { - if ( level < 0 ) + if ( level < 0 ) { printf("setOutputLevel ERROR : level should be >= 0.\n"); return 0; @@ -75,7 +74,7 @@ int MLI_FEData::setOutputLevel(int level) } //************************************************************************* -// dimension of the physical problem (2D, 3D, etc.) +// dimension of the physical problem (2D, 3D, etc.) //------------------------------------------------------------------------- int MLI_FEData::setSpaceDimension(int dimension) @@ -91,7 +90,7 @@ int MLI_FEData::setSpaceDimension(int dimension) } //************************************************************************* -// order of the partial differential equation +// order of the partial differential equation //------------------------------------------------------------------------- int MLI_FEData::setOrderOfPDE(int pdeOrder) @@ -199,7 +198,7 @@ int MLI_FEData::initElemBlock(int nElems, int nNodesPerElem, printf("initElemBlock ERROR : nodeNumFields < 0.\n"); exit(1); } - if (outputLevel_ >= 1) + if (outputLevel_ >= 1) { printf("initElemBlock : nElems = %d\n", nElems); printf("initElemBlock : node nFields = %d\n", nodeNumFields); @@ -210,8 +209,8 @@ int MLI_FEData::initElemBlock(int nElems, int nNodesPerElem, // --- clean up previous element setups // ------------------------------------------------------------- - if ( currentElemBlock_ >= 0 && currentElemBlock_ < numElemBlocks_ && - elemBlockList_[currentElemBlock_] != NULL ) + if ( currentElemBlock_ >= 0 && currentElemBlock_ < numElemBlocks_ && + elemBlockList_[currentElemBlock_] != NULL ) { deleteElemBlock(currentElemBlock_); 
createElemBlock(currentElemBlock_); @@ -250,8 +249,8 @@ int MLI_FEData::initElemBlock(int nElems, int nNodesPerElem, currBlock->nodeNumFields_ = nodeNumFields; currBlock->nodeFieldIDs_ = new int[nodeNumFields]; - for ( i = 0; i < nodeNumFields; i++ ) - currBlock->nodeFieldIDs_[i] = nodeFieldIDs[i]; + for ( i = 0; i < nodeNumFields; i++ ) + currBlock->nodeFieldIDs_[i] = nodeFieldIDs[i]; // ------------------------------------------------------------- // --- store element level data @@ -261,8 +260,8 @@ int MLI_FEData::initElemBlock(int nElems, int nNodesPerElem, if ( elemNumFields > 0 ) { currBlock->elemFieldIDs_ = new int[elemNumFields]; - for ( i = 0; i < elemNumFields; i++ ) - currBlock->elemFieldIDs_[i] = elemFieldIDs[i]; + for ( i = 0; i < elemNumFields; i++ ) + currBlock->elemFieldIDs_[i] = elemFieldIDs[i]; } return 1; } @@ -271,7 +270,7 @@ int MLI_FEData::initElemBlock(int nElems, int nNodesPerElem, // initialize the element connectivities //------------------------------------------------------------------------- -int MLI_FEData::initElemBlockNodeLists(int nElems, +int MLI_FEData::initElemBlockNodeLists(int nElems, const int *eGlobalIDs, int nNodesPerElem, const int* const *nGlobalIDLists, int spaceDim, const double* const *coord) @@ -303,14 +302,14 @@ int MLI_FEData::initElemBlockNodeLists(int nElems, #ifdef MLI_DEBUG_DETAILED printf("initElemBlockNodeLists Diagnostics: segFault test.\n"); double ddata; - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) { index = eGlobalIDs[i]; - for (j = 0; j < nNodesPerElem; j++) + for (j = 0; j < nNodesPerElem; j++) length = nGlobalIDLists[i][j]; if ( coord != NULL ) { - for (j = 0; j < nNodesPerElem*spaceDim; j++) + for (j = 0; j < nNodesPerElem*spaceDim; j++) ddata = coord[i][j]; } } @@ -329,16 +328,16 @@ int MLI_FEData::initElemBlockNodeLists(int nElems, for (i = 0; i < nElems; i++) currBlock->elemGlobalIDs_[i] = eGlobalIDs[i]; // ------------------------------------------------------------- - // --- 
allocate storage and load for element node connectivities + // --- allocate storage and load for element node connectivities // ------------------------------------------------------------- - for ( i = 0; i < nElems; i++ ) + for ( i = 0; i < nElems; i++ ) { currBlock->elemNodeIDList_[i] = new int[nNodesPerElem]; intArray = currBlock->elemNodeIDList_[i]; - for ( j = 0; j < nNodesPerElem; j++ ) - intArray[j] = nGlobalIDLists[i][j]; - } + for ( j = 0; j < nNodesPerElem; j++ ) + intArray[j] = nGlobalIDLists[i][j]; + } if ( coord == NULL ) return 1; // ------------------------------------------------------------- @@ -348,9 +347,9 @@ int MLI_FEData::initElemBlockNodeLists(int nElems, length = nNodesPerElem * spaceDimension_ * nElems; currBlock->nodeCoordinates_ = new double[length]; length = nNodesPerElem * spaceDimension_; - for ( i = 0; i < nElems; i++ ) + for ( i = 0; i < nElems; i++ ) { - for ( j = 0; j < length; j++ ) + for ( j = 0; j < length; j++ ) currBlock->nodeCoordinates_[i*length+j] = coord[i][j]; } return 1; @@ -361,7 +360,7 @@ int MLI_FEData::initElemBlockNodeLists(int nElems, //------------------------------------------------------------------------- int MLI_FEData::initElemNodeList( int eGlobalID, int nNodesPerElem, - const int* nGlobalIDs, int spaceDim, + const int* nGlobalIDs, int spaceDim, const double *coord) { int i, j, length, index, *intArray, nElems; @@ -405,12 +404,12 @@ int MLI_FEData::initElemNodeList( int eGlobalID, int nNodesPerElem, currBlock->elemGlobalIDs_[index] = eGlobalID; // ------------------------------------------------------------- - // --- allocate storage and load for element node connectivities + // --- allocate storage and load for element node connectivities // ------------------------------------------------------------- currBlock->elemNodeIDList_[index] = new int[nNodesPerElem]; intArray = currBlock->elemNodeIDList_[index]; - for ( j = 0; j < nNodesPerElem; j++ ) intArray[j] = nGlobalIDs[j]; + for ( j = 0; j < nNodesPerElem; 
j++ ) intArray[j] = nGlobalIDs[j]; if ( coord == NULL ) return 1; // ------------------------------------------------------------- @@ -422,16 +421,16 @@ int MLI_FEData::initElemNodeList( int eGlobalID, int nNodesPerElem, if ( currBlock->nodeCoordinates_ == NULL ) currBlock->nodeCoordinates_ = new double[length]; length = nNodesPerElem * spaceDimension_; - for ( i = 0; i < length; i++ ) + for ( i = 0; i < length; i++ ) currBlock->nodeCoordinates_[index*length+i] = coord[i]; return 1; } //************************************************************************* -// initialize shared node list +// initialize shared node list //------------------------------------------------------------------------- -int MLI_FEData::initSharedNodes(int nNodes, const int *nGlobalIDs, +int MLI_FEData::initSharedNodes(int nNodes, const int *nGlobalIDs, const int *numProcs, const int * const *procLists) { int i, j, length, index, index2, *nodeIDs, *auxArray; @@ -458,11 +457,11 @@ int MLI_FEData::initSharedNodes(int nNodes, const int *nGlobalIDs, #ifdef MLI_DEBUG_DETAILED printf("initSharedNodes Diagnostics: segFault test.\n"); - for (i = 0; i < nNodes; i++) + for (i = 0; i < nNodes; i++) { index = nGlobalIDs[i]; length = numProcs[i]; - for (j = 0; j < length; j++) + for (j = 0; j < length; j++) index = procLists[i][j]; } printf("initSharedNodes Diagnostics: passed the segFault test.\n"); @@ -478,14 +477,14 @@ int MLI_FEData::initSharedNodes(int nNodes, const int *nGlobalIDs, for (i = 0; i < nNodes; i++) auxArray[i] = i; MLI_Utils_IntQSort2(nodeIDs, auxArray, 0, nNodes-1); nSharedNodes = 1; - for (i = 1; i < nNodes; i++) + for (i = 1; i < nNodes; i++) if ( nodeIDs[i] != nodeIDs[nSharedNodes-1] ) nSharedNodes++; sharedNodeIDs = new int[nSharedNodes]; sharedNodeNProcs = new int[nSharedNodes]; sharedNodeProc = new int*[nSharedNodes]; nSharedNodes = 1; sharedNodeIDs[0] = nodeIDs[0]; - for (i = 1; i < nNodes; i++) + for (i = 1; i < nNodes; i++) if ( nodeIDs[i] != sharedNodeIDs[nSharedNodes-1] ) 
sharedNodeIDs[nSharedNodes++] = nodeIDs[i]; for ( i = 0; i < nSharedNodes; i++ ) sharedNodeNProcs[i] = 0; @@ -507,7 +506,7 @@ int MLI_FEData::initSharedNodes(int nNodes, const int *nGlobalIDs, nSharedNodes); index2 = auxArray[i]; for ( j = 0; j < numProcs[index2]; j++ ) - sharedNodeProc[index][sharedNodeNProcs[index]++] = + sharedNodeProc[index][sharedNodeNProcs[index]++] = procLists[index2][j]; } delete [] nodeIDs; @@ -515,7 +514,7 @@ int MLI_FEData::initSharedNodes(int nNodes, const int *nGlobalIDs, for ( i = 0; i < nSharedNodes; i++ ) { MLI_Utils_IntQSort2(sharedNodeProc[i],NULL,0,sharedNodeNProcs[i]-1); - length = 1; + length = 1; for ( j = 1; j < sharedNodeNProcs[i]; j++ ) if ( sharedNodeProc[i][j] != sharedNodeProc[i][length-1] ) sharedNodeProc[i][length++] = sharedNodeProc[i][j]; @@ -530,7 +529,7 @@ int MLI_FEData::initSharedNodes(int nNodes, const int *nGlobalIDs, } //************************************************************************* -// initialize element face lists +// initialize element face lists //------------------------------------------------------------------------- int MLI_FEData::initElemBlockFaceLists(int nElems, int nFaces, @@ -557,7 +556,7 @@ int MLI_FEData::initElemBlockFaceLists(int nElems, int nFaces, #ifdef MLI_DEBUG_DETAILED printf("initElemBlockFaceLists Diagnostics: segFault test.\n"); - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) for (j = 0; j < nFaces; j++) index = fGlobalIDLists[i][j]; printf("initElemBlockFaceLists Diagnostics: passed the segFault test.\n"); #endif @@ -569,8 +568,8 @@ int MLI_FEData::initElemBlockFaceLists(int nElems, int nFaces, if ( currBlock->elemFaceIDList_ == NULL ) { currBlock->elemFaceIDList_ = new int*[nElems]; - currBlock->elemNumFaces_ = nFaces; - for (i = 0; i < nElems; i++) + currBlock->elemNumFaces_ = nFaces; + for (i = 0; i < nElems; i++) currBlock->elemFaceIDList_[i] = new int[nFaces]; } @@ -582,11 +581,11 @@ int MLI_FEData::initElemBlockFaceLists(int nElems, int nFaces, { index 
= currBlock->elemGlobalIDAux_[i]; elemFaceList = currBlock->elemFaceIDList_[i]; - for ( j = 0; j < nFaces; j++ ) + for ( j = 0; j < nFaces; j++ ) elemFaceList[j] = fGlobalIDLists[index][j]; } return 1; -} +} //************************************************************************* // initialize face node list @@ -611,10 +610,10 @@ int MLI_FEData::initFaceBlockNodeLists(int nFaces, const int *fGlobalIDs, #ifdef MLI_DEBUG_DETAILED printf("initFaceBlockNodeLists Diagnostics: segFault test.\n"); - for (i = 0; i < nFaces; i++) + for (i = 0; i < nFaces; i++) { index = fGlobalIDs[i]; - for (j = 0; j < nNodes; j++) + for (j = 0; j < nNodes; j++) index = nGlobalIDLists[i][j]; } printf("initFaceBlockNodeLists Diagnostics: passed the segFault test.\n"); @@ -628,18 +627,18 @@ int MLI_FEData::initFaceBlockNodeLists(int nFaces, const int *fGlobalIDs, currBlock->faceNumNodes_ = nNodes; currBlock->numExternalFaces_ = 0; currBlock->faceGlobalIDs_ = new int[nFaces]; - currBlock->faceNodeIDList_ = new int*[nFaces]; + currBlock->faceNodeIDList_ = new int*[nFaces]; faceArray = new int[nFaces]; for ( i = 0; i < nFaces; i++ ) { - currBlock->faceGlobalIDs_[i] = fGlobalIDs[i]; + currBlock->faceGlobalIDs_[i] = fGlobalIDs[i]; currBlock->faceNodeIDList_[i] = NULL; faceArray[i] = i; - } + } MLI_Utils_IntQSort2(currBlock->faceGlobalIDs_, faceArray, 0, nFaces-1); // ------------------------------------------------------------- - // --- load the face Node list + // --- load the face Node list // ------------------------------------------------------------- faceNodeList = currBlock->faceNodeIDList_; @@ -647,15 +646,15 @@ int MLI_FEData::initFaceBlockNodeLists(int nFaces, const int *fGlobalIDs, { index = faceArray[faceArray[i]]; faceNodeList[index] = new int[nNodes]; - for ( j = 0; j < nNodes; j++ ) + for ( j = 0; j < nNodes; j++ ) faceNodeList[i][j] = nGlobalIDLists[index][j]; } delete [] faceArray; return 1; -} +} //************************************************************************* -// 
initialize shared face list +// initialize shared face list // (*** need to take into consideration of repeated face numbers in the // face list - for pairs, just as already been done in initSharedNodes) //------------------------------------------------------------------------- @@ -685,7 +684,7 @@ int MLI_FEData::initSharedFaces(int nFaces, const int *fGlobalIDs, #ifdef MLI_DEBUG_DETAILED printf("initSharedFaces Diagnostics: segFault test.\n"); - for (i = 0; i < nFaces; i++) + for (i = 0; i < nFaces; i++) { index = fGlobalIDs[i]; length = numProcs[i]; @@ -695,7 +694,7 @@ int MLI_FEData::initSharedFaces(int nFaces, const int *fGlobalIDs, #endif // ------------------------------------------------------------- - // --- allocate space for the incoming data + // --- allocate space for the incoming data // ------------------------------------------------------------- currBlock->numSharedFaces_ = nFaces; @@ -704,7 +703,7 @@ int MLI_FEData::initSharedFaces(int nFaces, const int *fGlobalIDs, currBlock->sharedFaceProc_ = new int*[nFaces]; // ------------------------------------------------------------- - // --- load shared face information + // --- load shared face information // ------------------------------------------------------------- intArray = new int[nFaces]; @@ -724,9 +723,9 @@ int MLI_FEData::initSharedFaces(int nFaces, const int *fGlobalIDs, currBlock->sharedFaceProc_[i] = new int[numProcs[index]]; for ( j = 0; j < numProcs[index]; j++ ) currBlock->sharedFaceProc_[i][j] = procLists[index][j]; - MLI_Utils_IntQSort2(currBlock->sharedFaceProc_[i], NULL, 0, + MLI_Utils_IntQSort2(currBlock->sharedFaceProc_[i], NULL, 0, numProcs[index]-1); - } + } delete [] intArray; return 1; } @@ -745,9 +744,9 @@ int MLI_FEData::initComplete() int mypid, totalFaces, nExtFaces, *faceArray, *procArray; int **elemFaceList, *procArray2, *ownerP, *sndrcvReg, nProcs; int nRecv, nSend, *recvProcs, *sendProcs, *recvLengs, *sendLengs; - int nNodes, pnum, **sendBuf, **recvBuf, *iauxArray, 
index2; + int nNodes, pnum, **sendBuf, **recvBuf, *iauxArray, index2; int *intArray, **intArray2, nNodesPerElem, length, *nodeArrayAux; - double *dtemp_array, *nodeCoords; + double *dtemp_array, *nodeCoords; MPI_Request *request; MPI_Status status; MLI_ElemBlock *currBlock; @@ -759,7 +758,7 @@ int MLI_FEData::initComplete() // ------------------------------------------------------------- nElems = currBlock->numLocalElems_; - assert( nElems > 0 ); + hypre_assert( nElems > 0 ); elemList = currBlock->elemGlobalIDs_; if ( elemList == NULL ) { @@ -799,9 +798,9 @@ int MLI_FEData::initComplete() // --- error checking (for duplicate element IDs) // ------------------------------------------------------------- - for ( i = 1; i < nElems; i++ ) - { - assert( currBlock->elemGlobalIDs_[i] >= 0 ); + for ( i = 1; i < nElems; i++ ) + { + hypre_assert( currBlock->elemGlobalIDs_[i] >= 0 ); if ( currBlock->elemGlobalIDs_[i] == currBlock->elemGlobalIDs_[i-1] ) { printf("initComplete ERROR : duplicate elemIDs.\n"); @@ -810,18 +809,18 @@ int MLI_FEData::initComplete() } // ------------------------------------------------------------- - // --- allocate storage and load for element node connectivities + // --- allocate storage and load for element node connectivities // ------------------------------------------------------------- nNodesPerElem = currBlock->elemNumNodes_; intArray2 = new int*[nElems]; for ( i = 0; i < nElems; i++ ) intArray2[i] = new int[nNodesPerElem]; - for ( i = 0; i < nElems; i++ ) + for ( i = 0; i < nElems; i++ ) { index = currBlock->elemGlobalIDAux_[i]; intArray = currBlock->elemNodeIDList_[index]; - for ( j = 0; j < nNodesPerElem; j++ ) intArray2[i][j] = intArray[j]; - } + for ( j = 0; j < nNodesPerElem; j++ ) intArray2[i][j] = intArray[j]; + } for ( i = 0; i < nElems; i++ ) delete [] currBlock->elemNodeIDList_[i]; delete [] currBlock->elemNodeIDList_; currBlock->elemNodeIDList_ = intArray2; @@ -829,9 +828,9 @@ int MLI_FEData::initComplete() if ( 
currBlock->nodeCoordinates_ != NULL ) { nodeCoords = new double[length]; - for ( i = 0; i < nElems; i++ ) + for ( i = 0; i < nElems; i++ ) { - for ( j = 0; j < length; j++ ) + for ( j = 0; j < length; j++ ) { index = currBlock->elemGlobalIDAux_[i]; nodeCoords[i*length+j] = @@ -891,12 +890,12 @@ int MLI_FEData::initComplete() { for ( j = 0; j < sharedNodeNProcs[i]; j++ ) { - if ( sharedNodeProc[i][j] < mypid ) + if ( sharedNodeProc[i][j] < mypid ) { nExtNodes++; - index = MLI_Utils_BinarySearch( sharedNodeIDs[i], nodeArray, + index = MLI_Utils_BinarySearch( sharedNodeIDs[i], nodeArray, totalNodes); - if ( index < 0 ) + if ( index < 0 ) { printf("initComplete ERROR : shared node not in elements.\n"); printf(" %d\n", sharedNodeIDs[i]); @@ -921,14 +920,14 @@ int MLI_FEData::initComplete() currBlock->numLocalNodes_ = totalNodes - nExtNodes; currBlock->nodeGlobalIDs_ = new int[totalNodes]; temp_cnt = 0; - for (i = 0; i < totalNodes; i++) + for (i = 0; i < totalNodes; i++) { - if ( nodeArrayAux[i] >= 0 ) + if ( nodeArrayAux[i] >= 0 ) currBlock->nodeGlobalIDs_[temp_cnt++] = nodeArray[i]; } - for (i = 0; i < totalNodes; i++) + for (i = 0; i < totalNodes; i++) { - if ( nodeArrayAux[i] < 0 ) + if ( nodeArrayAux[i] < 0 ) currBlock->nodeGlobalIDs_[temp_cnt++] = nodeArray[i]; } delete [] nodeArray; @@ -976,7 +975,7 @@ int MLI_FEData::initComplete() MLI_Utils_IntQSort2( iauxArray, NULL, 0, nExtNodes-1); if ( nExtNodes > 0 ) nRecv = 1; for ( i = 1; i < nExtNodes; i++ ) - if (iauxArray[i] != iauxArray[nRecv-1]) + if (iauxArray[i] != iauxArray[nRecv-1]) iauxArray[nRecv++] = iauxArray[i]; if ( nRecv > 0 ) { @@ -984,7 +983,7 @@ int MLI_FEData::initComplete() for ( i = 0; i < nRecv; i++ ) recvProcs[i] = iauxArray[i]; recvLengs = new int[nRecv]; for ( i = 0; i < nRecv; i++ ) recvLengs[i] = 0; - for ( i = 0; i < nExtNodes; i++ ) + for ( i = 0; i < nExtNodes; i++ ) { index = MLI_Utils_BinarySearch( ownerP[i], recvProcs, nRecv ); recvLengs[index]++; @@ -995,15 +994,15 @@ int 
MLI_FEData::initComplete() if ( nExtNodes > 0 ) delete [] iauxArray; counter = 0; - for ( i = 0; i < numSharedNodes; i++ ) + for ( i = 0; i < numSharedNodes; i++ ) if ( sndrcvReg[i] == 0 ) counter += sharedNodeNProcs[i]; if ( counter > 0 ) iauxArray = new int[counter]; counter = 0; - for ( i = 0; i < numSharedNodes; i++ ) + for ( i = 0; i < numSharedNodes; i++ ) { - if ( sndrcvReg[i] == 0 ) + if ( sndrcvReg[i] == 0 ) { - for ( j = 0; j < sharedNodeNProcs[i]; j++ ) + for ( j = 0; j < sharedNodeNProcs[i]; j++ ) if ( sharedNodeProc[i][j] != mypid ) iauxArray[counter++] = sharedNodeProc[i][j]; } @@ -1017,62 +1016,62 @@ int MLI_FEData::initComplete() MLI_Utils_IntQSort2( iauxArray, NULL, 0, counter-1); nSend = 1; for ( i = 1; i < counter; i++ ) - if (iauxArray[i] != iauxArray[nSend-1]) + if (iauxArray[i] != iauxArray[nSend-1]) iauxArray[nSend++] = iauxArray[i]; sendProcs = new int[nSend]; for ( i = 0; i < nSend; i++ ) sendProcs[i] = iauxArray[i]; sendLengs = new int[nSend]; for ( i = 0; i < nSend; i++ ) sendLengs[i] = 0; - for ( i = 0; i < numSharedNodes; i++ ) + for ( i = 0; i < numSharedNodes; i++ ) { - if ( sndrcvReg[i] == 0 ) + if ( sndrcvReg[i] == 0 ) { for ( j = 0; j < sharedNodeNProcs[i]; j++ ) { - if ( sharedNodeProc[i][j] != mypid ) + if ( sharedNodeProc[i][j] != mypid ) { index = sharedNodeProc[i][j]; index = MLI_Utils_BinarySearch( index, sendProcs, nSend ); sendLengs[index]++; - } - } - } + } + } + } } sendBuf = new int*[nSend]; for ( i = 0; i < nSend; i++ ) sendBuf[i] = new int[sendLengs[i]]; for ( i = 0; i < nSend; i++ ) sendLengs[i] = 0; - for ( i = 0; i < numSharedNodes; i++ ) + for ( i = 0; i < numSharedNodes; i++ ) { - if ( sndrcvReg[i] == 0 ) + if ( sndrcvReg[i] == 0 ) { for ( j = 0; j < sharedNodeNProcs[i]; j++ ) { - if ( sharedNodeProc[i][j] != mypid ) + if ( sharedNodeProc[i][j] != mypid ) { index = sharedNodeProc[i][j]; index = MLI_Utils_BinarySearch( index, sendProcs, nSend ); index2 = searchNode( sharedNodeIDs[i] ); - 
sendBuf[index][sendLengs[index]++] = + sendBuf[index][sendLengs[index]++] = currBlock->nodeOffset_ + index2; - } - } - } - } + } + } + } + } } if ( counter > 0 ) delete [] iauxArray; if ( nRecv > 0 ) request = new MPI_Request[nRecv]; for ( i = 0; i < nRecv; i++ ) - MPI_Irecv( recvBuf[i], recvLengs[i], MPI_INT, + MPI_Irecv( recvBuf[i], recvLengs[i], MPI_INT, recvProcs[i], 183, mpiComm_, &request[i]); for ( i = 0; i < nSend; i++ ) - MPI_Send( sendBuf[i], sendLengs[i], MPI_INT, + MPI_Send( sendBuf[i], sendLengs[i], MPI_INT, sendProcs[i], 183, mpiComm_); for ( i = 0; i < nRecv; i++ ) MPI_Wait( &request[i], &status ); if ( nExtNodes > 0 ) currBlock->nodeExtNewGlobalIDs_ = new int[nExtNodes]; for ( i = 0; i < nRecv; i++ ) recvLengs[i] = 0; - for ( i = 0; i < nExtNodes; i++ ) + for ( i = 0; i < nExtNodes; i++ ) { index = MLI_Utils_BinarySearch( ownerP[i], recvProcs, nRecv ); j = recvBuf[index][recvLengs[index]++]; @@ -1105,14 +1104,14 @@ int MLI_FEData::initComplete() for ( j = 0; j < currBlock->elemNumNodes_; j++ ) { index = currBlock->elemNodeIDList_[i][j]; - searchInd = MLI_Utils_BinarySearch(index, nodeArray, + searchInd = MLI_Utils_BinarySearch(index, nodeArray, totalNodes-nExtNodes); if ( searchInd < 0 ) - searchInd = MLI_Utils_BinarySearch(index, - &(nodeArray[totalNodes-nExtNodes]), + searchInd = MLI_Utils_BinarySearch(index, + &(nodeArray[totalNodes-nExtNodes]), nExtNodes) + totalNodes - nExtNodes; for ( k = 0; k < spaceDimension_; k++ ) - nodeCoords[searchInd*spaceDimension_+k] = + nodeCoords[searchInd*spaceDimension_+k] = dtemp_array[(i*elemNumNodes+j)*spaceDimension_+k]; } } @@ -1142,18 +1141,18 @@ int MLI_FEData::initComplete() if ( faceArray[i] != faceArray[i-1] ) faceArray[totalFaces++] = faceArray[i]; - if ( totalFaces != currBlock->numLocalFaces_ && + if ( totalFaces != currBlock->numLocalFaces_ && currBlock->faceGlobalIDs_ == NULL ) { printf("initComplete WARNING : face IDs not initialized.\n"); } - else if ( totalFaces != currBlock->numLocalFaces_ && + 
else if ( totalFaces != currBlock->numLocalFaces_ && currBlock->faceGlobalIDs_ != NULL ) { printf("initComplete ERROR : numbers of face do not match.\n"); exit(1); } - else + else { delete [] currBlock->faceGlobalIDs_; currBlock->faceGlobalIDs_ = NULL; @@ -1176,12 +1175,12 @@ int MLI_FEData::initComplete() { for ( j = 0; j < sharedFaceNProcs[i]; j++ ) { - if ( sharedFaceProc[i][j] < mypid ) + if ( sharedFaceProc[i][j] < mypid ) { nExtFaces++; - index = MLI_Utils_BinarySearch( sharedFaceIDs[i], faceArray, + index = MLI_Utils_BinarySearch( sharedFaceIDs[i], faceArray, totalFaces); - if ( index < 0 ) + if ( index < 0 ) { printf("initComplete ERROR : shared node not in elements.\n"); exit(1); @@ -1196,24 +1195,24 @@ int MLI_FEData::initComplete() currBlock->numLocalFaces_ = totalFaces - nExtFaces; currBlock->faceGlobalIDs_ = new int[totalFaces]; temp_cnt = 0; - for (i = 0; i < totalFaces; i++) + for (i = 0; i < totalFaces; i++) { - if ( faceArray[i] >= 0 ) + if ( faceArray[i] >= 0 ) currBlock->faceGlobalIDs_[temp_cnt++] = faceArray[i]; } - for (i = 0; i < totalFaces; i++) + for (i = 0; i < totalFaces; i++) { - if ( faceArray[i] < 0 ) + if ( faceArray[i] < 0 ) currBlock->faceGlobalIDs_[temp_cnt++] = - faceArray[i]; } delete [] faceArray; } // ------------------------------------------------------------- - // --- get element, node and face offsets + // --- get element, node and face offsets // ------------------------------------------------------------- - MPI_Comm_size( mpiComm_, &numProcs ); + MPI_Comm_size( mpiComm_, &numProcs ); procArray = new int[numProcs]; procArray2 = new int[numProcs]; for ( i = 0; i < numProcs; i++ ) procArray2[i] = 0; @@ -1261,7 +1260,7 @@ int MLI_FEData::loadElemBlockMatrices(int nElems, int sMatDim, printf("loadElemBlockMatrices ERROR : nElems mismatch.\n"); exit(1); } - if ( ! currBlock->initComplete_ ) + if ( ! 
currBlock->initComplete_ ) { printf("loadElemBlockMatrices ERROR : initialization not completed.\n"); exit(1); @@ -1270,7 +1269,7 @@ int MLI_FEData::loadElemBlockMatrices(int nElems, int sMatDim, #ifdef MLI_DEBUG_DETAILED printf("loadElemBlockMatrices Diagnostics: segFault test.\n"); double ddata; - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) { for (j = 0; j < sMatDim*sMatDim; j++) ddata = stiffMat[i][j]; } @@ -1288,19 +1287,19 @@ int MLI_FEData::loadElemBlockMatrices(int nElems, int sMatDim, } currBlock->elemStiffDim_ = sMatDim; currBlock->elemStiffMat_ = new double*[nElems]; - for ( i = 0; i < nElems; i++ ) + for ( i = 0; i < nElems; i++ ) { length = sMatDim * sMatDim; currBlock->elemStiffMat_[i] = new double[length]; index = currBlock->elemGlobalIDAux_[i]; row_darray = currBlock->elemStiffMat_[i]; - for ( j = 0; j < length; j++ ) row_darray[j] = stiffMat[index][j]; + for ( j = 0; j < length; j++ ) row_darray[j] = stiffMat[index][j]; } return 1; } //************************************************************************* -// load element nullspace for all elements +// load element nullspace for all elements //------------------------------------------------------------------------- int MLI_FEData::loadElemBlockNullSpaces(int nElems, const int *nNSpace, @@ -1313,7 +1312,7 @@ int MLI_FEData::loadElemBlockNullSpaces(int nElems, const int *nNSpace, // --- initial checking // ------------------------------------------------------------- - (void) sMatDim; + (void) sMatDim; currBlock = elemBlockList_[currentElemBlock_]; if ( nElems != currBlock->numLocalElems_ ) @@ -1321,7 +1320,7 @@ int MLI_FEData::loadElemBlockNullSpaces(int nElems, const int *nNSpace, printf("loadElemBlockNullSpaces ERROR : nElems do not match.\n"); exit(1); } - if ( ! currBlock->initComplete_ ) + if ( ! 
currBlock->initComplete_ ) { printf("loadElemBlockNullSpaces ERROR : initialization not complete.\n"); exit(1); @@ -1339,7 +1338,7 @@ int MLI_FEData::loadElemBlockNullSpaces(int nElems, const int *nNSpace, #ifdef MLI_DEBUG_DETAILED printf("loadElemBlockNullSpaces Diagnostics: segFault test.\n"); double ddata; - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) { length = nNSpace[i]; for (j = 0; j < sMatDim*length; j++) ddata = nSpace[i][j]; @@ -1364,7 +1363,7 @@ int MLI_FEData::loadElemBlockNullSpaces(int nElems, const int *nNSpace, } //************************************************************************* -// load element volumes for all elements +// load element volumes for all elements //------------------------------------------------------------------------- int MLI_FEData::loadElemBlockVolumes(int nElems, const double *elemVols) @@ -1382,7 +1381,7 @@ int MLI_FEData::loadElemBlockVolumes(int nElems, const double *elemVols) printf("loadElemBlockVolumes ERROR : nElems do not match.\n"); exit(1); } - if ( ! currBlock->initComplete_ ) + if ( ! currBlock->initComplete_ ) { printf("loadElemBlockVolumes ERROR : initialization not complete.\n"); exit(1); @@ -1410,7 +1409,7 @@ int MLI_FEData::loadElemBlockVolumes(int nElems, const double *elemVols) } //************************************************************************* -// load element material for all elements +// load element material for all elements //------------------------------------------------------------------------- int MLI_FEData::loadElemBlockMaterials(int nElems, const int *elemMats) @@ -1428,7 +1427,7 @@ int MLI_FEData::loadElemBlockMaterials(int nElems, const int *elemMats) printf("loadElemBlockMaterials ERROR : nElems do not match.\n"); exit(1); } - if ( ! currBlock->initComplete_ ) + if ( ! 
currBlock->initComplete_ ) { printf("loadElemBlockMaterials ERROR : initialization not complete.\n"); exit(1); @@ -1456,7 +1455,7 @@ int MLI_FEData::loadElemBlockMaterials(int nElems, const int *elemMats) } //************************************************************************* -// load element parent IDs for all elements +// load element parent IDs for all elements //------------------------------------------------------------------------- int MLI_FEData::loadElemBlockParentIDs(int nElems, const int *elemPIDs) @@ -1474,7 +1473,7 @@ int MLI_FEData::loadElemBlockParentIDs(int nElems, const int *elemPIDs) printf("loadElemBlockParentIDs ERROR : nElems do not match.\n"); exit(1); } - if ( ! currBlock->initComplete_ ) + if ( ! currBlock->initComplete_ ) { printf("loadElemBlockParentIDs ERROR : initialization not complete.\n"); exit(1); @@ -1501,7 +1500,7 @@ int MLI_FEData::loadElemBlockParentIDs(int nElems, const int *elemPIDs) } //************************************************************************* -// load element load +// load element load //------------------------------------------------------------------------- int MLI_FEData::loadElemBlockLoads(int nElems, int loadDim, @@ -1526,7 +1525,7 @@ int MLI_FEData::loadElemBlockLoads(int nElems, int loadDim, printf("loadElemBlockLoads ERROR : loadDim invalid.\n"); exit(1); } - if ( ! currBlock->initComplete_ ) + if ( ! 
currBlock->initComplete_ ) { printf("loadElemBlockLoads ERROR : initialization not complete.\n"); exit(1); @@ -1535,7 +1534,7 @@ int MLI_FEData::loadElemBlockLoads(int nElems, int loadDim, #ifdef MLI_DEBUG_DETAILED double ddata; printf("loadElemBlockLoads Diagnostics: segFault test.\n"); - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) for (j = 0; j < loadDim; j++) ddata = elemLoads[i][j]; printf("loadElemBlockLoads Diagnostics: passed the segFault test.\n"); #endif @@ -1565,7 +1564,7 @@ int MLI_FEData::loadElemBlockLoads(int nElems, int loadDim, } //************************************************************************* -// load element solution +// load element solution //------------------------------------------------------------------------- int MLI_FEData::loadElemBlockSolutions(int nElems, int solDim, @@ -1590,7 +1589,7 @@ int MLI_FEData::loadElemBlockSolutions(int nElems, int solDim, printf("loadElemBlockSolutions ERROR : solDim invalid."); exit(1); } - if ( ! currBlock->initComplete_ ) + if ( ! 
currBlock->initComplete_ ) { printf("loadElemBlockSolutions ERROR : initialization not complete.\n"); exit(1); @@ -1599,7 +1598,7 @@ int MLI_FEData::loadElemBlockSolutions(int nElems, int solDim, #ifdef MLI_DEBUG_DETAILED printf("loadElemBlockSolutions Diagnostics: segFault test.\n"); double ddata; - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) for (j = 0; j < loadDim; j++) ddata = elemSols[i][j]; printf("loadElemBlockSolutions Diagnostics: passed the segFault test.\n"); #endif @@ -1629,11 +1628,11 @@ int MLI_FEData::loadElemBlockSolutions(int nElems, int solDim, } //************************************************************************* -// load element boundary conditions +// load element boundary conditions //------------------------------------------------------------------------- -int MLI_FEData::loadElemBCs(int nElems, const int *eGlobalIDs, - int elemDOF, const char * const *BCFlags, +int MLI_FEData::loadElemBCs(int nElems, const int *eGlobalIDs, + int elemDOF, const char * const *BCFlags, const double *const *BCVals) { @@ -1659,7 +1658,7 @@ int MLI_FEData::loadElemBCs(int nElems, const int *eGlobalIDs, printf("loadElemBCs ERROR : element DOF not valid.\n"); exit(1); } - if ( ! currBlock->initComplete_ ) + if ( ! 
currBlock->initComplete_ ) { printf("loadElemBCs ERROR : initialization not complete.\n"); exit(1); @@ -1669,7 +1668,7 @@ int MLI_FEData::loadElemBCs(int nElems, const int *eGlobalIDs, printf("loadElemBCs Diagnostics: segFault test.\n"); char cdata; double ddata; - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) { j = eGlobalIDs[i]; for (j = 0; j < elemDOF; j++) cdata = BCFlags[i][j]; @@ -1703,7 +1702,7 @@ int MLI_FEData::loadElemBCs(int nElems, const int *eGlobalIDs, { currBlock->elemBCIDList_[i] = eGlobalIDs[i]; bcData = currBlock->elemBCValues_[i]; - for ( j = 0; j < elemDOF; j++ ) + for ( j = 0; j < elemDOF; j++ ) { bcData[j] = BCVals[i][j]; currBlock->elemBCFlagList_[i][j] = BCFlags[i][j]; @@ -1713,10 +1712,10 @@ int MLI_FEData::loadElemBCs(int nElems, const int *eGlobalIDs, } //************************************************************************* -// load element node list and stiffness matrix +// load element node list and stiffness matrix //------------------------------------------------------------------------- -int MLI_FEData::loadElemMatrix(int eGlobalID, int eMatDim, +int MLI_FEData::loadElemMatrix(int eGlobalID, int eMatDim, const double *elemMat) { int i, j, index; @@ -1728,12 +1727,12 @@ int MLI_FEData::loadElemMatrix(int eGlobalID, int eMatDim, currBlock = elemBlockList_[currentElemBlock_]; #ifdef MLI_DEBUG_DETAILED - if ( ! currBlock->intComplete_ ) + if ( ! 
currBlock->intComplete_ ) { printf("loadElemMatrix ERROR : initialization not complete.\n"); exit(1); } - if (currBlock->elemStiffMat_ != NULL && eMatDim != currBlock->elemStiffDim_) + if (currBlock->elemStiffMat_ != NULL && eMatDim != currBlock->elemStiffDim_) { printf("loadElemMatrix ERROR : dimension mismatch.\n"); exit(1); @@ -1756,7 +1755,7 @@ int MLI_FEData::loadElemMatrix(int eGlobalID, int eMatDim, currBlock->elemStiffMat_[i] = NULL; currBlock->elemStiffDim_ = eMatDim; } - + // ------------------------------------------------------------- // --- search for the data holder // ------------------------------------------------------------- @@ -1780,14 +1779,14 @@ int MLI_FEData::loadElemMatrix(int eGlobalID, int eMatDim, // ------------------------------------------------------------- currBlock->elemStiffMat_[index] = new double[eMatDim*eMatDim]; - for ( j = 0; j < eMatDim*eMatDim; j++ ) + for ( j = 0; j < eMatDim*eMatDim; j++ ) currBlock->elemStiffMat_[index][j] = elemMat[j]; - + return 1; } //************************************************************************* -// load element nullspace +// load element nullspace //------------------------------------------------------------------------- int MLI_FEData::loadElemNullSpace(int eGlobalID, int numNS, int eMatDim, @@ -1803,7 +1802,7 @@ int MLI_FEData::loadElemNullSpace(int eGlobalID, int numNS, int eMatDim, currBlock = elemBlockList_[currentElemBlock_]; #ifdef MLI_DEBUG_DETAILED - if ( ! currBlock->intComplete_ ) + if ( ! 
currBlock->intComplete_ ) { printf("loadElemNullSpace ERROR : initialization not complete.\n"); exit(1); @@ -1860,7 +1859,7 @@ int MLI_FEData::loadElemNullSpace(int eGlobalID, int numNS, int eMatDim, } //************************************************************************* -// load element load (right hand side) +// load element load (right hand side) //------------------------------------------------------------------------- int MLI_FEData::loadElemLoad(int eGlobalID, int eMatDim, @@ -1876,7 +1875,7 @@ int MLI_FEData::loadElemLoad(int eGlobalID, int eMatDim, currBlock = elemBlockList_[currentElemBlock_]; #ifdef MLI_DEBUG_DETAILED - if ( ! currBlock->intComplete_ ) + if ( ! currBlock->intComplete_ ) { printf("loadElemLoad ERROR : initialization not complete.\n"); exit(1); @@ -1925,7 +1924,7 @@ int MLI_FEData::loadElemLoad(int eGlobalID, int eMatDim, } //************************************************************************* -// load element solution +// load element solution //------------------------------------------------------------------------- int MLI_FEData::loadElemSolution(int eGlobalID, int eMatDim, @@ -1941,7 +1940,7 @@ int MLI_FEData::loadElemSolution(int eGlobalID, int eMatDim, currBlock = elemBlockList_[currentElemBlock_]; #ifdef MLI_DEBUG_DETAILED - if ( ! currBlock->intComplete_ ) + if ( ! 
currBlock->intComplete_ ) { printf("loadElemSolution ERROR : initialization not complete.\n"); exit(1); @@ -1990,7 +1989,7 @@ int MLI_FEData::loadElemSolution(int eGlobalID, int eMatDim, } //************************************************************************* -// set node boundary condition +// set node boundary condition //------------------------------------------------------------------------- int MLI_FEData::loadNodeBCs(int nNodes, const int *nodeIDs, int nodeDOF, @@ -2029,7 +2028,7 @@ int MLI_FEData::loadNodeBCs(int nNodes, const int *nodeIDs, int nodeDOF, printf("loadNodeBCs Diagnostics: segFault test.\n"); char cdata; double ddata; - for (i = 0; i < nNodes; i++) + for (i = 0; i < nNodes; i++) { j = nodeIDs[i]; for (j = 0; j < nodeDOF; j++) cdata = BCFlags[i][j]; @@ -2083,7 +2082,7 @@ int MLI_FEData::getSpaceDimension(int& numDim) } //************************************************************************* -// get order of PDE +// get order of PDE //------------------------------------------------------------------------- int MLI_FEData::getOrderOfPDE(int& order) @@ -2093,7 +2092,7 @@ int MLI_FEData::getOrderOfPDE(int& order) } //************************************************************************* -// get order of FE +// get order of FE //------------------------------------------------------------------------- int MLI_FEData::getOrderOfFE(int& order) @@ -2103,7 +2102,7 @@ int MLI_FEData::getOrderOfFE(int& order) } //************************************************************************* -// get field size +// get field size //------------------------------------------------------------------------- int MLI_FEData::getFieldSize(int fieldID, int& fieldSize) @@ -2116,7 +2115,7 @@ int MLI_FEData::getFieldSize(int fieldID, int& fieldSize) } //************************************************************************* -// get number of local elements +// get number of local elements 
//------------------------------------------------------------------------- int MLI_FEData::getNumElements(int& nelems) @@ -2156,7 +2155,7 @@ int MLI_FEData::getElemFieldIDs(int numFields, int *fieldIDs) } //************************************************************************* -// get an element globalID +// get an element globalID //------------------------------------------------------------------------- int MLI_FEData::getElemGlobalID(int localID, int &globalID) @@ -2183,7 +2182,7 @@ int MLI_FEData::getElemGlobalID(int localID, int &globalID) } //************************************************************************* -// get all element globalIDs +// get all element globalIDs //------------------------------------------------------------------------- int MLI_FEData::getElemBlockGlobalIDs(int nElems, int *eGlobalIDs) @@ -2210,7 +2209,7 @@ int MLI_FEData::getElemBlockGlobalIDs(int nElems, int *eGlobalIDs) printf("getElemBlockGlobalIDs Diagnostics: passed the segFault test.\n"); #endif - for ( int j = 0; j < nElems; j++ ) + for ( int j = 0; j < nElems; j++ ) eGlobalIDs[j] = currBlock->elemGlobalIDs_[j]; return 1; } @@ -2227,7 +2226,7 @@ int MLI_FEData::getElemNumNodes(int& nNodes) } //************************************************************************* -// get element block nodelists +// get element block nodelists //------------------------------------------------------------------------- int MLI_FEData::getElemBlockNodeLists(int nElems, int nNodes, int **nodeList) @@ -2256,7 +2255,7 @@ int MLI_FEData::getElemBlockNodeLists(int nElems, int nNodes, int **nodeList) #ifdef MLI_DEBUG_DETAILED printf("getElemBlockNodeLists Diagnostics: segFault test.\n"); - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) for (j = 0; j < nNodes; j++) nodeList[i][j] = 0; printf("getElemBlockNodeLists Diagnostics: passed the segFault test.\n"); #endif @@ -2274,7 +2273,7 @@ int MLI_FEData::getElemBlockNodeLists(int nElems, int nNodes, int **nodeList) } 
//************************************************************************* -// get element matrices' dimension +// get element matrices' dimension //------------------------------------------------------------------------- int MLI_FEData::getElemMatrixDim(int& matDim) @@ -2285,7 +2284,7 @@ int MLI_FEData::getElemMatrixDim(int& matDim) } //************************************************************************* -// get all element stiffness matrices +// get all element stiffness matrices //------------------------------------------------------------------------- int MLI_FEData::getElemBlockMatrices(int nElems,int eMatDim,double **elemMat) @@ -2316,7 +2315,7 @@ int MLI_FEData::getElemBlockMatrices(int nElems,int eMatDim,double **elemMat) #ifdef MLI_DEBUG_DETAILED printf("getElemBlockMatrices Diagnostics: segFault test.\n"); - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) for (j = 0; j < eMatDim*eMatDim; j++) elemMat[i][j] = 0.0; printf("getElemBlockMatrices Diagnostics: passed the segFault test.\n"); #endif @@ -2364,7 +2363,7 @@ int MLI_FEData::getElemBlockNullSpaceSizes(int nElems, int *dimNS) #ifdef MLI_DEBUG_DETAILED printf("getElemBlockNullSpaceSizes Diagnostics: segFault test.\n"); - for (i = 0; i < nElems; i++) dimNS[i] = 0; + for (i = 0; i < nElems; i++) dimNS[i] = 0; printf("getElemBlockNullSpaceSizes Diagnostics: passed segFault test.\n"); #endif @@ -2381,10 +2380,10 @@ int MLI_FEData::getElemBlockNullSpaceSizes(int nElems, int *dimNS) } //************************************************************************* -// get all element nullspaces +// get all element nullspaces //------------------------------------------------------------------------- -int MLI_FEData::getElemBlockNullSpaces(int nElems, const int *dimNS, +int MLI_FEData::getElemBlockNullSpaces(int nElems, const int *dimNS, int eMatDim, double **nullSpaces) { int i,j; @@ -2416,8 +2415,8 @@ int MLI_FEData::getElemBlockNullSpaces(int nElems, const int *dimNS, #ifdef 
MLI_DEBUG_DETAILED printf("getElemBlockNullSpaces Diagnostics: segFault test.\n"); - for (i = 0; i < nElems; i++) - for (j = 0; j < dimNS[i]*eMatDim; j++) nullSpaces[i][j] = 0.0; + for (i = 0; i < nElems; i++) + for (j = 0; j < dimNS[i]*eMatDim; j++) nullSpaces[i][j] = 0.0; printf("getElemBlockNullSpaces Diagnostics: passed segFault test.\n"); #endif @@ -2425,21 +2424,21 @@ int MLI_FEData::getElemBlockNullSpaces(int nElems, const int *dimNS, // --- load nullspace sizes // ------------------------------------------------------------- - for ( i = 0; i < nElems; i++ ) + for ( i = 0; i < nElems; i++ ) { if ( dimNS[i] != currBlock->elemNumNS_[i] ) { printf("getElemBlockNullSpaces ERROR : dimension do not match.\n"); exit(1); } - for ( j = 0; j < eMatDim*dimNS[i]; j++ ) + for ( j = 0; j < eMatDim*dimNS[i]; j++ ) nullSpaces[i][j] = currBlock->elemNullSpace_[i][j]; } return 1; } //************************************************************************* -// get all element volumes +// get all element volumes //------------------------------------------------------------------------- int MLI_FEData::getElemBlockVolumes(int nElems, double *elemVols) @@ -2466,7 +2465,7 @@ int MLI_FEData::getElemBlockVolumes(int nElems, double *elemVols) } // ------------------------------------------------------------- - // --- load element volumes + // --- load element volumes // ------------------------------------------------------------- for ( int i = 0; i < nElems; i++ ) elemVols[i] = currBlock->elemVolume_[i]; @@ -2475,7 +2474,7 @@ int MLI_FEData::getElemBlockVolumes(int nElems, double *elemVols) } //************************************************************************* -// get all element materials +// get all element materials //------------------------------------------------------------------------- int MLI_FEData::getElemBlockMaterials(int nElems, int *elemMats) @@ -2502,7 +2501,7 @@ int MLI_FEData::getElemBlockMaterials(int nElems, int *elemMats) } // 
------------------------------------------------------------- - // --- load element materials + // --- load element materials // ------------------------------------------------------------- for (int i = 0; i < nElems; i++) elemMats[i] = currBlock->elemMaterial_[i]; @@ -2511,7 +2510,7 @@ int MLI_FEData::getElemBlockMaterials(int nElems, int *elemMats) } //************************************************************************* -// get all element parent IDs +// get all element parent IDs //------------------------------------------------------------------------- int MLI_FEData::getElemBlockParentIDs(int nElems, int *parentIDs) @@ -2538,7 +2537,7 @@ int MLI_FEData::getElemBlockParentIDs(int nElems, int *parentIDs) } // ------------------------------------------------------------- - // --- load element parent IDs + // --- load element parent IDs // ------------------------------------------------------------- for (int i = 0; i < nElems; i++) parentIDs[i] = currBlock->elemParentIDs_[i]; @@ -2634,7 +2633,7 @@ int MLI_FEData::getElemNodeList(int eGlobalID, int nNodes, int *nodeList) } //************************************************************************* -// get an element matrix +// get an element matrix //------------------------------------------------------------------------- int MLI_FEData::getElemMatrix(int eGlobalID, int eMatDim, double *elemMat) @@ -2710,10 +2709,10 @@ int MLI_FEData::getElemNullSpaceSize(int eGlobalID, int &dimNS) } //************************************************************************* -// get an element nullspace +// get an element nullspace //------------------------------------------------------------------------- -int MLI_FEData::getElemNullSpace(int eGlobalID, int dimNS, int eMatDim, +int MLI_FEData::getElemNullSpace(int eGlobalID, int dimNS, int eMatDim, double *nullSpaces) { // ------------------------------------------------------------- @@ -2747,13 +2746,13 @@ int MLI_FEData::getElemNullSpace(int eGlobalID, int dimNS, int 
eMatDim, printf("getElemNullSpace ERROR : element not found.\n"); exit(1); } - for ( int i = 0; i < eMatDim*dimNS; i++ ) + for ( int i = 0; i < eMatDim*dimNS; i++ ) nullSpaces[i] = currBlock->elemNullSpace_[index][i]; return 1; } //************************************************************************* -// get an element volume +// get an element volume //------------------------------------------------------------------------- int MLI_FEData::getElemVolume(int eGlobalID, double &elemVol) @@ -2775,7 +2774,7 @@ int MLI_FEData::getElemVolume(int eGlobalID, double &elemVol) } // ------------------------------------------------------------- - // --- load element volumes + // --- load element volumes // ------------------------------------------------------------- int index = searchElement(eGlobalID); @@ -2790,7 +2789,7 @@ int MLI_FEData::getElemVolume(int eGlobalID, double &elemVol) } //************************************************************************* -// get an element material +// get an element material //------------------------------------------------------------------------- int MLI_FEData::getElemMaterial(int eGlobalID, int &elemMat) @@ -2812,7 +2811,7 @@ int MLI_FEData::getElemMaterial(int eGlobalID, int &elemMat) } // ------------------------------------------------------------- - // --- load element material + // --- load element material // ------------------------------------------------------------- int index = searchElement(eGlobalID); @@ -2827,7 +2826,7 @@ int MLI_FEData::getElemMaterial(int eGlobalID, int &elemMat) } //************************************************************************* -// get all element parent IDs +// get all element parent IDs //------------------------------------------------------------------------- int MLI_FEData::getElemParentID(int eGlobalID, int &parentID) @@ -2849,7 +2848,7 @@ int MLI_FEData::getElemParentID(int eGlobalID, int &parentID) } // ------------------------------------------------------------- - // 
--- load element parent IDs + // --- load element parent IDs // ------------------------------------------------------------- int index = searchElement(eGlobalID); @@ -2915,7 +2914,7 @@ int MLI_FEData::getNumBCElems(int& nElems) // get number of boundary elements //------------------------------------------------------------------------- -int MLI_FEData::getElemBCs(int nElems, int *eGlobalIDs, int eDOFs, +int MLI_FEData::getElemBCs(int nElems, int *eGlobalIDs, int eDOFs, char **fieldFlag, double **BCVals) { // ------------------------------------------------------------- @@ -2956,7 +2955,7 @@ int MLI_FEData::getElemBCs(int nElems, int *eGlobalIDs, int eDOFs, } //************************************************************************* -// get number of total nodes (local + external) +// get number of total nodes (local + external) //------------------------------------------------------------------------- int MLI_FEData::getNumNodes(int& nNodes) @@ -2967,7 +2966,7 @@ int MLI_FEData::getNumNodes(int& nNodes) } //************************************************************************* -// get all node globalIDs +// get all node globalIDs //------------------------------------------------------------------------- int MLI_FEData::getNodeBlockGlobalIDs(int nNodes, int *nGlobalIDs) @@ -2992,7 +2991,7 @@ int MLI_FEData::getNodeBlockGlobalIDs(int nNodes, int *nGlobalIDs) // --- get nodal global IDs // ------------------------------------------------------------- - for (int i = 0; i < nNodes; i++) + for (int i = 0; i < nNodes; i++) nGlobalIDs[i] = currBlock->nodeGlobalIDs_[i]; return 1; } @@ -3021,7 +3020,7 @@ int MLI_FEData::getNodeFieldIDs(int numFields, int *fieldIDs) } //************************************************************************* -// get all node coordinates +// get all node coordinates //------------------------------------------------------------------------- int MLI_FEData::getNodeBlockCoordinates(int nNodes, int spaceDim, @@ -3052,7 +3051,7 @@ int 
MLI_FEData::getNodeBlockCoordinates(int nNodes, int spaceDim, // --- get nodal coordinates // ------------------------------------------------------------- - for (int i = 0; i < nNodes*spaceDim; i++) + for (int i = 0; i < nNodes*spaceDim; i++) coordinates[i] = currBlock->nodeCoordinates_[i]; return 1; } @@ -3072,7 +3071,7 @@ int MLI_FEData::getNumBCNodes(int& nNodes) // get number of boundary nodes //------------------------------------------------------------------------- -int MLI_FEData::getNodeBCs(int nNodes, int *nGlobalIDs, int nDOFs, +int MLI_FEData::getNodeBCs(int nNodes, int *nGlobalIDs, int nDOFs, char **fieldFlag, double **BCVals) { // ------------------------------------------------------------- @@ -3147,7 +3146,7 @@ int MLI_FEData::getSharedNodeNumProcs(int nNodes, int *nGlobalIDs, } // ------------------------------------------------------------- - // --- get information + // --- get information // ------------------------------------------------------------- for ( int i = 0; i < nNodes; i++ ) @@ -3182,7 +3181,7 @@ int MLI_FEData::getSharedNodeProcs(int nNodes, int *numProcs, } // ------------------------------------------------------------- - // --- get information + // --- get information // ------------------------------------------------------------- for ( int i = 0; i < nNodes; i++ ) @@ -3199,7 +3198,7 @@ int MLI_FEData::getSharedNodeProcs(int nNodes, int *numProcs, } //************************************************************************* -// get number of faces +// get number of faces //------------------------------------------------------------------------- int MLI_FEData::getNumFaces(int &nFaces) @@ -3215,7 +3214,7 @@ int MLI_FEData::getNumFaces(int &nFaces) } //************************************************************************* -// get all face globalIDs +// get all face globalIDs //------------------------------------------------------------------------- int MLI_FEData::getFaceBlockGlobalIDs(int nFaces, int *fGlobalIDs) @@ -3231,13 
+3230,13 @@ int MLI_FEData::getFaceBlockGlobalIDs(int nFaces, int *fGlobalIDs) printf("getFaceBlockGlobalIDs ERROR : nFaces mismatch.\n"); exit(1); } - for ( int i = 0; i < nFaces; i++ ) + for ( int i = 0; i < nFaces; i++ ) fGlobalIDs[i] = currBlock->faceGlobalIDs_[i]; return 1; } //************************************************************************* -// get number of shared faces +// get number of shared faces //------------------------------------------------------------------------- int MLI_FEData::getNumSharedFaces(int &nFaces) @@ -3276,7 +3275,7 @@ int MLI_FEData::getSharedFaceNumProcs(int nFaces, int *fGlobalIDs, } // ------------------------------------------------------------- - // --- get information + // --- get information // ------------------------------------------------------------- for ( int i = 0; i < nFaces; i++ ) @@ -3311,7 +3310,7 @@ int MLI_FEData::getSharedFaceProcs(int nFaces, int *numProcs, } // ------------------------------------------------------------- - // --- get information + // --- get information // ------------------------------------------------------------- for ( int i = 0; i < nFaces; i++ ) @@ -3344,7 +3343,7 @@ int MLI_FEData::getFaceNumNodes(int &nNodes) } //************************************************************************* -// get block face node list +// get block face node list //------------------------------------------------------------------------- int MLI_FEData::getFaceBlockNodeLists(int nFaces, int nNodesPerFace, @@ -3379,12 +3378,12 @@ int MLI_FEData::getFaceBlockNodeLists(int nFaces, int nNodesPerFace, for ( int i = 0; i < nFaces; i++ ) for ( int j = 0; j < nNodesPerFace; j++ ) nGlobalIDLists[i][j] = currBlock->faceNodeIDList_[i][j]; - + return 1; } //************************************************************************* -// get face node list +// get face node list //------------------------------------------------------------------------- int MLI_FEData::getFaceNodeList(int fGlobalID, int nNodes, 
int *nodeList) @@ -3418,12 +3417,12 @@ int MLI_FEData::getFaceNodeList(int fGlobalID, int nNodes, int *nodeList) } for ( int i = 0; i < nNodes; i++ ) nodeList[i] = currBlock->faceNodeIDList_[index][i]; - + return 1; } //************************************************************************* -// load in the function to calculate shape function interpolant +// load in the function to calculate shape function interpolant //------------------------------------------------------------------------- int MLI_FEData::loadFunc_computeShapeFuncInterpolant(void *object, int (*func) @@ -3435,10 +3434,10 @@ int MLI_FEData::loadFunc_computeShapeFuncInterpolant(void *object, int (*func) } //************************************************************************* -// get shape function interpolant +// get shape function interpolant //------------------------------------------------------------------------- -int MLI_FEData::getShapeFuncInterpolant(int elemID, int nNodes, +int MLI_FEData::getShapeFuncInterpolant(int elemID, int nNodes, const double *coord, double *coef) { USR_computeShapeFuncInterpolant(USR_FEGridObj_, elemID, nNodes, @@ -3454,7 +3453,7 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) { int mypid, nprocs; MLI_ElemBlock *currBlock = elemBlockList_[currentElemBlock_]; - + // ------------------------------------------------------------- // --- error checking // ------------------------------------------------------------- @@ -3466,7 +3465,7 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) } // ------------------------------------------------------------- - // --- output help menu + // --- output help menu // ------------------------------------------------------------- MPI_Comm_rank( mpiComm_, &mypid); @@ -3507,11 +3506,11 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) if ( ! 
strcmp("getElemOffset",data_key) ) { - if ( argc < 1 ) + if ( argc < 1 ) { printf("implSpecificRequests ERROR : getElemOffset - argc < 1.\n"); exit(1); - } + } int *offset = (int *) argv[0]; (*offset) = currBlock->elemOffset_; return 1; @@ -3521,11 +3520,11 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) else if ( ! strcmp("getNodeOffset", data_key) ) { - if ( argc < 1 ) + if ( argc < 1 ) { printf("impSpecificRequests ERROR : getNodeOffset - argc < 1.\n"); exit(1); - } + } int *offset = (int *) argv[0]; (*offset) = currBlock->nodeOffset_; return 1; @@ -3535,11 +3534,11 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) else if ( ! strcmp("getFaceOffset", data_key) ) { - if ( argc < 1 ) + if ( argc < 1 ) { printf("impSpecificRequests ERROR : getFaceOffset - argc < 1.\n"); exit(1); - } + } int *offset = (int *) argv[0]; (*offset) = currBlock->faceOffset_; return 1; @@ -3549,11 +3548,11 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) else if ( ! strcmp("getNumExtNodes", data_key) ) { - if ( argc < 1 ) + if ( argc < 1 ) { printf("impSpecificRequests ERROR : getNumExtNodes - argc < 1.\n"); exit(1); - } + } int *nNodesExt = (int *) argv[0]; (*nNodesExt) = currBlock->numExternalNodes_; return 1; @@ -3563,11 +3562,11 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) else if ( ! strcmp("getNumExtFaces", data_key) ) { - if ( argc < 1 ) + if ( argc < 1 ) { printf("impSpecificRequests ERROR : getNumExtFaces - argc < 1.\n"); exit(1); - } + } int *nFacesExt = (int *) argv[0]; (*nFacesExt) = currBlock->numExternalFaces_; return 1; @@ -3577,11 +3576,11 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) else if ( ! 
strcmp("getExtNodeNewGlobalIDs", data_key) ) { - if ( argc < 1 ) + if ( argc < 1 ) { printf("impSpecificRequests ERROR : getExtNodeNewGlobalIDs-argc<1\n"); exit(1); - } + } int *newGlobalIDs = (int *) argv[0]; for ( int i = 0; i < currBlock->numExternalNodes_; i++ ) newGlobalIDs[i] = currBlock->nodeExtNewGlobalIDs_[i]; @@ -3592,11 +3591,11 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) else if ( ! strcmp("getExtFaceNewGlobalIDs", data_key) ) { - if ( argc < 1 ) + if ( argc < 1 ) { printf("impSpecificRequests ERROR : getExtFaceNewGlobalIDs-argc<1\n"); exit(1); - } + } int *newGlobalIDs = (int *) argv[0]; for ( int j = 0; j < currBlock->numExternalFaces_; j++ ) newGlobalIDs[j] = currBlock->faceExtNewGlobalIDs_[j]; @@ -3660,28 +3659,28 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) } } - // find out how many distinct processor numbers and fill the + // find out how many distinct processor numbers and fill the // send buffer if ( nNodesExt > 0 ) procList = new int[mypid]; else procList = NULL; for ( i = 0; i < nNodesExt; i++ ) procList[i] = 0; - for ( i = 0; i < nNodesExt; i++ ) + for ( i = 0; i < nNodesExt; i++ ) procList[owner[index]] += ncols[i+nNodes] + 2; nSends = 0; for ( i = 0; i < mypid; i++ ) if ( procList[i] > 0 ) nSends++; sendLengs = NULL; sendProcs = NULL; sendBufs = NULL; - if ( nSends > 0 ) + if ( nSends > 0 ) { sendLengs = new int[nSends]; sendProcs = new int[nSends]; sendBufs = new int*[nSends]; nSends = 0; - for ( i = 0; i < mypid; i++ ) + for ( i = 0; i < mypid; i++ ) { - if ( procList[i] > 0 ) + if ( procList[i] > 0 ) { sendLengs[nSends] = procList[i]; sendProcs[nSends] = i; @@ -3691,14 +3690,14 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) } } nSends = 0; - for ( i = 0; i < mypid; i++ ) - if ( procList[i] > 0 ) procList[i] = nSends++; + for ( i = 0; i < mypid; i++ ) + if ( procList[i] > 0 ) procList[i] = nSends++; for ( i = 0; i < nNodesExt; i++ ) owner[i] 
= procList[owner[i]]; - for ( i = 0; i < nNodesExt; i++ ) + for ( i = 0; i < nNodesExt; i++ ) { sendBufs[owner[i]][sendLengs[owner[i]]++] = nodeList[i+nNodes]; sendBufs[owner[i]][sendLengs[owner[i]]++] = ncols[i+nNodes]; - for ( j = 0; j < ncols[i+nNodes]; j++ ) + for ( j = 0; j < ncols[i+nNodes]; j++ ) sendBufs[owner[i]][sendLengs[owner[i]]++] = cols[i+nNodes][j]; } } @@ -3723,33 +3722,33 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) recvLengs = new int[nRecvs]; pSrc = MPI_ANY_SOURCE; msgID = 33420; - for ( i = 0; i < nRecvs; i++ ) + for ( i = 0; i < nRecvs; i++ ) MPI_Irecv(&recvLengs[i],1,MPI_INT,pSrc,msgID,mpiComm_,&request[i]); } if ( nSends > 0 ) { msgID = 33420; - for ( i = 0; i < nSends; i++ ) + for ( i = 0; i < nSends; i++ ) MPI_Send(&sendLengs[i],1,MPI_INT,sendProcs[i],msgID,mpiComm_); } if ( nRecvs > 0 ) { recvProcs = new int[nRecvs]; recvBufs = new int*[nRecvs]; - for ( i = 0; i < nRecvs; i++ ) + for ( i = 0; i < nRecvs; i++ ) { MPI_Wait( &request[i], &status ); recvProcs[i] = status.MPI_SOURCE; recvBufs[i] = new int[recvLengs[i]]; } } - + // now send/receive the external information if ( nRecvs > 0 ) { msgID = 33421; - for ( i = 0; i < nRecvs; i++ ) + for ( i = 0; i < nRecvs; i++ ) { pSrc = recvProcs[i]; length = recvLengs[i]; @@ -3760,7 +3759,7 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) if ( nSends > 0 ) { msgID = 33421; - for ( i = 0; i < nSends; i++ ) + for ( i = 0; i < nSends; i++ ) { pSrc = sendProcs[i]; length = sendLengs[i]; @@ -3772,7 +3771,7 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) { for ( i = 0; i < nRecvs; i++ ) MPI_Wait( &request[i], &status ); } - + // owners of shared nodes receive data for( i = 0; i < nRecvs; i++ ) @@ -3791,7 +3790,7 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) } columns = new int[ncols[index]+length]; for ( j = 0; j < ncols[index]; j++ ) columns[j] = cols[index][j]; - for ( j = 0; j < 
length; j++ ) + for ( j = 0; j < length; j++ ) columns[ncols[index]++] = iBuf[j+ncnt]; ncnt += length; delete [] cols[index]; @@ -3824,7 +3823,7 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) int *columns, l, k; MPI_Request request; MPI_Status Status; - + // get the owners for the external faces int nFaces = currBlock->numLocalFaces_; @@ -3847,19 +3846,19 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) owner[index] = pnum; } } - + // external faces send with which elements are connected for ( i = 0; i < nFacesExt; i++ ) - MPI_Isend(cols[i+nFaces], ncols[i+nFaces], MPI_INT, + MPI_Isend(cols[i+nFaces], ncols[i+nFaces], MPI_INT, owner[i], faceList[i+nFaces], mpiComm_, &request); - + // owners of shared faces receive data for ( i = 0; i < numSharedFaces; i++ ) { ind[i] = MLI_Utils_BinarySearch(sharedFaceList[i], faceList, nFaces); - + // the shared face is owned by this subdomain if (ind[i] >= 0) @@ -3872,10 +3871,10 @@ int MLI_FEData::impSpecificRequests(char *data_key, int argc, char **argv) MPI_Get_count( &Status, MPI_INT, &n); k = MLI_Utils_BinarySearch(Status.MPI_TAG,faceList,nFaces); columns = new int[ncols[k]+n]; - + for( l = 0; l < ncols[k]; l++ ) columns[l] = cols[k][l]; for( l = 0; l < n; l++ ) columns[ncols[k]++] = Buf[l]; - + delete [] cols[k]; cols[k] = columns; } @@ -3914,9 +3913,9 @@ int MLI_FEData::readFromFile(char *infile) // number of elements // number of nodes per element // number of element fields - // element field IDs... + // element field IDs... // number of nodal fields - // nodal field IDs... + // nodal field IDs... 
// element global IDs (nElems of them) // element node list (nElems*nNodesPerElem of them) // ------------------------------------------------------------- @@ -3954,14 +3953,14 @@ int MLI_FEData::readFromFile(char *infile) IDLists = new int*[nElems]; for (i = 0; i < nElems; i++) IDLists[i] = new int[nNodesPerElem]; - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) { for (j = 0; j < nNodesPerElem; j++) fscanf(fp, "%d", &(IDLists[i][j])); } fclose(fp); // ------------------------------------------------------------- - // --- read coordinate file, if present + // --- read coordinate file, if present // Format : number of nodes // space dimension // node global ID x y z ... @@ -3977,24 +3976,24 @@ int MLI_FEData::readFromFile(char *infile) fscanf(fp, "%d", &spaceDim); nodeIDs = new int[nNodes]; nodeCoords = new double[nNodes * spaceDim]; - for (i = 0; i < nNodes; i++) + for (i = 0; i < nNodes; i++) { fscanf(fp, "%d", &(nodeIDs[i])); - for (j = 0; j < spaceDim; j++) + for (j = 0; j < spaceDim; j++) fscanf(fp, "%lg", &(nodeCoords[i*spaceDim+j])); } fclose(fp); nodeIDAux = new int[nNodes]; - for (i = 0; i < nNodes; i++) nodeIDAux[i] = i; + for (i = 0; i < nNodes; i++) nodeIDAux[i] = i; newCoords = new double*[nElems]; - for (i = 0; i < nElems; i++) - newCoords[i] = new double[nNodesPerElem*spaceDim]; + for (i = 0; i < nElems; i++) + newCoords[i] = new double[nNodesPerElem*spaceDim]; MLI_Utils_IntQSort2(nodeIDs, nodeIDAux, 0, nNodes-1); - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) { - for (j = 0; j < nNodesPerElem; j++) + for (j = 0; j < nNodesPerElem; j++) { index = MLI_Utils_BinarySearch(IDLists[i][j], nodeIDs, nNodes); if ( index < 0 ) @@ -4002,7 +4001,7 @@ int MLI_FEData::readFromFile(char *infile) printf("readFromFile ERROR : element node ID not found.\n"); exit(1); } - for (k = 0; k < spaceDim; k++) + for (k = 0; k < spaceDim; k++) { index2 = nodeIDAux[index]; newCoords[i][j*spaceDim+k] = nodeCoords[index2*spaceDim+k]; @@ -4043,7 +4042,7 
@@ int MLI_FEData::readFromFile(char *infile) if ( nodeIDAux != NULL ) delete [] nodeIDAux; if ( elemFieldIDs != NULL ) delete [] elemFieldIDs; if ( nodeFieldIDs != NULL ) delete [] nodeFieldIDs; - + // ------------------------------------------------------------- // --- read and shared nodes information // ------------------------------------------------------------- @@ -4058,11 +4057,11 @@ int MLI_FEData::readFromFile(char *infile) nodeIDs = new int[nNodes]; numProcs = new int[nNodes]; procLists = new int*[nNodes]; - for ( i = 0; i < nNodes; i++ ) + for ( i = 0; i < nNodes; i++ ) { fscanf(fp, "%d %d", &(nodeIDs[i]), &(numProcs[i])); procLists[i] = new int[numProcs[i]]; - for ( j = 0; j < numProcs[i]; j++ ) + for ( j = 0; j < numProcs[i]; j++ ) fscanf(fp, "%d", &(procLists[i][j])); } initSharedNodes(nNodes, nodeIDs, numProcs, procLists); @@ -4123,21 +4122,21 @@ int MLI_FEData::readFromFile(char *infile) nodeBCFlags = new char*[nNodes]; nodeBCVals = new double*[nNodes]; for ( i = 0; i < nNodes; i++ ) nodeBCFlags[i] = new char[nodeDOF]; - for ( i = 0; i < nNodes; i++ ) + for ( i = 0; i < nNodes; i++ ) { nodeBCVals[i] = new double[nodeDOF]; - for ( j = 0; j < nodeDOF; j++ ) nodeBCVals[i][j] = 0.0; + for ( j = 0; j < nodeDOF; j++ ) nodeBCVals[i][j] = 0.0; } - for ( i = 0; i < nNodes; i++ ) + for ( i = 0; i < nNodes; i++ ) { fscanf(fp, "%d", &(nodeIDs[i])); - for ( j = 0; j < nodeDOF; j++ ) + for ( j = 0; j < nodeDOF; j++ ) { fscanf(fp, "%d", &k); if ( k > 0 ) { nodeBCFlags[i][j] = 'Y'; - fscanf(fp, "%lg", &(nodeBCVals[i][j])); + fscanf(fp, "%lg", &(nodeBCVals[i][j])); } else nodeBCFlags[i][j] = 'N'; } @@ -4184,9 +4183,9 @@ int MLI_FEData::writeToFile(char *infile) // number of elements // number of nodes per element // number of element fields - // element field IDs... + // element field IDs... // number of nodal fields - // nodal field IDs... + // nodal field IDs... 
// element global IDs (nElems of them) // element node list (nElems*nNodesPerElem of them) // ------------------------------------------------------------- @@ -4217,32 +4216,32 @@ int MLI_FEData::writeToFile(char *infile) fprintf(fp, "%12d\n", numFields_); for ( i = 0; i < numFields_; i++ ) fprintf(fp, "%12d %12d\n", fieldIDs_[i], fieldSizes_[i]); - + nElems = currBlock->numLocalElems_; fprintf(fp, "%12d\n", nElems); fprintf(fp, "%12d\n", currBlock->elemNumNodes_); fprintf(fp, "%12d\n", currBlock->elemNumFields_); - for (i = 0; i < currBlock->elemNumFields_; i++) + for (i = 0; i < currBlock->elemNumFields_; i++) fprintf(fp, "%12d\n", currBlock->elemFieldIDs_[i]); fprintf(fp, "%12d\n", currBlock->nodeNumFields_); - for (i = 0; i < currBlock->nodeNumFields_; i++) + for (i = 0; i < currBlock->nodeNumFields_; i++) fprintf(fp, "%12d\n", currBlock->nodeFieldIDs_[i]); fprintf(fp, "\n"); - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) fprintf(fp, "%12d\n", currBlock->elemGlobalIDs_[i]); fprintf(fp,"\n"); - for (i = 0; i < nElems; i++) + for (i = 0; i < nElems; i++) { - for ( j = 0; j < currBlock->elemNumNodes_; j++ ) + for ( j = 0; j < currBlock->elemNumNodes_; j++ ) fprintf(fp, "%d ", currBlock->elemNodeIDList_[i][j]); fprintf(fp,"\n"); - } + } fclose(fp); // ------------------------------------------------------------- - // --- write coordinate file, if needed + // --- write coordinate file, if needed // Format : number of nodes // space dimension // node global ID x y z ... 
@@ -4266,10 +4265,10 @@ int MLI_FEData::writeToFile(char *infile) nNodes = currBlock->numLocalNodes_ + currBlock->numExternalNodes_; fprintf(fp, "%12d\n", nNodes); fprintf(fp, "%12d\n", spaceDimension_); - for ( i = 0; i < nNodes; i++ ) + for ( i = 0; i < nNodes; i++ ) { fprintf(fp, "%12d", currBlock->nodeGlobalIDs_[i]); - for (j = 0; j < spaceDimension_; j++) + for (j = 0; j < spaceDimension_; j++) fprintf(fp, "%20.12e", currBlock->nodeCoordinates_[i*spaceDimension_+j]); fprintf(fp,"\n"); @@ -4299,11 +4298,11 @@ int MLI_FEData::writeToFile(char *infile) fprintf(fp, "#\n"); fprintf(fp, "%d\n", nNodes); - for ( i = 0; i < nNodes; i++ ) + for ( i = 0; i < nNodes; i++ ) { - fprintf(fp, "%12d %12d\n", currBlock->sharedNodeIDs_[i], + fprintf(fp, "%12d %12d\n", currBlock->sharedNodeIDs_[i], currBlock->sharedNodeNProcs_[i]); - for ( j = 0; j < currBlock->sharedNodeNProcs_[i]; j++ ) + for ( j = 0; j < currBlock->sharedNodeNProcs_[i]; j++ ) fprintf(fp, "%12d\n", currBlock->sharedNodeProc_[i][j]); } fclose(fp); @@ -4365,9 +4364,9 @@ int MLI_FEData::writeToFile(char *infile) fprintf(fp, "%d\n", nNodes ); fprintf(fp, "%d\n", nodeDOF ); - for ( i = 0; i < nNodes; i++ ) + for ( i = 0; i < nNodes; i++ ) { - for ( j = 0; j < nodeDOF; j++ ) + for ( j = 0; j < nodeDOF; j++ ) { if ( currBlock->nodeBCFlagList_[i][j] == 'Y' ) fprintf(fp, "%12d 1 %25.16e\n", currBlock->nodeBCIDList_[i], @@ -4382,7 +4381,7 @@ int MLI_FEData::writeToFile(char *infile) } /************************************************************************** - * constructor for the elemBlock + * constructor for the elemBlock *-----------------------------------------------------------------------*/ int MLI_FEData::createElemBlock(int blockID) @@ -4479,7 +4478,7 @@ int MLI_FEData::createElemBlock(int blockID) } /************************************************************************** - * destructor for the elemBlock + * destructor for the elemBlock 
*-----------------------------------------------------------------------*/ int MLI_FEData::deleteElemBlock(int blockID) @@ -4508,16 +4507,16 @@ int MLI_FEData::deleteElemBlock(int blockID) currBlock = elemBlockList_[blockID]; if (currBlock->elemGlobalIDs_ != NULL) delete [] currBlock->elemGlobalIDs_; - if (currBlock->elemGlobalIDAux_ != NULL) + if (currBlock->elemGlobalIDAux_ != NULL) delete [] currBlock->elemGlobalIDAux_; if (currBlock->elemFieldIDs_ != NULL) delete [] currBlock->elemFieldIDs_; - if (currBlock->elemNodeIDList_ != NULL) + if (currBlock->elemNodeIDList_ != NULL) { for ( i = 0; i < currBlock->numLocalElems_; i++ ) delete [] currBlock->elemNodeIDList_[i]; delete [] currBlock->elemNodeIDList_; } - if (currBlock->elemStiffMat_ != NULL) + if (currBlock->elemStiffMat_ != NULL) { for ( i = 0; i < currBlock->numLocalElems_; i++ ) delete [] currBlock->elemStiffMat_[i]; @@ -4528,26 +4527,26 @@ int MLI_FEData::deleteElemBlock(int blockID) if (currBlock->elemVolume_ != NULL) delete [] currBlock->elemVolume_; if (currBlock->elemMaterial_ != NULL) delete [] currBlock->elemMaterial_; if (currBlock->elemParentIDs_ != NULL) delete [] currBlock->elemParentIDs_; - if (currBlock->elemLoads_ != NULL) + if (currBlock->elemLoads_ != NULL) { for ( i = 0; i < currBlock->numLocalElems_; i++ ) delete [] currBlock->elemLoads_[i]; delete [] currBlock->elemLoads_; } - if (currBlock->elemSol_ != NULL) + if (currBlock->elemSol_ != NULL) { for ( i = 0; i < currBlock->numLocalElems_; i++ ) delete [] currBlock->elemSol_[i]; delete [] currBlock->elemSol_; } - if (currBlock->elemFaceIDList_ != NULL) + if (currBlock->elemFaceIDList_ != NULL) { for ( i = 0; i < currBlock->numLocalElems_; i++ ) delete [] currBlock->elemFaceIDList_[i]; delete [] currBlock->elemFaceIDList_; } if (currBlock->elemBCIDList_ != NULL) delete [] currBlock->elemBCIDList_; - if (currBlock->elemBCFlagList_ != NULL) + if (currBlock->elemBCFlagList_ != NULL) { for ( i = 0; i < currBlock->numLocalElems_; i++ ) delete 
[] currBlock->elemBCFlagList_[i]; @@ -4567,7 +4566,7 @@ int MLI_FEData::deleteElemBlock(int blockID) if (currBlock->nodeGlobalIDs_ != NULL) delete [] currBlock->nodeGlobalIDs_; if (currBlock->nodeFieldIDs_ != NULL) delete [] currBlock->nodeFieldIDs_; - if (currBlock->nodeCoordinates_ != NULL) + if (currBlock->nodeCoordinates_ != NULL) delete [] currBlock->nodeCoordinates_; if (currBlock->nodeBCIDList_ != NULL) delete [] currBlock->nodeBCIDList_; if (currBlock->nodeBCFlagList_ != NULL) @@ -4580,9 +4579,9 @@ int MLI_FEData::deleteElemBlock(int blockID) delete [] currBlock->nodeBCValues_; } if (currBlock->sharedNodeIDs_ != NULL) delete [] currBlock->sharedNodeIDs_; - if (currBlock->sharedNodeNProcs_ != NULL) + if (currBlock->sharedNodeNProcs_ != NULL) delete [] currBlock->sharedNodeNProcs_; - if (currBlock->sharedNodeProc_ != NULL) + if (currBlock->sharedNodeProc_ != NULL) { for ( i = 0; i < currBlock->numSharedNodes_; i++ ) delete [] currBlock->sharedNodeProc_[i]; @@ -4599,7 +4598,7 @@ int MLI_FEData::deleteElemBlock(int blockID) currBlock->nodeOffset_ = 0; if (currBlock->faceGlobalIDs_ != NULL) delete [] currBlock->faceGlobalIDs_; - if (currBlock->faceNodeIDList_ != NULL) + if (currBlock->faceNodeIDList_ != NULL) { int nFaces = currBlock->numLocalFaces_ + currBlock->numExternalFaces_; for ( i = 0; i < nFaces; i++ ) @@ -4607,9 +4606,9 @@ int MLI_FEData::deleteElemBlock(int blockID) delete [] currBlock->faceNodeIDList_; } if (currBlock->sharedFaceIDs_ != NULL) delete [] currBlock->sharedFaceIDs_; - if (currBlock->sharedFaceNProcs_ != NULL) + if (currBlock->sharedFaceNProcs_ != NULL) delete [] currBlock->sharedFaceNProcs_; - if (currBlock->sharedFaceProc_ != NULL) + if (currBlock->sharedFaceProc_ != NULL) { for ( i = 0; i < currBlock->numSharedFaces_; i++ ) delete [] currBlock->sharedFaceProc_[i]; @@ -4636,7 +4635,7 @@ int MLI_FEData::searchElement(int key) int index; MLI_ElemBlock *currBlock = elemBlockList_[currentElemBlock_]; - index = MLI_Utils_BinarySearch(key, 
currBlock->elemGlobalIDs_, + index = MLI_Utils_BinarySearch(key, currBlock->elemGlobalIDs_, currBlock->numLocalElems_); return index; } @@ -4650,7 +4649,7 @@ int MLI_FEData::searchNode(int key) int index; MLI_ElemBlock *currBlock = elemBlockList_[currentElemBlock_]; - index = MLI_Utils_BinarySearch(key, currBlock->nodeGlobalIDs_, + index = MLI_Utils_BinarySearch(key, currBlock->nodeGlobalIDs_, currBlock->numLocalNodes_); if ( index < 0 ) { @@ -4671,7 +4670,7 @@ int MLI_FEData::searchFace(int key) int index; MLI_ElemBlock *currBlock = elemBlockList_[currentElemBlock_]; - index = MLI_Utils_BinarySearch(key, currBlock->faceGlobalIDs_, + index = MLI_Utils_BinarySearch(key, currBlock->faceGlobalIDs_, currBlock->numLocalFaces_); if ( index < 0 ) { diff --git a/src/FEI_mv/femli/mli_mapper.cxx b/src/FEI_mv/femli/mli_mapper.cxx index ed9114eb7..7e3c48469 100644 --- a/src/FEI_mv/femli/mli_mapper.cxx +++ b/src/FEI_mv/femli/mli_mapper.cxx @@ -6,14 +6,13 @@ ******************************************************************************/ #include -#include #include "_hypre_utilities.h" #include "HYPRE.h" #include "mli_mapper.h" #include "mli_utils.h" /*************************************************************************** - * constructor function for the MLI_Mapper + * constructor function for the MLI_Mapper *--------------------------------------------------------------------------*/ MLI_Mapper::MLI_Mapper() @@ -24,7 +23,7 @@ MLI_Mapper::MLI_Mapper() } /*************************************************************************** - * destructor function for the MLI_Mapper + * destructor function for the MLI_Mapper *--------------------------------------------------------------------------*/ MLI_Mapper::~MLI_Mapper() @@ -58,16 +57,16 @@ int MLI_Mapper::setMap(int nItems, int *itemList, int *mapList) * adjust map offset (This is used for slide surface reduction) *--------------------------------------------------------------------------*/ -int 
MLI_Mapper::adjustMapOffset(MPI_Comm comm, int *procNRows, +int MLI_Mapper::adjustMapOffset(MPI_Comm comm, int *procNRows, int *procOffsets) { int i, j, nprocs; if ( nEntries <= 0 ) return -1; MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - for ( i = 0; i < nEntries; i++ ) + for ( i = 0; i < nEntries; i++ ) { - for ( j = 0; j < nprocs; j++ ) + for ( j = 0; j < nprocs; j++ ) if ( tokenList[i] < procNRows[j] ) break; tokenMap[i] -= procOffsets[j-1]; } @@ -90,7 +89,7 @@ int MLI_Mapper::getMap(int nItems, int *itemList, int *mapList) MLI_Utils_IntQSort2( itemTemp, itemAux, 0, nItems-1 ); counter = 0; - for ( i = 0; i < nItems; i++ ) + for ( i = 0; i < nItems; i++ ) { if ( itemTemp[i] == tokenList[counter] ) mapList[itemAux[i]] = tokenMap[counter]; @@ -111,7 +110,7 @@ int MLI_Mapper::getMap(int nItems, int *itemList, int *mapList) { printf("MLI_Mapper::getMap - item not found %d.\n", itemList[i]); exit(1); - } + } } delete [] itemTemp; delete [] itemAux; @@ -119,7 +118,7 @@ int MLI_Mapper::getMap(int nItems, int *itemList, int *mapList) } /*************************************************************************** - * setParams + * setParams *--------------------------------------------------------------------------*/ int MLI_Mapper::setParams(char *param_string, int argc, char **argv) diff --git a/src/FEI_mv/femli/mli_matrix.cxx b/src/FEI_mv/femli/mli_matrix.cxx index ae92614f0..8ec10d53b 100644 --- a/src/FEI_mv/femli/mli_matrix.cxx +++ b/src/FEI_mv/femli/mli_matrix.cxx @@ -7,7 +7,6 @@ #include #include -#include #include "_hypre_utilities.h" #include "HYPRE.h" #include "_hypre_parcsr_mv.h" @@ -16,7 +15,7 @@ #include "mli_utils.h" /*************************************************************************** - * constructor function for the MLI_Matrix + * constructor function for the MLI_Matrix *--------------------------------------------------------------------------*/ MLI_Matrix::MLI_Matrix(void *inMatrix,char *inName, MLI_Function *func) @@ -40,7 +39,7 @@ 
MLI_Matrix::MLI_Matrix(void *inMatrix,char *inName, MLI_Function *func) } /*************************************************************************** - * destructor function for the MLI_Matrix + * destructor function for the MLI_Matrix *--------------------------------------------------------------------------*/ MLI_Matrix::~MLI_Matrix() @@ -59,7 +58,7 @@ MLI_Matrix::~MLI_Matrix() * apply function ( vec3 = alpha * Matrix * vec1 + beta * vec2) *--------------------------------------------------------------------------*/ -int MLI_Matrix::apply(double alpha, MLI_Vector *vec1, double beta, +int MLI_Matrix::apply(double alpha, MLI_Vector *vec1, double beta, MLI_Vector *vec2, MLI_Vector *vec3) { int irow, status, ncolsA, nrowsV, mypid, index; @@ -118,7 +117,7 @@ int MLI_Matrix::apply(double alpha, MLI_Vector *vec1, double beta, nrowsV = hypre_VectorSize(hypre_ParVectorLocalVector(hypreV1)); if (!strcmp(name_, "HYPRE_ParCSR")) ncolsA = hypre_ParCSRMatrixNumCols(hypreA); - else + else ncolsA = hypre_ParCSRMatrixNumRows(hypreA); if (subMatrixLength_ == 0 || ncolsA == nrowsV) { @@ -212,7 +211,7 @@ int MLI_Matrix::apply(double alpha, MLI_Vector *vec1, double beta, } /****************************************************************************** - * create a vector from information of this matrix + * create a vector from information of this matrix *---------------------------------------------------------------------------*/ MLI_Vector *MLI_Matrix::createVector() @@ -252,21 +251,21 @@ MLI_Vector *MLI_Matrix::createVector() ierr += HYPRE_IJVectorGetObject(IJvec, (void **) &newVec); ierr += HYPRE_IJVectorSetObjectType(IJvec, -1); ierr += HYPRE_IJVectorDestroy(IJvec); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_ParVectorSetConstantValues(newVec, 0.0); sprintf(paramString, "HYPRE_ParVector"); funcPtr = new MLI_Function(); - MLI_Utils_HypreParVectorGetDestroyFunc(funcPtr); + MLI_Utils_HypreParVectorGetDestroyFunc(funcPtr); mli_vec = new MLI_Vector((void*) newVec, 
paramString, funcPtr); delete funcPtr; return mli_vec; } /****************************************************************************** - * create a vector from information of this matrix + * create a vector from information of this matrix *---------------------------------------------------------------------------*/ -int MLI_Matrix::getMatrixInfo(char *paramString, int &intParams, +int MLI_Matrix::getMatrixInfo(char *paramString, int &intParams, double &dbleParams) { int matInfo[4]; @@ -303,7 +302,7 @@ int MLI_Matrix::getMatrixInfo(char *paramString, int &intParams, } /****************************************************************************** - * load submatrix equation list + * load submatrix equation list *---------------------------------------------------------------------------*/ void MLI_Matrix::setSubMatrixEqnList(int length, int *list) @@ -336,7 +335,7 @@ void *MLI_Matrix::takeMatrix() } /****************************************************************************** - * get the name of this matrix + * get the name of this matrix *---------------------------------------------------------------------------*/ char *MLI_Matrix::getName() diff --git a/src/FEI_mv/femli/mli_matrix_mult.cxx b/src/FEI_mv/femli/mli_matrix_mult.cxx index cd109a420..3ab16573d 100644 --- a/src/FEI_mv/femli/mli_matrix_mult.cxx +++ b/src/FEI_mv/femli/mli_matrix_mult.cxx @@ -7,7 +7,6 @@ #include #include -#include #include "_hypre_utilities.h" #include "HYPRE.h" #include "_hypre_parcsr_mv.h" @@ -17,14 +16,14 @@ #include "mli_utils.h" /*************************************************************************** - * get the external rows of B in order to multiply A * B + * get the external rows of B in order to multiply A * B *--------------------------------------------------------------------------*/ -void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, +void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, MLI_Matrix **Cmat) { int ir, ic, is, ia, ia2, ib, index, 
length, offset, iTemp; - int *iArray, ibegin, sortFlag, tempCnt, nprocs, mypid; + int *iArray, ibegin, sortFlag, tempCnt, nprocs, mypid; int BExtNumUniqueCols, BExtNRows, *BExtRowLengs, *BExtCols, BExtNnz; int *extColList, *extColListAux; int *BRowStarts, *BColStarts, BNRows, BNCols, BStartCol, BEndCol; @@ -45,7 +44,7 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, MLI_Function *funcPtr; hypre_CSRMatrix *BDiag, *BOffd, *ADiag, *AOffd, *CDiag, *COffd; hypre_ParCSRMatrix *hypreA, *hypreB, *hypreC; - + /* ----------------------------------------------------------------------- * check to make sure both matrices are ParCSR matrices * ----------------------------------------------------------------------*/ @@ -63,8 +62,8 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, MPI_Comm_rank(mpiComm, &mypid); /* ----------------------------------------------------------------------- - * Get external rows of B (BExtRowLengs has been allocated 1 longer than - * BExtNRows in the GetExtRows function) + * Get external rows of B (BExtRowLengs has been allocated 1 longer than + * BExtNRows in the GetExtRows function) * Extract the diagonal indices into arrays diagCols * ----------------------------------------------------------------------*/ @@ -77,7 +76,7 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, BExtRowLengs[ir] = tempCnt; tempCnt += iTemp; } - if ( BExtNRows > 0 ) + if ( BExtNRows > 0 ) { BExtRowLengs[BExtNRows*2] = tempCnt; for ( ir = 0; ir < BExtNRows*2; ir++ ) @@ -123,7 +122,7 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, mergeSortList2D = new int*[mergeSortNList]; mergeSortAuxs = new int*[mergeSortNList]; mergeSortLengs = new int[mergeSortNList]; - for ( is = 0; is < BExtNRows*2; is++ ) + for ( is = 0; is < BExtNRows*2; is++ ) { if ( is % 2 == 0 ) { @@ -138,16 +137,16 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, mergeSortLengs[is] = BExtRowLengs[is+1] - BExtRowLengs[is]; } } - 
for ( ir = 0; ir < BExtNRows; ir++ ) + for ( ir = 0; ir < BExtNRows; ir++ ) extDiagListAux[ir] = extColListAux[BExtRowLengs[ir*2]]; mergeSortList2D[BExtNRows*2] = diagCols; mergeSortAuxs[BExtNRows*2] = extDiagListAux; mergeSortLengs[BExtNRows*2] = BExtNRows; - MLI_Utils_IntMergeSort(mergeSortNList, mergeSortLengs, + MLI_Utils_IntMergeSort(mergeSortNList, mergeSortLengs, mergeSortList2D, mergeSortAuxs, &BExtNumUniqueCols, &mergeSortList); - for ( ir = 0; ir < BExtNRows; ir++ ) + for ( ir = 0; ir < BExtNRows; ir++ ) extColListAux[BExtRowLengs[ir*2]] = extDiagListAux[ir]; delete [] mergeSortList2D; delete [] mergeSortAuxs; @@ -157,42 +156,42 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, delete [] diagCols; if ( BExtNumUniqueCols > 0 ) extColList = new int[BExtNumUniqueCols]; else extColList = NULL; - for ( ir = 0; ir < BExtNumUniqueCols; ir++ ) + for ( ir = 0; ir < BExtNumUniqueCols; ir++ ) extColList[ir] = mergeSortList[ir]; free( mergeSortList ); } /* ----------------------------------------------------------------------- * Next prune the internal columns (to my proc) from this list (by setting - * the colum index to its ones-complement), since they have already been + * the colum index to its ones-complement), since they have already been * included elsewhere * ----------------------------------------------------------------------*/ BColStarts = hypre_ParCSRMatrixColStarts(hypreB); BStartCol = BColStarts[mypid]; BEndCol = BColStarts[mypid+1] - 1; - for ( ir = 0; ir < BExtNumUniqueCols; ir++ ) + for ( ir = 0; ir < BExtNumUniqueCols; ir++ ) { if ( extColList[ir] >= BStartCol && extColList[ir] <= BEndCol ) extColList[ir] = - (extColList[ir] - BStartCol) - 1; } /* ----------------------------------------------------------------------- - * Next prune the external columns by eliminating all columns already - * present in the BColMap list, which is assumed ordered + * Next prune the external columns by eliminating all columns already + * present in 
the BColMap list, which is assumed ordered * ----------------------------------------------------------------------*/ BOffd = hypre_ParCSRMatrixOffd(hypreB); BColMap = hypre_ParCSRMatrixColMapOffd(hypreB); BColMapInd = 0; BNCols = BColStarts[mypid+1] - BColStarts[mypid]; - for ( ir = 0; ir < BExtNumUniqueCols; ir++ ) + for ( ir = 0; ir < BExtNumUniqueCols; ir++ ) { if ( extColList[ir] >= 0 ) { while (BColMapInd 0 ) iArray = new int[BExtNumUniqueCols]; tempCnt = 0; - for ( ir = 0; ir < BExtNumUniqueCols; ir++ ) + for ( ir = 0; ir < BExtNumUniqueCols; ir++ ) { - if ( extColList[ir] >= 0 ) iArray[ir] = tempCnt++; + if ( extColList[ir] >= 0 ) iArray[ir] = tempCnt++; else iArray[ir] = -1; } - for ( ir = 0; ir < BExtNnz; ir++ ) + for ( ir = 0; ir < BExtNnz; ir++ ) { index = extColListAux[ir]; iTemp = extColList[index]; if ( iTemp < 0 ) BExtCols[ir] = - iTemp - 1; else BExtCols[ir] = iArray[index] + BNCols + BExtNRows; } - if ( BExtNumUniqueCols > 0 ) delete [] iArray; + if ( BExtNumUniqueCols > 0 ) delete [] iArray; tempCnt = BExtNumUniqueCols; BExtNumUniqueCols = 0; - for ( ir = 0; ir < tempCnt; ir++ ) + for ( ir = 0; ir < tempCnt; ir++ ) { - if ( extColList[ir] >= 0 ) + if ( extColList[ir] >= 0 ) extColList[BExtNumUniqueCols++] = extColList[ir]; } if ( BExtNRows > 0 ) delete [] extColListAux; CExtNCols = BNCols + BExtNRows + BExtNumUniqueCols; /* ----------------------------------------------------------------------- - * fetch information about matrix A and B + * fetch information about matrix A and B * ----------------------------------------------------------------------*/ if (!hypre_ParCSRMatrixCommPkg(hypreA)) hypre_MatvecCommPkgCreate(hypreA); @@ -276,7 +275,7 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, { colIndA = ADiagJA[ia2]; if ( colIndA < BNRows ) - { + { for ( ib = BDiagIA[colIndA]; ib < BDiagIA[colIndA+1]; ib++ ) { colIndB = BDiagJA[ib]; @@ -299,10 +298,10 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, else { index 
= colIndA - BNRows; - for (ib=BExtRowLengs[2*index]; ib 0) CColMap = hypre_TAlloc(int, COffdNCols , HYPRE_MEMORY_HOST); for ( ia = 0; ia < BExtNRows; ia++ ) CColMap[ia] = BColMap[ia]; - for ( ia = BExtNRows; ia < COffdNCols; ia++ ) + for ( ia = BExtNRows; ia < COffdNCols; ia++ ) CColMap[ia] = extColList[ia-BExtNRows]; if ( COffdNCols > 0 ) CColMapAux = new int[COffdNCols]; for ( ia = 0; ia < COffdNCols; ia++ ) CColMapAux[ia] = ia; MLI_Utils_IntQSort2(CColMap, CColMapAux, 0, COffdNCols-1); iArray = CColMapAux; if ( COffdNCols > 0 ) CColMapAux = new int[COffdNCols]; - for ( ia = 0; ia < COffdNCols; ia++ ) + for ( ia = 0; ia < COffdNCols; ia++ ) CColMapAux[iArray[ia]] = ia; if ( COffdNCols > 0 ) delete [] iArray; @@ -402,7 +401,7 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, colIndA = ADiagJA[ia2]; dTempA = ADiagAA[ia2]; if ( colIndA < BNRows ) - { + { for ( ib = BDiagIA[colIndA]; ib < BDiagIA[colIndA+1]; ib++ ) { colIndB = BDiagJA[ib]; @@ -437,7 +436,7 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, { colIndB = BExtCols[ib]; dTempB = BOffdAA[ib]; - if ( colIndB < CNCols ) + if ( colIndB < CNCols ) { offset = CDiagReg[colIndB]; if ( offset < iTempDiag ) @@ -467,7 +466,7 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, colIndA = AOffdJA[ia2] + ANCols; dTempA = AOffdAA[ia2]; if ( colIndA < BNRows ) - { + { for ( ib = BDiagIA[colIndA]; ib < BDiagIA[colIndA+1]; ib++ ) { colIndB = BDiagJA[ib]; @@ -498,11 +497,11 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, else { index = colIndA - BNRows; - for (ib=BExtRowLengs[2*index];ib= 0 ) { - for ( ib = ia2; ib > CDiagIA[ia]; ib-- ) + for ( ib = ia2; ib > CDiagIA[ia]; ib-- ) { CDiagJA[ib] = CDiagJA[ib-1]; CDiagAA[ib] = CDiagAA[ib-1]; } CDiagJA[CDiagIA[ia]] = iTemp; CDiagAA[CDiagIA[ia]] = dTemp; - } - } + } + } /* ----------------------------------------------------------------------- * finally form HYPRE_ParCSRMatrix for the product @@ -569,12 +568,12 @@ 
void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, #if 0 if ( mypid == 1 ) { - for ( ia = 0; ia < CNRows; ia++ ) + for ( ia = 0; ia < CNRows; ia++ ) { - for ( ia2 = CDiagIA[ia]; ia2 < CDiagIA[ia+1]; ia2++ ) + for ( ia2 = CDiagIA[ia]; ia2 < CDiagIA[ia+1]; ia2++ ) printf("%d : CDiag %5d = %5d %e\n",mypid,ia,CDiagJA[ia2], CDiagAA[ia2]); - for ( ia2 = COffdIA[ia]; ia2 < COffdIA[ia+1]; ia2++ ) + for ( ia2 = COffdIA[ia]; ia2 < COffdIA[ia+1]; ia2++ ) printf("%d : COffd %5d = %5d %e\n",mypid,ia,COffdJA[ia2], COffdAA[ia2]); } @@ -609,7 +608,7 @@ void MLI_Matrix_MatMatMult( MLI_Matrix *Amat, MLI_Matrix *Bmat, } /*************************************************************************** - * get the external rows of B in order to multiply A * B + * get the external rows of B in order to multiply A * B * (modified so that extRowLengs has 2 numbers for each row, one for * the diagonal part, and the other for the off-diagonal part. This is * done to optimize the code in order each part is sorted.) 
@@ -656,7 +655,7 @@ void MLI_Matrix_GetExtRows( MLI_Matrix *Amat, MLI_Matrix *Bmat, int *extNRowsP, #endif BColStarts = hypre_ParCSRMatrixColStarts(hypreB); BStartCol = BColStarts[mypid]; - if ( nprocs == 1 ) + if ( nprocs == 1 ) { (*extRowLengsP) = NULL; (*extColsP) = NULL; @@ -684,7 +683,7 @@ void MLI_Matrix_GetExtRows( MLI_Matrix *Amat, MLI_Matrix *Bmat, int *extNRowsP, if ( nRecvs + nSends > 0 ) requests = new MPI_Request[nRecvs+nSends]; /* ----------------------------------------------------------------------- - * fetch the local B matrix + * fetch the local B matrix * ----------------------------------------------------------------------*/ colMapOffd = hypre_ParCSRMatrixColMapOffd(hypreB); @@ -698,7 +697,7 @@ void MLI_Matrix_GetExtRows( MLI_Matrix *Amat, MLI_Matrix *Bmat, int *extNRowsP, BOffdAA = hypre_CSRMatrixData(BOffd); /* ----------------------------------------------------------------------- - * construct external row lengths (recvRowLengs) + * construct external row lengths (recvRowLengs) * ----------------------------------------------------------------------*/ if ( recvNRows > 0 ) recvRowLengs = new int[2*recvNRows+1]; @@ -709,7 +708,7 @@ void MLI_Matrix_GetExtRows( MLI_Matrix *Amat, MLI_Matrix *Bmat, int *extNRowsP, proc = recvProcs[ip]; offset = recvStarts[ip]; length = recvStarts[ip+1] - offset; - MPI_Irecv(&(recvRowLengs[offset*2]), length*2, MPI_INT, proc, 27027, + MPI_Irecv(&(recvRowLengs[offset*2]), length*2, MPI_INT, proc, 27027, mpiComm, &requests[requestCnt++]); } if ( sendNRows > 0 ) iSendBuf = new int[sendNRows*2]; @@ -802,7 +801,7 @@ void MLI_Matrix_GetExtRows( MLI_Matrix *Amat, MLI_Matrix *Bmat, int *extNRowsP, length = recvStarts[ip+1] - offset; curNnz = 0; for (jp = 0; jp < length*2; jp++) curNnz += recvRowLengs[offset*2+jp]; - MPI_Irecv(&recvVals[totalRecvNnz], curNnz, MPI_DOUBLE, proc, 27029, + MPI_Irecv(&recvVals[totalRecvNnz], curNnz, MPI_DOUBLE, proc, 27029, mpiComm, &requests[requestCnt++]); totalRecvNnz += curNnz; } @@ -829,7 
+828,7 @@ void MLI_Matrix_GetExtRows( MLI_Matrix *Amat, MLI_Matrix *Bmat, int *extNRowsP, dSendBuf[curNnz++] = BOffdAA[kp]; } curNnz -= totalSendNnz; - MPI_Isend(&(dSendBuf[totalSendNnz]), curNnz, MPI_DOUBLE, proc, 27029, + MPI_Isend(&(dSendBuf[totalSendNnz]), curNnz, MPI_DOUBLE, proc, 27029, mpiComm, &requests[requestCnt++]); totalSendNnz += curNnz; } @@ -860,9 +859,9 @@ void MLI_Matrix_GetExtRows( MLI_Matrix *Amat, MLI_Matrix *Bmat, int *extNRowsP, length = recvStarts[ip+1] - offset; curNnz = 0; for (jp = 0; jp < length*2; jp++) curNnz += recvRowLengs[offset*2+jp]; - for (jp = 0; jp < curNnz; jp++) + for (jp = 0; jp < curNnz; jp++) { - printf("%d : recvData = %5d %e\n", mypid, recvCols[totalRecvNnz], + printf("%d : recvData = %5d %e\n", mypid, recvCols[totalRecvNnz], recvVals[totalRecvNnz]); totalRecvNnz++; } diff --git a/src/FEI_mv/femli/mli_matrix_utils.cxx b/src/FEI_mv/femli/mli_matrix_utils.cxx index e75539152..f2fb139c6 100644 --- a/src/FEI_mv/femli/mli_matrix_utils.cxx +++ b/src/FEI_mv/femli/mli_matrix_utils.cxx @@ -7,7 +7,6 @@ #include #include -#include #include "HYPRE.h" #include "_hypre_utilities.h" #include "_hypre_parcsr_mv.h" diff --git a/src/FEI_mv/femli/mli_method_amgcr.cxx b/src/FEI_mv/femli/mli_method_amgcr.cxx index bf50a4c73..1e95f8d9c 100644 --- a/src/FEI_mv/femli/mli_method_amgcr.cxx +++ b/src/FEI_mv/femli/mli_method_amgcr.cxx @@ -12,7 +12,6 @@ #endif #include -#include #include "HYPRE.h" #include "_hypre_utilities.h" #include "_hypre_parcsr_ls.h" @@ -94,7 +93,7 @@ int MLI_Method_AMGCR::setParams(char *inName, int argc, char *argv[]) comm = getComm(); MPI_Comm_rank( comm, &mypid ); sscanf(inName, "%s", param1); - if ( outputLevel_ >= 1 && mypid == 0 ) + if ( outputLevel_ >= 1 && mypid == 0 ) printf("\tMLI_Method_AMGCR::setParam = %s\n", inName); if ( !strcmp(param1, "setOutputLevel" )) { @@ -147,7 +146,7 @@ int MLI_Method_AMGCR::setParams(char *inName, int argc, char *argv[]) printf(" argument[0] : number of relaxation sweeps \n"); printf(" 
argument[1] : relaxation weights\n"); return 1; - } + } nSweeps = *(int *) argv[0]; weights = (double *) argv[1]; smootherNum_ = nSweeps; @@ -167,7 +166,7 @@ int MLI_Method_AMGCR::setParams(char *inName, int argc, char *argv[]) printf(" argument[0] : number of relaxation sweeps \n"); printf(" argument[1] : relaxation weights\n"); return 1; - } + } else if ( strcmp(param2, "SuperLU") ) { strcpy(coarseSolver_, param2); @@ -189,7 +188,7 @@ int MLI_Method_AMGCR::setParams(char *inName, int argc, char *argv[]) else if ( !strcmp(param1, "setParamFile" )) { param3 = (char *) argv[0]; - strcpy( paramFile_, param3 ); + strcpy( paramFile_, param3 ); return 0; } else if ( !strcmp(param1, "print" )) @@ -204,7 +203,7 @@ int MLI_Method_AMGCR::setParams(char *inName, int argc, char *argv[]) * generate multilevel structure * --------------------------------------------------------------------- */ -int MLI_Method_AMGCR::setup( MLI *mli ) +int MLI_Method_AMGCR::setup( MLI *mli ) { int level, mypid, *ISMarker, localNRows; int irow, nrows, gNRows, numFpts, *fList;; @@ -243,7 +242,7 @@ int MLI_Method_AMGCR::setup( MLI *mli ) /* -------------------------------------------------- */ mli_Amat = mli->getSystemMatrix(level); - assert (mli_Amat != NULL); + hypre_assert (mli_Amat != NULL); hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix(); gNRows = hypre_ParCSRMatrixGlobalNumRows(hypreA); ADiag = hypre_ParCSRMatrixDiag(hypreA); @@ -271,31 +270,31 @@ int MLI_Method_AMGCR::setup( MLI *mli ) ADiag = hypre_ParCSRMatrixDiag(hypreA); ADiagI = hypre_CSRMatrixI(ADiag); ADiagJ = hypre_CSRMatrixJ(ADiag); - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) { - if (ISMarker[irow] == 0) + if (ISMarker[irow] == 0) { ISMarker[irow] = 1; - for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) + for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) if (ISMarker[ADiagJ[jcol]] == 0) ISMarker[ADiagJ[jcol]] = -1; } } - for (irow = 0; irow < localNRows; irow++) + for 
(irow = 0; irow < localNRows; irow++) if (ISMarker[irow] < 0) ISMarker[irow] = 0; #endif } - else + else { ISMarker = new int[localNRows]; for (irow = 0; irow < localNRows; irow++) ISMarker[irow] = 0; } - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) if (ISMarker[irow] < 0) ISMarker[irow] = 0; mli_Affmat = performCR(mli_Amat, ISMarker, &mli_Afcmat); nrows = 0; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) if (ISMarker[irow] == 1) nrows++; if (nrows < minCoarseSize_) break; mli_Pmat = createPmat(ISMarker, mli_Amat, mli_Affmat, mli_Afcmat); @@ -325,7 +324,7 @@ int MLI_Method_AMGCR::setup( MLI *mli ) mli->setSystemMatrix(level+1, mli_cAmat); elapsedTime = (MLI_Utils_WTime() - startTime); RAPTime_ += elapsedTime; - if (mypid == 0 && outputLevel_ > 0) + if (mypid == 0 && outputLevel_ > 0) printf("\tRAP computed, time = %e seconds.\n", elapsedTime); /* -------------------------------------------------- */ @@ -343,20 +342,20 @@ int MLI_Method_AMGCR::setup( MLI *mli ) sprintf(paramString, "relaxWeight"); smootherPtr->setParams(paramString, 2, targv); numFpts = 0; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) if (ISMarker[irow] == 0) numFpts++; #if 1 - if (numFpts > 0) + if (numFpts > 0) { fList = new int[numFpts]; numFpts = 0; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) if (ISMarker[irow] == 0) fList[numFpts++] = irow; targv[0] = (char *) &numFpts; targv[1] = (char *) fList; sprintf(paramString, "setFptList"); smootherPtr->setParams(paramString, 2, targv); - } + } sprintf(paramString, "setModifiedDiag"); smootherPtr->setParams(paramString, 0, NULL); smootherPtr->setup(mli_Affmat); @@ -408,7 +407,7 @@ int MLI_Method_AMGCR::setOutputLevel( int level ) } /* ********************************************************************* * - * set number of levels + * set number of levels * 
--------------------------------------------------------------------- */ int MLI_Method_AMGCR::setNumLevels( int nlevels ) @@ -418,7 +417,7 @@ int MLI_Method_AMGCR::setNumLevels( int nlevels ) } /* ********************************************************************* * - * select independent set + * select independent set * --------------------------------------------------------------------- */ int MLI_Method_AMGCR::selectIndepSet(MLI_Matrix *mli_Amat, int **indepSet) @@ -442,9 +441,9 @@ int MLI_Method_AMGCR::selectIndepSet(MLI_Matrix *mli_Amat, int **indepSet) MPI_Comm_size(comm, &nprocs); measureArray = new double[localNRows+numColsOffd]; - for (irow = 0; irow < localNRows+numColsOffd; irow++) + for (irow = 0; irow < localNRows+numColsOffd; irow++) measureArray[irow] = 0; - for (irow = 0; irow < ADiagI[localNRows]; irow++) + for (irow = 0; irow < ADiagI[localNRows]; irow++) measureArray[ADiagJ[irow]] += 1; hypre_BoomerAMGCreateS(hypreA, 0.0e0, 0.0e0, 1, NULL, &hypreS); @@ -452,7 +451,7 @@ int MLI_Method_AMGCR::selectIndepSet(MLI_Matrix *mli_Amat, int **indepSet) graphArraySize = localNRows; graphArray = new int[localNRows]; - for (irow = 0; irow < localNRows; irow++) graphArray[irow] = irow; + for (irow = 0; irow < localNRows; irow++) graphArray[irow] = irow; if (numColsOffd) graphArrayOffd = new int[numColsOffd]; else graphArrayOffd = NULL; @@ -460,7 +459,7 @@ int MLI_Method_AMGCR::selectIndepSet(MLI_Matrix *mli_Amat, int **indepSet) ISMarker = new int[localNRows]; for (irow = 0; irow < localNRows; irow++) ISMarker[irow] = 0; - if (numColsOffd) + if (numColsOffd) { ISMarkerOffd = new int[numColsOffd]; for (irow = 0; irow < numColsOffd; irow++) ISMarkerOffd[irow] = 0; @@ -470,7 +469,7 @@ int MLI_Method_AMGCR::selectIndepSet(MLI_Matrix *mli_Amat, int **indepSet) hypre_BoomerAMGIndepSet(hypreS, measureArray, graphArray, graphArraySize, graphArrayOffd, numColsOffd, ISMarker, ISMarkerOffd); - + delete [] measureArray; delete [] graphArray; if (numColsOffd > 0) 
delete [] graphArrayOffd; @@ -554,7 +553,7 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, for (iP = 0; iP < nprocs; iP++) reduceArray1[iP] = 0; reduceArray1[mypid] = numFpts; MPI_Allreduce(reduceArray1,reduceArray2,nprocs,MPI_INT,MPI_SUM,comm); - for (iP = nprocs-1; iP >= 0; iP--) + for (iP = nprocs-1; iP >= 0; iP--) reduceArray2[iP+1] = reduceArray2[iP]; reduceArray2[0] = 0; for (iP = 2; iP <= nprocs; iP++) reduceArray2[iP] += reduceArray2[iP-1]; @@ -572,20 +571,20 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, ierr = HYPRE_IJMatrixCreate(comm,startRow,startRow+localNRows-1, FStartRow,FStartRow+FNRows-1,&IJPFF); ierr = HYPRE_IJMatrixSetObjectType(IJPFF, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[localNRows]; for (irow = 0; irow < localNRows; irow++) rowLengs[irow] = 1; ierr = HYPRE_IJMatrixSetRowSizes(IJPFF, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJPFF); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixCreate(comm,startRow,startRow+localNRows-1, CStartRow,CStartRow+CNRows-1, &IJPFC); ierr = HYPRE_IJMatrixSetObjectType(IJPFC, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixSetRowSizes(IJPFC, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJPFC); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; /* --------------------------------------------------- */ @@ -597,7 +596,7 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, for (irow = 0; irow < localNRows; irow++) { rowIndex = startRow + irow; - if (indepSet[irow] == 0) + if (indepSet[irow] == 0) { colIndex = FStartRow + rowCount; HYPRE_IJMatrixSetValues(IJPFF,1,&one,(const int *) &rowIndex, @@ -613,14 +612,14 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, } } ierr = HYPRE_IJMatrixAssemble(IJPFF); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJPFF, (void **) &hyprePFF); 
//hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hyprePFF); sprintf(paramString, "HYPRE_ParCSR" ); mli_PFFMat = new MLI_Matrix((void *)hyprePFF,paramString,NULL); ierr = HYPRE_IJMatrixAssemble(IJPFC); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJPFC, (void **) &hyprePFC); //hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hyprePFC); hypreAPFC = hypre_ParMatmul(hypreA, hyprePFC); @@ -628,12 +627,12 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, hypreAfc = hypre_ParMatmul(hyprePFFT, hypreAPFC); rowStarts = hypre_ParCSRMatrixRowStarts(hyprePFFT); newRowStarts = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - for (irow = 0; irow <= nprocs; irow++) + for (irow = 0; irow <= nprocs; irow++) newRowStarts[irow] = rowStarts[irow]; hypre_ParCSRMatrixRowStarts(hypreAfc) = newRowStarts; colStarts = hypre_ParCSRMatrixColStarts(hypreAPFC); newColStarts = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - for (irow = 0; irow <= nprocs; irow++) + for (irow = 0; irow <= nprocs; irow++) newColStarts[irow] = colStarts[irow]; hypre_ParCSRMatrixColStarts(hypreAfc) = newColStarts; hypre_ParCSRMatrixOwnsRowStarts(hypreAfc) = 1; @@ -647,11 +646,11 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, hypreAff = (hypre_ParCSRMatrix *) mli_AffMat->getMatrix(); colStarts = hypre_ParCSRMatrixColStarts(hyprePFF); newColStarts = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - for (irow = 0; irow <= nprocs; irow++) + for (irow = 0; irow <= nprocs; irow++) newColStarts[irow] = colStarts[irow]; hypre_ParCSRMatrixColStarts(hypreAff) = newColStarts; newColStarts = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - for (irow = 0; irow <= nprocs; irow++) + for (irow = 0; irow <= nprocs; irow++) newColStarts[irow] = colStarts[irow]; hypre_ParCSRMatrixRowStarts(hypreAff) = newColStarts; @@ -758,7 +757,7 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, delete smootherPtr; 
#else MLI_Utils_mJacobiCreate(comm, &hypreSolver); - MLI_Utils_mJacobiSetParams(hypreSolver, PDegree_); + MLI_Utils_mJacobiSetParams(hypreSolver, PDegree_); XData = (double *) hypre_VectorData(hypre_ParVectorLocalVector(hypreX)); aratio = 0.0; for (iV = 0; iV < numVectors_; iV++) @@ -782,7 +781,7 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, (HYPRE_Vector) hypreB, (HYPRE_Vector) hypreX, paramString); hypre_ParCSRMatrixMatvec(-1.0, hypreAff, hypreX, 1.0, hypreB); rnorm1 = sqrt(hypre_ParVectorInnerProd(hypreB, hypreB)); - if (rnorm1 < rnorm0 * 1.0e-10 || rnorm1 < 1.0e-10) + if (rnorm1 < rnorm0 * 1.0e-10 || rnorm1 < 1.0e-10) { printf("\tperformCR : rnorm0, rnorm1 = %e %e\n",rnorm0,rnorm1); break; @@ -796,7 +795,7 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, hypre_ParCSRMatrixMatvec(-1.0, hypreAff, hypreX, 1.0, hypreB); rnorm1 = sqrt(hypre_ParVectorInnerProd(hypreB, hypreB)); rnorm1 = 0.2 * log10(rnorm1/rnorm0); - rnorm1 = pow(1.0e1, rnorm1); + rnorm1 = pow(1.0e1, rnorm1); ratio1 = rnorm1; /* ------------------------------------------------------- */ @@ -827,7 +826,7 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, hypre_ParCSRMatrixMatvec(-1.0, hypreAffT, hypreX, 1.0, hypreB); rnorm1 = sqrt(hypre_ParVectorInnerProd(hypreB, hypreB)); rnorm1 = 0.2 * log10(rnorm1/rnorm0); - ratio2 = pow(1.0e1, rnorm1); + ratio2 = pow(1.0e1, rnorm1); if (ratio1 > ratio2) aratio += ratio1; else aratio += ratio2; #else @@ -840,7 +839,7 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, int *indepSet, /* ------------------------------------------------------- */ hypre_ParVectorAxpy(dOne, hypreX, hypreXacc); - if (ratio1 < targetMu_ && ratio2 < targetMu_) + if (ratio1 < targetMu_ && ratio2 < targetMu_) { printf("\tTrial %3d(%3d) : GMRES norms ratios = %16.8e %16.8e ##\n", iT, iV, ratio1, ratio2); @@ -856,44 +855,44 @@ MLI_Matrix *MLI_Method_AMGCR::performCR(MLI_Matrix *mli_Amat, 
int *indepSet, /* --------------------------------------------------- */ /* select coarse points */ /* --------------------------------------------------- */ - - if (iV == numVectors_) aratio /= (double) numVectors_; + + if (iV == numVectors_) aratio /= (double) numVectors_; printf("aratio = %e\n", aratio); - if ((aratio >= targetMu_ || (iT == 0 && localNRows == FNRows)) && - iT < (numTrials_-1)) + if ((aratio >= targetMu_ || (iT == 0 && localNRows == FNRows)) && + iT < (numTrials_-1)) { - XaccData = (double *) + XaccData = (double *) hypre_VectorData(hypre_ParVectorLocalVector(hypreXacc)); sortIndices = new int[FNRows]; for (irow = 0; irow < FNRows; irow++) sortIndices[irow] = irow; - for (irow = 0; irow < FNRows; irow++) + for (irow = 0; irow < FNRows; irow++) if (XaccData[irow] < 0.0) XaccData[irow] = - XaccData[irow]; //MLI_Utils_DbleQSort2a(XaccData, sortIndices, 0, FNRows-1); if (FNRows > 0) threshold = XaccData[FNRows-1] * cutThreshold_; #if 0 newCount = 0; - for (ic = 0; ic < localNRows; ic++) + for (ic = 0; ic < localNRows; ic++) { threshold = XaccData[FNRows-1] * cutThreshold_; - for (it = 0; it < 6; it++) + for (it = 0; it < 6; it++) { - for (irow = FNRows-1; irow >= 0; irow--) + for (irow = FNRows-1; irow >= 0; irow--) { ddata = XaccData[irow]; if (ddata > threshold) { idata = sortIndices[irow]; fPt = fList[idata]; - if (indepSet[fPt] == 0) + if (indepSet[fPt] == 0) { count = 0; - for (jcol = ADiagI[fPt]; jcol < ADiagI[fPt+1]; jcol++) + for (jcol = ADiagI[fPt]; jcol < ADiagI[fPt+1]; jcol++) if (indepSet[ADiagJ[jcol]] == 1) count++; if (count <= ic) { newCount++; indepSet[fPt] = 1; - for (jcol = ADiagI[fPt];jcol < ADiagI[fPt+1];jcol++) + for (jcol = ADiagI[fPt];jcol < ADiagI[fPt+1];jcol++) if (indepSet[ADiagJ[jcol]] == 0) indepSet[ADiagJ[jcol]] = -1; } @@ -901,15 +900,15 @@ printf("aratio = %e\n", aratio); } } threshold *= 0.1; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) if (indepSet[irow] < 0) indepSet[irow] = 
0; - if ((localNRows+newCount-FNRows) > (localNRows/2) && ic > 2) + if ((localNRows+newCount-FNRows) > (localNRows/2) && ic > 2) { if (((double) newCount/ (double) localNRows) > 0.05) break; } } - if ((localNRows+newCount-FNRows) > (localNRows/2) && ic > 2) + if ((localNRows+newCount-FNRows) > (localNRows/2) && ic > 2) { if (((double) newCount/ (double) localNRows) > 0.05) break; @@ -918,20 +917,20 @@ printf("aratio = %e\n", aratio); #else newCount = 0; threshold = XaccData[FNRows-1] * cutThreshold_; - for (it = 0; it < 1; it++) + for (it = 0; it < 1; it++) { - for (irow = FNRows-1; irow >= 0; irow--) + for (irow = FNRows-1; irow >= 0; irow--) { ddata = XaccData[irow]; if (ddata > threshold) { idata = sortIndices[irow]; fPt = fList[idata]; - if (indepSet[fPt] == 0) + if (indepSet[fPt] == 0) { newCount++; indepSet[fPt] = 1; - for (jcol = ADiagI[fPt];jcol < ADiagI[fPt+1];jcol++) + for (jcol = ADiagI[fPt];jcol < ADiagI[fPt+1];jcol++) if (indepSet[ADiagJ[jcol]] == 0 && habs(ADiagA[jcol]/ADiagA[ADiagI[fPt]]) > 1.0e-12) indepSet[ADiagJ[jcol]] = -1; @@ -939,9 +938,9 @@ printf("aratio = %e\n", aratio); } } threshold *= 0.1; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) if (indepSet[irow] < 0) indepSet[irow] = 0; - if ((localNRows+newCount-FNRows) > (localNRows/2)) + if ((localNRows+newCount-FNRows) > (localNRows/2)) { if (((double) newCount/ (double) localNRows) > 0.1) break; @@ -949,7 +948,7 @@ printf("aratio = %e\n", aratio); } #endif delete [] sortIndices; - if (newCount == 0) + if (newCount == 0) { printf("CR stops because newCount = 0\n"); break; @@ -959,7 +958,7 @@ printf("aratio = %e\n", aratio); /* --------------------------------------------------- */ /* clean up */ /* --------------------------------------------------- */ - + HYPRE_IJMatrixDestroy(IJPFF); hypre_ParCSRMatrixDestroy(hyprePFFT); hypre_ParCSRMatrixDestroy(hypreAPFC); @@ -1034,12 +1033,12 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix 
*mli_Amat, ierr = HYPRE_IJMatrixCreate(comm,AffStartRow,AffStartRow+AffNRows-1, AffStartRow,AffStartRow+AffNRows-1,&IJInvD); ierr = HYPRE_IJMatrixSetObjectType(IJInvD, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[AffNRows]; for (irow = 0; irow < AffNRows; irow++) rowLengs[irow] = 1; ierr = HYPRE_IJMatrixSetRowSizes(IJInvD, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJInvD); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; /* ------------------------------------------------------ */ @@ -1050,14 +1049,14 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, for (irow = 0; irow < localNRows; irow++) { rowIndex = startRow + irow; - if (indepSet[irow] == 0) + if (indepSet[irow] == 0) { - HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreA, rowIndex, + HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) hypreA, rowIndex, &rowSize, &colInd, &colVal); colValue = 1.0; for (jcol = 0; jcol < rowSize; jcol++) { - if (colInd[jcol] == rowIndex) + if (colInd[jcol] == rowIndex) { colValue = colVal[jcol]; break; @@ -1066,17 +1065,17 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, if (colValue >= 0.0) { for (jcol = 0; jcol < rowSize; jcol++) - if (colInd[jcol] != rowIndex && - (indepSet[colInd[jcol]-startRow] == 0) && - colVal[jcol] > 0.0) + if (colInd[jcol] != rowIndex && + (indepSet[colInd[jcol]-startRow] == 0) && + colVal[jcol] > 0.0) colValue += colVal[jcol]; } else { for (jcol = 0; jcol < rowSize; jcol++) - if (colInd[jcol] != rowIndex && - (indepSet[colInd[jcol]-startRow] == 0) && - colVal[jcol] < 0.0) + if (colInd[jcol] != rowIndex && + (indepSet[colInd[jcol]-startRow] == 0) && + colVal[jcol] < 0.0) colValue += colVal[jcol]; } colValue = 1.0 / colValue; @@ -1084,7 +1083,7 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, HYPRE_IJMatrixSetValues(IJInvD,1,&one,(const int *) &colIndex, (const int *) &colIndex, (const double *) &colValue); rowCount++; - 
HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreA, rowIndex, + HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreA, rowIndex, &rowSize, &colInd, &colVal); } } @@ -1094,11 +1093,11 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, /* ------------------------------------------------------ */ ierr = HYPRE_IJMatrixAssemble(IJInvD); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJInvD, (void **) &hypreInvD); ierr += HYPRE_IJMatrixSetObjectType(IJInvD, -1); ierr += HYPRE_IJMatrixDestroy(IJInvD); - assert( !ierr ); + hypre_assert( !ierr ); /* ------------------------------------------------------ */ /* generate polynomial of Aff and invD */ @@ -1112,8 +1111,8 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ADiagI = hypre_CSRMatrixI(ADiag); ADiagJ = hypre_CSRMatrixJ(ADiag); ADiagA = hypre_CSRMatrixData(ADiag); - for (irow = 0; irow < AffNRows; irow++) - for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) + for (irow = 0; irow < AffNRows; irow++) + for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) ADiagA[jcol] = - ADiagA[jcol]; } else if (PDegree_ == 1) @@ -1126,11 +1125,11 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ADiagI = hypre_CSRMatrixI(ADiag); ADiagJ = hypre_CSRMatrixJ(ADiag); ADiagA = hypre_CSRMatrixData(ADiag); - for (irow = 0; irow < AffNRows; irow++) + for (irow = 0; irow < AffNRows; irow++) { - for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) + for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) { - if (ADiagJ[jcol] == irow) + if (ADiagJ[jcol] == irow) ADiagA[jcol] = - omega*DDiagA[irow]*(2.0-omega*ADiagA[jcol]); else ADiagA[jcol] = omega * omega * DDiagA[irow] * ADiagA[jcol]; } @@ -1138,14 +1137,14 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, hypre_ParCSRMatrixOwnsColStarts(hypreInvD) = 0; rowStarts = hypre_ParCSRMatrixRowStarts(hypreA); newRowStarts = 
hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - for (irow = 0; irow <= nprocs; irow++) + for (irow = 0; irow <= nprocs; irow++) newRowStarts[irow] = rowStarts[irow]; hypre_ParCSRMatrixRowStarts(hypreP) = newRowStarts; #else ierr = HYPRE_IJMatrixCreate(comm,AffStartRow,AffStartRow+AffNRows-1, AffStartRow,AffStartRow+AffNRows-1,&IJP); ierr = HYPRE_IJMatrixSetObjectType(IJP, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[AffNRows]; maxRowLeng = 0; ADiag = hypre_ParCSRMatrixDiag(hypreAff); @@ -1161,11 +1160,11 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, if (ADiagJ[jcol] != irow && ADiagA[jcol]*ADiagA[index] < 0.0) newRowSize++; rowLengs[irow] = newRowSize; - if (newRowSize > maxRowLeng) maxRowLeng = newRowSize; + if (newRowSize > maxRowLeng) maxRowLeng = newRowSize; } ierr = HYPRE_IJMatrixSetRowSizes(IJP, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJP); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; newColInd = new int[maxRowLeng]; newColVal = new double[maxRowLeng]; @@ -1183,7 +1182,7 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, { if (ADiagJ[jcol] != irow && ADiagA[jcol]*ADiagA[index] < 0.0) { - newColInd[newRowSize] = AffStartRow + ADiagJ[jcol]; + newColInd[newRowSize] = AffStartRow + ADiagJ[jcol]; newColVal[newRowSize++] = ADiagA[jcol]; } else @@ -1198,12 +1197,12 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ierr = HYPRE_IJMatrixSetValues(IJP, 1, &newRowSize, (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } delete [] newColInd; delete [] newColVal; ierr = HYPRE_IJMatrixAssemble(IJP); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJP, (void **) &hypreAD); hypreP = hypre_ParMatmul(hypreAD, hypreInvD); hypre_ParCSRMatrixOwnsRowStarts(hypreP) = 1; @@ -1230,17 +1229,17 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, 
MLI_Matrix *mli_Amat, ierr = HYPRE_IJMatrixCreate(comm,AffStartRow,AffStartRow+AffNRows-1, AffStartRow,AffStartRow+AffNRows-1,&IJP); ierr = HYPRE_IJMatrixSetObjectType(IJP, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[AffNRows]; maxRowLeng = 0; for (irow = 0; irow < AffNRows; irow++) { newRowSize = 0; for (jcol = ADDiagI[irow]; jcol < ADDiagI[irow+1]; jcol++) - newColInd[newRowSize] = ADDiagJ[jcol]; + newColInd[newRowSize] = ADDiagJ[jcol]; for (jcol = AD2DiagI[irow]; jcol < AD2DiagI[irow+1]; jcol++) - newColInd[newRowSize] = AD2DiagJ[jcol]; - if (newRowSize > maxRowLeng) maxRowLeng = newRowSize; + newColInd[newRowSize] = AD2DiagJ[jcol]; + if (newRowSize > maxRowLeng) maxRowLeng = newRowSize; hypre_qsort0(newColInd, 0, newRowSize-1); ncount = 0; for ( jcol = 0; jcol < newRowSize; jcol++ ) @@ -1249,14 +1248,14 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, { ncount++; newColInd[ncount] = newColInd[jcol]; - } + } } newRowSize = ncount + 1; rowLengs[irow] = newRowSize; } ierr = HYPRE_IJMatrixSetRowSizes(IJP, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJP); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; nnz = 0; for (irow = 0; irow < AffNRows; irow++) @@ -1265,16 +1264,16 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, newRowSize = 0; for (jcol = ADDiagI[irow]; jcol < ADDiagI[irow+1]; jcol++) { - newColInd[newRowSize] = ADDiagJ[jcol]; - if (ADDiagJ[jcol] == irow) - newColVal[newRowSize++] = 3.0 * (1.0 - ADDiagA[jcol]); + newColInd[newRowSize] = ADDiagJ[jcol]; + if (ADDiagJ[jcol] == irow) + newColVal[newRowSize++] = 3.0 * (1.0 - ADDiagA[jcol]); else - newColVal[newRowSize++] = - 3.0 * ADDiagA[jcol]; + newColVal[newRowSize++] = - 3.0 * ADDiagA[jcol]; } for (jcol = AD2DiagI[irow]; jcol < AD2DiagI[irow+1]; jcol++) { - newColInd[newRowSize] = AD2DiagJ[jcol]; - newColVal[newRowSize++] = AD2DiagA[jcol]; + newColInd[newRowSize] = AD2DiagJ[jcol]; + newColVal[newRowSize++] 
= AD2DiagA[jcol]; } hypre_qsort1(newColInd, newColVal, 0, newRowSize-1); ncount = 0; @@ -1287,26 +1286,26 @@ MLI_Matrix *MLI_Method_AMGCR::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ncount++; newColVal[ncount] = newColVal[jcol]; newColInd[ncount] = newColInd[jcol]; - } + } } newRowSize = ncount + 1; for ( jcol = 0; jcol < newRowSize; jcol++ ) newColVal[jcol] = - (DDiagA[irow] * newColVal[jcol]); - + ierr = HYPRE_IJMatrixSetValues(IJP, 1, &newRowSize, (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); nnz += newRowSize; - assert(!ierr); + hypre_assert(!ierr); } delete [] newColInd; delete [] newColVal; ierr = HYPRE_IJMatrixAssemble(IJP); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJP, (void **) &hypreP); ierr += HYPRE_IJMatrixSetObjectType(IJP, -1); ierr += HYPRE_IJMatrixDestroy(IJP); - assert(!ierr); + hypre_assert(!ierr); hypre_ParCSRMatrixDestroy(hypreAD); hypre_ParCSRMatrixDestroy(hypreAD2); } @@ -1346,23 +1345,23 @@ printf("finish parasails\n"); ierr = HYPRE_IJMatrixCreate(comm,startRow,startRow+localNRows-1, AccStartRow,AccStartRow+AccNRows-1,&IJP); ierr = HYPRE_IJMatrixSetObjectType(IJP, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[localNRows]; maxRowLeng = 0; ncount = 0; for (irow = 0; irow < localNRows; irow++) { if (indepSet[irow] == 1) rowLengs[irow] = 1; - else + else { rowLengs[irow] = tPDiagI[ncount+1] - tPDiagI[ncount]; ncount++; } - if (rowLengs[irow] > maxRowLeng) maxRowLeng = rowLengs[irow]; + if (rowLengs[irow] > maxRowLeng) maxRowLeng = rowLengs[irow]; } ierr = HYPRE_IJMatrixSetRowSizes(IJP, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJP); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; fCount = 0; cCount = 0; @@ -1377,14 +1376,14 @@ printf("finish parasails\n"); newColInd[0] = AccStartRow + cCount; newColVal[0] = 1.0; cCount++; - } + } else { newRowSize = 0; for (jcol = tPDiagI[fCount]; jcol < tPDiagI[fCount+1]; jcol++) { - newColInd[newRowSize] = 
tPDiagJ[jcol] + AccStartRow; - newColVal[newRowSize++] = tPDiagA[jcol]; + newColInd[newRowSize] = tPDiagJ[jcol] + AccStartRow; + newColVal[newRowSize++] = tPDiagA[jcol]; } fCount++; } @@ -1397,7 +1396,7 @@ if (habs(newColVal[jcol]) > dtemp) dtemp = habs(newColVal[jcol]); dtemp *= 0.25; ncount = 0; for (jcol = 0; jcol < newRowSize; jcol++) -if (habs(newColVal[jcol]) > dtemp) +if (habs(newColVal[jcol]) > dtemp) { newColInd[ncount] = newColInd[jcol]; newColVal[ncount++] = newColVal[jcol]; @@ -1421,17 +1420,17 @@ newColVal[jcol] *= dtemp; ierr = HYPRE_IJMatrixSetValues(IJP, 1, &newRowSize, (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } delete [] newColInd; delete [] newColVal; ierr = HYPRE_IJMatrixAssemble(IJP); - assert( !ierr ); + hypre_assert( !ierr ); hypre_ParCSRMatrixDestroy(hypreP); HYPRE_IJMatrixGetObject(IJP, (void **) &hypreP); ierr += HYPRE_IJMatrixSetObjectType(IJP, -1); ierr += HYPRE_IJMatrixDestroy(IJP); - assert(!ierr); + hypre_assert(!ierr); /* ------------------------------------------------------ */ /* package the P matrix */ @@ -1449,7 +1448,7 @@ newColVal[jcol] *= dtemp; * create the restriction matrix * --------------------------------------------------------------------- */ -MLI_Matrix *MLI_Method_AMGCR::createRmat(int *indepSet, MLI_Matrix *mli_Amat, +MLI_Matrix *MLI_Method_AMGCR::createRmat(int *indepSet, MLI_Matrix *mli_Amat, MLI_Matrix *mli_Affmat) { int startRow, localNRows, AffStartRow, AffNRows, RStartRow; @@ -1485,12 +1484,12 @@ MLI_Matrix *MLI_Method_AMGCR::createRmat(int *indepSet, MLI_Matrix *mli_Amat, ierr = HYPRE_IJMatrixCreate(comm,RStartRow,RStartRow+RNRows-1, startRow,startRow+localNRows-1,&IJR); ierr = HYPRE_IJMatrixSetObjectType(IJR, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[RNRows]; for (irow = 0; irow < RNRows; irow++) rowLengs[irow] = 1; ierr = HYPRE_IJMatrixSetRowSizes(IJR, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJR); 
- assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; /* ------------------------------------------------------ */ @@ -1501,7 +1500,7 @@ MLI_Matrix *MLI_Method_AMGCR::createRmat(int *indepSet, MLI_Matrix *mli_Amat, colValue = 1.0; for (irow = 0; irow < localNRows; irow++) { - if (indepSet[irow] == 1) + if (indepSet[irow] == 1) { rowIndex = RStartRow + rowCount; colIndex = startRow + irow; @@ -1516,11 +1515,11 @@ MLI_Matrix *MLI_Method_AMGCR::createRmat(int *indepSet, MLI_Matrix *mli_Amat, /* ------------------------------------------------------ */ ierr = HYPRE_IJMatrixAssemble(IJR); - assert(!ierr); + hypre_assert(!ierr); HYPRE_IJMatrixGetObject(IJR, (void **) &hypreR); ierr += HYPRE_IJMatrixSetObjectType(IJR, -1); ierr += HYPRE_IJMatrixDestroy(IJR); - assert( !ierr ); + hypre_assert( !ierr ); sprintf(paramString, "HYPRE_ParCSR"); funcPtr = new MLI_Function(); MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); @@ -1551,11 +1550,11 @@ int MLI_Method_AMGCR::print() printf("\t*** number of trial vectors = %d\n", numVectors_); printf("\t*** polynomial degree = %d\n", PDegree_); printf("\t*** minimum coarse size = %d\n", minCoarseSize_); - printf("\t*** smoother type = %s\n", smoother_); + printf("\t*** smoother type = %s\n", smoother_); printf("\t*** smoother nsweeps = %d\n", smootherNum_); printf("\t*** smoother weight = %e\n", smootherWgts_[0]); - printf("\t*** coarse solver type = %s\n", coarseSolver_); - printf("\t*** coarse solver nsweeps = %d\n", coarseSolverNum_); + printf("\t*** coarse solver type = %s\n", coarseSolver_); + printf("\t*** coarse solver nsweeps = %d\n", coarseSolverNum_); printf("\t********************************************************\n"); } return 0; diff --git a/src/FEI_mv/femli/mli_method_amgrs.cxx b/src/FEI_mv/femli/mli_method_amgrs.cxx index 3e083f780..3fe5ef446 100644 --- a/src/FEI_mv/femli/mli_method_amgrs.cxx +++ b/src/FEI_mv/femli/mli_method_amgrs.cxx @@ -18,7 +18,6 @@ /* #define MLI_USE_HYPRE_MATMATMULT */ #include -#include 
#include "HYPRE.h" #include "_hypre_parcsr_ls.h" #include "mli_utils.h" @@ -170,7 +169,7 @@ int MLI_Method_AMGRS::setParams(char *in_name, int argc, char *argv[]) useInjectionForR_ = 1; return 0; } - else if ( !strcmp(param1, "setSmoother" ) || + else if ( !strcmp(param1, "setSmoother" ) || !strcmp(param1, "setPreSmoother" )) { sscanf(in_name,"%s %s", param1, param2); @@ -181,7 +180,7 @@ int MLI_Method_AMGRS::setParams(char *in_name, int argc, char *argv[]) printf(" argument[0] : number of relaxation sweeps \n"); printf(" argument[1] : relaxation weights\n"); return 1; - } + } nSweeps = *(int *) argv[0]; weights = (double *) argv[1]; return ( setSmoother(param2, nSweeps, weights) ); @@ -206,7 +205,7 @@ int MLI_Method_AMGRS::setParams(char *in_name, int argc, char *argv[]) printf(" argument[0] : number of relaxation sweeps \n"); printf(" argument[1] : relaxation weights\n"); return 1; - } + } else if ( strcmp(param2, "SuperLU") ) { nSweeps = *(int *) argv[0]; @@ -230,11 +229,11 @@ int MLI_Method_AMGRS::setParams(char *in_name, int argc, char *argv[]) * generate multilevel structure * --------------------------------------------------------------------- */ -int MLI_Method_AMGRS::setup( MLI *mli ) +int MLI_Method_AMGRS::setup( MLI *mli ) { int k, level, irow, localNRows, mypid, nprocs, startRow; int numNodes, one=1, globalNRows, *coarsePartition; - int *CFMarkers, coarseNRows, *dofArray, *cdofArray=NULL; + int *CFMarkers=NULL, coarseNRows, *dofArray, *cdofArray=NULL; int *reduceArray1, *reduceArray2, *rowLengs, ierr, zeroNRows; int startCol, localNCols, colInd, rowNum; int globalCoarseNRows, numTrials; @@ -285,7 +284,7 @@ int MLI_Method_AMGRS::setup( MLI *mli ) /* ------fetch fine grid matrix----------------------------------- */ mli_Amat = mli->getSystemMatrix(level); - assert ( mli_Amat != NULL ); + hypre_assert ( mli_Amat != NULL ); hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix(); startRow = hypre_ParCSRMatrixFirstRowIndex(hypreA); localNRows = 
hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(hypreA)); @@ -298,7 +297,7 @@ int MLI_Method_AMGRS::setup( MLI *mli ) { printf("\tMLI_Method_AMGRS::setup - nrows not divisible by dof.\n"); printf("\tMLI_Method_AMGRS::setup - revert nodeDOF to 1.\n"); - nodeDOF_ = 1; + nodeDOF_ = 1; numNodes = localNRows / nodeDOF_; } if ( level == 0 ) @@ -341,14 +340,14 @@ int MLI_Method_AMGRS::setup( MLI *mli ) coarsenScheme_, outputLevel_, &CFMarkers); break; case MLI_METHOD_AMGRS_FALGOUT : - hypre_BoomerAMGCoarsenFalgout(hypreS, hypreA, measureType_, + hypre_BoomerAMGCoarsenFalgout(hypreS, hypreA, measureType_, outputLevel_, &CFMarkers); break; case MLI_METHOD_AMGRS_CR : hypre_BoomerAMGCoarsen(hypreS, hypreA, 0, outputLevel_, &CFMarkers); k = 0; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) { if (CFMarkers[irow] > 0) {CFMarkers[irow] = 1; k++;} else if (CFMarkers[irow] < 0) CFMarkers[irow] = 0; @@ -358,7 +357,7 @@ int MLI_Method_AMGRS::setup( MLI *mli ) mli_Affmat = performCR(mli_Amat,CFMarkers,&mli_Afcmat,numTrials, hypreS2); k = 0; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) { if (CFMarkers[irow] > 0) {CFMarkers[irow] = 1; k++;} else if (CFMarkers[irow] <= 0) CFMarkers[irow] = -1; @@ -414,11 +413,11 @@ int MLI_Method_AMGRS::setup( MLI *mli ) } } } - + /* ------ wrap up creating the multigrid hierarchy --------------- */ if ( coarsePartition[nprocs] < minCoarseSize_ || - coarsePartition[nprocs] == globalNRows || zeroNRows == 1 ) + coarsePartition[nprocs] == globalNRows || zeroNRows == 1 ) { if ( symmetric_ == 0 ) { @@ -468,12 +467,12 @@ int MLI_Method_AMGRS::setup( MLI *mli ) //=============================================== #endif { - hypre_BoomerAMGBuildInterp(hypreA, CFMarkers, hypreS, - coarsePartition, nodeDOF_, dofArray, outputLevel_, + hypre_BoomerAMGBuildInterp(hypreA, CFMarkers, hypreS, + coarsePartition, nodeDOF_, dofArray, outputLevel_, truncFactor_, mxelmtsP_, mapStoA, &hypreP); 
funcPtr = new MLI_Function(); MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - sprintf(paramString, "HYPRE_ParCSR" ); + sprintf(paramString, "HYPRE_ParCSR" ); mli_Pmat = new MLI_Matrix( (void *) hypreP, paramString, funcPtr ); mli->setProlongation(level+1, mli_Pmat); delete funcPtr; @@ -499,32 +498,32 @@ int MLI_Method_AMGRS::setup( MLI *mli ) ierr = HYPRE_IJMatrixCreate(comm, startCol, startCol+localNCols-1, startRow,startRow+localNRows-1,&IJRmat); ierr = HYPRE_IJMatrixSetObjectType(IJRmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[localNCols]; for ( k = 0; k < localNCols; k++ ) rowLengs[k] = 1; ierr = HYPRE_IJMatrixSetRowSizes(IJRmat, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJRmat); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; delete [] reduceArray1; delete [] reduceArray2; k = 0; - for ( irow = 0; irow < localNCols; irow++ ) + for ( irow = 0; irow < localNCols; irow++ ) { - while ( CFMarkers[k] != 1 ) k++; + while ( CFMarkers[k] != 1 ) k++; rowNum = startCol + irow; colInd = k + startRow; - HYPRE_IJMatrixSetValues(IJRmat, 1, &one, (const int *) &rowNum, + HYPRE_IJMatrixSetValues(IJRmat, 1, &one, (const int *) &rowNum, (const int *) &colInd, (const double *) &colVal); k++; } ierr = HYPRE_IJMatrixAssemble(IJRmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJRmat, (void **) &hypreR); hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreR); funcPtr = new MLI_Function(); MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - sprintf(paramString, "HYPRE_ParCSR" ); + sprintf(paramString, "HYPRE_ParCSR" ); mli_Rmat = new MLI_Matrix( (void *) hypreR, paramString, funcPtr ); mli->setRestriction(level, mli_Rmat); delete funcPtr; @@ -533,14 +532,14 @@ int MLI_Method_AMGRS::setup( MLI *mli ) } else if ( symmetric_ == 0 ) { - hypre_BoomerAMGBuildInterp(hypreAT, CFMarkers, hypreST, - coarsePartition, nodeDOF_, dofArray, outputLevel_, + hypre_BoomerAMGBuildInterp(hypreAT, CFMarkers, hypreST, + 
coarsePartition, nodeDOF_, dofArray, outputLevel_, truncFactor_, mxelmtsP_, mapStoA, &hypreRT); hypreRT->owns_col_starts = 0; hypre_ParCSRMatrixTranspose( hypreRT, &hypreR, one ); funcPtr = new MLI_Function(); MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - sprintf(paramString, "HYPRE_ParCSR" ); + sprintf(paramString, "HYPRE_ParCSR" ); mli_Rmat = new MLI_Matrix( (void *) hypreR, paramString, funcPtr ); mli->setRestriction(level, mli_Rmat); delete funcPtr; @@ -572,7 +571,7 @@ int MLI_Method_AMGRS::setup( MLI *mli ) //{ // rowColStarts = hypre_ParCSRMatrixRowStarts(hypreR); // newRowColStarts = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - // for (irow = 0; irow <= nprocs; irow++) + // for (irow = 0; irow <= nprocs; irow++) // newRowColStarts[irow] = rowColStarts[irow]; // hypre_ParCSRMatrixRowStarts(hypreCA) = newRowColStarts; // hypre_ParCSRMatrixOwnsRowStarts(hypreCA) = 1; @@ -581,14 +580,14 @@ int MLI_Method_AMGRS::setup( MLI *mli ) //{ // rowColStarts = hypre_ParCSRMatrixColStarts(hypreAP); // newRowColStarts = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - // for (irow = 0; irow <= nprocs; irow++) + // for (irow = 0; irow <= nprocs; irow++) // newRowColStarts[irow] = rowColStarts[irow]; // hypre_ParCSRMatrixColStarts(hypreCA) = newRowColStarts; // hypre_ParCSRMatrixOwnsColStarts(hypreCA) = 1; //} //funcPtr = new MLI_Function(); //MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - //sprintf(paramString, "HYPRE_ParCSR" ); + //sprintf(paramString, "HYPRE_ParCSR" ); //mli_cAmat = new MLI_Matrix((void *) hypreCA, paramString, funcPtr); //delete funcPtr; //hypre_ParCSRMatrixDestroy( hypreR ); @@ -605,7 +604,7 @@ int MLI_Method_AMGRS::setup( MLI *mli ) hypre_ParCSRMatrixDestroy( hypreAP ); funcPtr = new MLI_Function(); MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - sprintf(paramString, "HYPRE_ParCSR" ); + sprintf(paramString, "HYPRE_ParCSR" ); mli_cAmat = new MLI_Matrix((void *) hypreCA, paramString, funcPtr); delete funcPtr; #else @@ 
-617,7 +616,7 @@ int MLI_Method_AMGRS::setup( MLI *mli ) mli->setSystemMatrix(level+1, mli_cAmat); elapsedTime = (MLI_Utils_WTime() - startTime); RAPTime_ += elapsedTime; - if ( mypid == 0 && outputLevel_ > 0 ) + if ( mypid == 0 && outputLevel_ > 0 ) printf("\tRAP computed, time = %e seconds.\n", elapsedTime); /* ------set the smoothers---------------------------------------- */ @@ -683,7 +682,7 @@ int MLI_Method_AMGRS::setOutputLevel( int level ) } /* ********************************************************************* * - * set number of levels + * set number of levels * --------------------------------------------------------------------- */ int MLI_Method_AMGRS::setNumLevels( int nlevels ) @@ -716,7 +715,7 @@ int MLI_Method_AMGRS::setSmoother(char *stype, int num, double *wgt) } /* ********************************************************************* * - * set coarse solver + * set coarse solver * --------------------------------------------------------------------- */ int MLI_Method_AMGRS::setCoarseSolver( char *stype, int num, double *wgt ) @@ -732,8 +731,8 @@ int MLI_Method_AMGRS::setCoarseSolver( char *stype, int num, double *wgt ) delete [] coarseSolverWeights_ ; if ( wgt != NULL && strcmp(coarseSolver_, "SuperLU") ) { - coarseSolverWeights_ = new double[coarseSolverNSweeps_]; - for (i = 0; i < coarseSolverNSweeps_; i++) + coarseSolverWeights_ = new double[coarseSolverNSweeps_]; + for (i = 0; i < coarseSolverNSweeps_; i++) coarseSolverWeights_ [i] = wgt[i]; } else coarseSolverWeights_ = NULL; @@ -741,7 +740,7 @@ int MLI_Method_AMGRS::setCoarseSolver( char *stype, int num, double *wgt ) } /* ********************************************************************* * - * set measure type + * set measure type * --------------------------------------------------------------------- */ int MLI_Method_AMGRS::setMeasureType( int mtype ) @@ -751,7 +750,7 @@ int MLI_Method_AMGRS::setMeasureType( int mtype ) } /* 
********************************************************************* * - * set node degree of freedom + * set node degree of freedom * --------------------------------------------------------------------- */ int MLI_Method_AMGRS::setNodeDOF( int dof ) @@ -761,22 +760,22 @@ int MLI_Method_AMGRS::setNodeDOF( int dof ) } /* ********************************************************************* * - * set coarsening scheme + * set coarsening scheme * --------------------------------------------------------------------- */ int MLI_Method_AMGRS::setCoarsenScheme( int scheme ) { - if ( scheme == MLI_METHOD_AMGRS_CLJP ) + if ( scheme == MLI_METHOD_AMGRS_CLJP ) { coarsenScheme_ = MLI_METHOD_AMGRS_CLJP; return 0; } - else if ( scheme == MLI_METHOD_AMGRS_RUGE ) + else if ( scheme == MLI_METHOD_AMGRS_RUGE ) { coarsenScheme_ = MLI_METHOD_AMGRS_RUGE; return 0; } - else if ( scheme == MLI_METHOD_AMGRS_FALGOUT ) + else if ( scheme == MLI_METHOD_AMGRS_FALGOUT ) { coarsenScheme_ = MLI_METHOD_AMGRS_FALGOUT; return 0; @@ -833,10 +832,10 @@ int MLI_Method_AMGRS::print() printf("\t*** symmetric flag = %d\n", symmetric_); printf("\t*** R injection flag = %d\n", useInjectionForR_); printf("\t*** minimum coarse size = %d\n", minCoarseSize_); - printf("\t*** smoother type = %s\n", smoother_); + printf("\t*** smoother type = %s\n", smoother_); printf("\t*** smoother nsweeps = %d\n", smootherNSweeps_); - printf("\t*** coarse solver type = %s\n", coarseSolver_); - printf("\t*** coarse solver nsweeps = %d\n", coarseSolverNSweeps_); + printf("\t*** coarse solver type = %s\n", coarseSolver_); + printf("\t*** coarse solver nsweeps = %d\n", coarseSolverNSweeps_); printf("\t********************************************************\n"); } return 0; @@ -1016,32 +1015,32 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, #if 0 if (numTrials != 1) { - for (irow = 0; irow < localNRows; irow++) indepSet[irow] = 1; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow 
< localNRows; irow++) indepSet[irow] = 1; + for (irow = 0; irow < localNRows; irow++) { if (indepSet[irow] == 1) /* if I am a C-point */ { indepSet[irow] = 0; /* set myself to be a F-pt */ - for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) + for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) { colInd = ADiagJ[jcol]; /* for each of my neighbors */ - if (indepSet[colInd] == 1) /* if it is a C-point */ + if (indepSet[colInd] == 1) /* if it is a C-point */ { /* if I depend strongly on it, leave it as C-pt */ - for (kcol = SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++) + for (kcol = SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++) { - if (SDiagJ[kcol] == colInd) + if (SDiagJ[kcol] == colInd) { indepSet[colInd] = -1; break; } } /* if I don't depend strongly on it, see if it depends on me*/ - if (kcol == SDiagI[irow+1]) + if (kcol == SDiagI[irow+1]) { - for (kcol=SDiagI[colInd]; kcol < SDiagI[colInd+1]; kcol++) + for (kcol=SDiagI[colInd]; kcol < SDiagI[colInd+1]; kcol++) { - if (SDiagJ[kcol] == irow) + if (SDiagJ[kcol] == irow) { indepSet[colInd] = -1; break; @@ -1052,41 +1051,41 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, } } } - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) if (indepSet[irow] < 0) indepSet[irow] = 1; count = 0; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) if (indepSet[irow] == 1) count++; /* ------------------------------------------------------ */ /* select second set of fine points */ /* ------------------------------------------------------ */ - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) { if (indepSet[irow] == 1) /* if I am a C-point */ { count = 0; - for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) + for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) { colInd = ADiagJ[jcol]; /* for each of my neighbors */ - if (indepSet[colInd] == 0) /* if it is a F-point */ + if 
(indepSet[colInd] == 0) /* if it is a F-point */ { /* if I depend strongly on it, increment counter */ - for (kcol = SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++) + for (kcol = SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++) { - if (SDiagJ[kcol] == colInd) + if (SDiagJ[kcol] == colInd) { count++; break; } } /* if I don't depend strongly on it, see if it depends on me*/ - if (kcol == SDiagI[irow+1]) + if (kcol == SDiagI[irow+1]) { - for (kcol=SDiagI[colInd]; kcol < SDiagI[colInd+1]; kcol++) + for (kcol=SDiagI[colInd]; kcol < SDiagI[colInd+1]; kcol++) { - if (SDiagJ[kcol] == irow) + if (SDiagJ[kcol] == irow) { count++; break; @@ -1099,7 +1098,7 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, } } count = 0; - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) if (indepSet[irow] == 1) count++; } #endif @@ -1150,20 +1149,20 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, ierr = HYPRE_IJMatrixCreate(comm,startRow,startRow+localNRows-1, FStartRow,FStartRow+FNRows-1,&IJPFF); ierr = HYPRE_IJMatrixSetObjectType(IJPFF, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[localNRows]; for (irow = 0; irow < localNRows; irow++) rowLengs[irow] = 1; ierr = HYPRE_IJMatrixSetRowSizes(IJPFF, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJPFF); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixCreate(comm,startRow,startRow+localNRows-1, CStartRow,CStartRow+CNRows-1, &IJPFC); ierr = HYPRE_IJMatrixSetObjectType(IJPFC, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixSetRowSizes(IJPFC, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJPFC); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; /* --------------------------------------------------- */ @@ -1175,7 +1174,7 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, for (irow = 0; irow < localNRows; irow++) { rowIndex = startRow + irow; - if 
(indepSet[irow] == 0) + if (indepSet[irow] == 0) { colIndex = FStartRow + rowCount; HYPRE_IJMatrixSetValues(IJPFF,1,&one,(const int *) &rowIndex, @@ -1191,14 +1190,14 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, } } ierr = HYPRE_IJMatrixAssemble(IJPFF); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJPFF, (void **) &hyprePFF); //hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hyprePFF); sprintf(paramString, "HYPRE_ParCSR" ); mli_PFFMat = new MLI_Matrix((void *)hyprePFF,paramString,NULL); ierr = HYPRE_IJMatrixAssemble(IJPFC); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJPFC, (void **) &hyprePFC); //hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hyprePFC); hypreAPFC = hypre_ParMatmul(hypreA, hyprePFC); @@ -1206,12 +1205,12 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, hypreAfc = hypre_ParMatmul(hyprePFFT, hypreAPFC); rowStarts = hypre_ParCSRMatrixRowStarts(hyprePFFT); newRowStarts = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - for (irow = 0; irow <= nprocs; irow++) + for (irow = 0; irow <= nprocs; irow++) newRowStarts[irow] = rowStarts[irow]; hypre_ParCSRMatrixRowStarts(hypreAfc) = newRowStarts; colStarts = hypre_ParCSRMatrixColStarts(hypreAPFC); newColStarts = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - for (irow = 0; irow <= nprocs; irow++) + for (irow = 0; irow <= nprocs; irow++) newColStarts[irow] = colStarts[irow]; hypre_ParCSRMatrixColStarts(hypreAfc) = newColStarts; hypre_ParCSRMatrixOwnsRowStarts(hypreAfc) = 1; @@ -1287,7 +1286,7 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, strcpy(paramString, "relaxWeight"); aratio = 0.0; XData = (double *) hypre_VectorData(hypre_ParVectorLocalVector(hypreX)); - XaccData = (double *) + XaccData = (double *) hypre_VectorData(hypre_ParVectorLocalVector(hypreXacc)); for (iV = 0; iV < numVectors; iV++) { @@ -1321,7 +1320,7 @@ MLI_Matrix 
*MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, ratio1 = habs(XData[irow]); XaccData[irow] = ratio1; if (ratio1 > aratio) aratio = ratio1; - } + } printf("\tTrial %3d : Jacobi norms = %16.8e %16.8e %16.8e\n",iT, rnorm0,rnorm1,aratio); if (rnorm0 > 1.0e-10) aratio = rnorm1 / rnorm0; @@ -1332,36 +1331,36 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, /* --------------------------------------------------- */ /* select fine points */ /* --------------------------------------------------- */ - - if (iV == numVectors) aratio /= (double) numVectors; - if (aratio < targetMu) + + if (iV == numVectors) aratio /= (double) numVectors; + if (aratio < targetMu) { if (stopRefine == 0) { - for (irow = 0; irow < localNRows; irow++) + for (irow = 0; irow < localNRows; irow++) { if (indepSet[irow] == 1) /* if I am a C-point */ { count = 0; - for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) + for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) { colInd = ADiagJ[jcol]; /* for each of my neighbors */ - if (indepSet[colInd] == 0) /* if it is a F-point */ + if (indepSet[colInd] == 0) /* if it is a F-point */ { /* if I depend strongly on it, increment counter */ - for (kcol= SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++) + for (kcol= SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++) { - if (SDiagJ[kcol] == colInd) + if (SDiagJ[kcol] == colInd) { count++; break; } } - if (kcol == SDiagI[irow+1]) + if (kcol == SDiagI[irow+1]) { for (kcol=SDiagI[colInd];kcol 0.1)) @@ -1391,24 +1390,24 @@ MLI_Matrix *MLI_Method_AMGRS::performCR(MLI_Matrix *mli_Amat, int *indepSet, aratio = targetMu; //stopRefine = 1; indepSet[irow] = 1; /* set it to a coarse point */ - for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) + for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) { colInd = ADiagJ[jcol]; - if (indepSet[colInd] == 0) /* if it is a F-point */ + if (indepSet[colInd] == 0) /* if it is a F-point */ { - for (kcol = SDiagI[irow]; kcol < 
SDiagI[irow+1]; kcol++) + for (kcol = SDiagI[irow]; kcol < SDiagI[irow+1]; kcol++) { - if (SDiagJ[kcol] == colInd) + if (SDiagJ[kcol] == colInd) { indepSet[colInd] = -1; break; } } - if (kcol == SDiagI[irow+1]) + if (kcol == SDiagI[irow+1]) { - for (kcol=SDiagI[colInd];kcol= 0.0) { for (jcol = 0; jcol < rowSize; jcol++) - if (colInd[jcol] != rowIndex && - (indepSet[colInd[jcol]-startRow] == 0) && - colVal[jcol] > 0.0) + if (colInd[jcol] != rowIndex && + (indepSet[colInd[jcol]-startRow] == 0) && + colVal[jcol] > 0.0) colValue += colVal[jcol]; } else { for (jcol = 0; jcol < rowSize; jcol++) - if (colInd[jcol] != rowIndex && - (indepSet[colInd[jcol]-startRow] == 0) && - colVal[jcol] < 0.0) + if (colInd[jcol] != rowIndex && + (indepSet[colInd[jcol]-startRow] == 0) && + colVal[jcol] < 0.0) colValue += colVal[jcol]; } colValue = 1.0 / colValue; @@ -1559,7 +1558,7 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, HYPRE_IJMatrixSetValues(IJInvD,1,&one,(const int *) &colIndex, (const int *) &colIndex, (const double *) &colValue); rowCount++; - HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreA, rowIndex, + HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) hypreA, rowIndex, &rowSize, &colInd, &colVal); } } @@ -1569,11 +1568,11 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, /* ------------------------------------------------------ */ ierr = HYPRE_IJMatrixAssemble(IJInvD); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJInvD, (void **) &hypreInvD); ierr += HYPRE_IJMatrixSetObjectType(IJInvD, -1); ierr += HYPRE_IJMatrixDestroy(IJInvD); - assert( !ierr ); + hypre_assert( !ierr ); /* ------------------------------------------------------ */ /* generate polynomial of Aff and invD */ @@ -1587,8 +1586,8 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ADiagI = hypre_CSRMatrixI(ADiag); ADiagJ = hypre_CSRMatrixJ(ADiag); ADiagA = hypre_CSRMatrixData(ADiag); - for 
(irow = 0; irow < AffNRows; irow++) - for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) + for (irow = 0; irow < AffNRows; irow++) + for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) ADiagA[jcol] = - ADiagA[jcol]; } else if (PDegree == 1) @@ -1601,11 +1600,11 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ADiagI = hypre_CSRMatrixI(ADiag); ADiagJ = hypre_CSRMatrixJ(ADiag); ADiagA = hypre_CSRMatrixData(ADiag); - for (irow = 0; irow < AffNRows; irow++) + for (irow = 0; irow < AffNRows; irow++) { - for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) + for (jcol = ADiagI[irow]; jcol < ADiagI[irow+1]; jcol++) { - if (ADiagJ[jcol] == irow) + if (ADiagJ[jcol] == irow) ADiagA[jcol] = - omega*DDiagA[irow]*(2.0-omega*ADiagA[jcol]); else ADiagA[jcol] = omega * omega * DDiagA[irow] * ADiagA[jcol]; } @@ -1613,14 +1612,14 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, hypre_ParCSRMatrixOwnsColStarts(hypreInvD) = 0; rowStarts = hypre_ParCSRMatrixRowStarts(hypreA); newRowStarts = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); - for (irow = 0; irow <= nprocs; irow++) + for (irow = 0; irow <= nprocs; irow++) newRowStarts[irow] = rowStarts[irow]; hypre_ParCSRMatrixRowStarts(hypreP) = newRowStarts; #else ierr = HYPRE_IJMatrixCreate(comm,AffStartRow,AffStartRow+AffNRows-1, AffStartRow,AffStartRow+AffNRows-1,&IJP); ierr = HYPRE_IJMatrixSetObjectType(IJP, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[AffNRows]; maxRowLeng = 0; ADiag = hypre_ParCSRMatrixDiag(hypreAff); @@ -1636,11 +1635,11 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, if (ADiagJ[jcol] != irow && ADiagA[jcol]*ADiagA[index] < 0.0) newRowSize++; rowLengs[irow] = newRowSize; - if (newRowSize > maxRowLeng) maxRowLeng = newRowSize; + if (newRowSize > maxRowLeng) maxRowLeng = newRowSize; } ierr = HYPRE_IJMatrixSetRowSizes(IJP, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJP); - 
assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; newColInd = new int[maxRowLeng]; newColVal = new double[maxRowLeng]; @@ -1658,7 +1657,7 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, { if (ADiagJ[jcol] != irow && ADiagA[jcol]*ADiagA[index] < 0.0) { - newColInd[newRowSize] = AffStartRow + ADiagJ[jcol]; + newColInd[newRowSize] = AffStartRow + ADiagJ[jcol]; newColVal[newRowSize++] = ADiagA[jcol]; } else @@ -1673,12 +1672,12 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ierr = HYPRE_IJMatrixSetValues(IJP, 1, &newRowSize, (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } delete [] newColInd; delete [] newColVal; ierr = HYPRE_IJMatrixAssemble(IJP); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJP, (void **) &hypreAD); hypreP = hypre_ParMatmul(hypreAD, hypreInvD); hypre_ParCSRMatrixOwnsRowStarts(hypreP) = 1; @@ -1705,17 +1704,17 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ierr = HYPRE_IJMatrixCreate(comm,AffStartRow,AffStartRow+AffNRows-1, AffStartRow,AffStartRow+AffNRows-1,&IJP); ierr = HYPRE_IJMatrixSetObjectType(IJP, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[AffNRows]; maxRowLeng = 0; for (irow = 0; irow < AffNRows; irow++) { newRowSize = 0; for (jcol = ADDiagI[irow]; jcol < ADDiagI[irow+1]; jcol++) - newColInd[newRowSize] = ADDiagJ[jcol]; + newColInd[newRowSize] = ADDiagJ[jcol]; for (jcol = AD2DiagI[irow]; jcol < AD2DiagI[irow+1]; jcol++) - newColInd[newRowSize] = AD2DiagJ[jcol]; - if (newRowSize > maxRowLeng) maxRowLeng = newRowSize; + newColInd[newRowSize] = AD2DiagJ[jcol]; + if (newRowSize > maxRowLeng) maxRowLeng = newRowSize; hypre_qsort0(newColInd, 0, newRowSize-1); ncount = 0; for ( jcol = 0; jcol < newRowSize; jcol++ ) @@ -1724,14 +1723,14 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, { 
ncount++; newColInd[ncount] = newColInd[jcol]; - } + } } newRowSize = ncount + 1; rowLengs[irow] = newRowSize; } ierr = HYPRE_IJMatrixSetRowSizes(IJP, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJP); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; nnz = 0; for (irow = 0; irow < AffNRows; irow++) @@ -1740,16 +1739,16 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, newRowSize = 0; for (jcol = ADDiagI[irow]; jcol < ADDiagI[irow+1]; jcol++) { - newColInd[newRowSize] = ADDiagJ[jcol]; - if (ADDiagJ[jcol] == irow) - newColVal[newRowSize++] = 3.0 * (1.0 - ADDiagA[jcol]); + newColInd[newRowSize] = ADDiagJ[jcol]; + if (ADDiagJ[jcol] == irow) + newColVal[newRowSize++] = 3.0 * (1.0 - ADDiagA[jcol]); else - newColVal[newRowSize++] = - 3.0 * ADDiagA[jcol]; + newColVal[newRowSize++] = - 3.0 * ADDiagA[jcol]; } for (jcol = AD2DiagI[irow]; jcol < AD2DiagI[irow+1]; jcol++) { - newColInd[newRowSize] = AD2DiagJ[jcol]; - newColVal[newRowSize++] = AD2DiagA[jcol]; + newColInd[newRowSize] = AD2DiagJ[jcol]; + newColVal[newRowSize++] = AD2DiagA[jcol]; } hypre_qsort1(newColInd, newColVal, 0, newRowSize-1); ncount = 0; @@ -1762,26 +1761,26 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ncount++; newColVal[ncount] = newColVal[jcol]; newColInd[ncount] = newColInd[jcol]; - } + } } newRowSize = ncount + 1; for ( jcol = 0; jcol < newRowSize; jcol++ ) newColVal[jcol] = - (DDiagA[irow] * newColVal[jcol]); - + ierr = HYPRE_IJMatrixSetValues(IJP, 1, &newRowSize, (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); nnz += newRowSize; - assert(!ierr); + hypre_assert(!ierr); } delete [] newColInd; delete [] newColVal; ierr = HYPRE_IJMatrixAssemble(IJP); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJP, (void **) &hypreP); ierr += HYPRE_IJMatrixSetObjectType(IJP, -1); ierr += HYPRE_IJMatrixDestroy(IJP); - assert(!ierr); + hypre_assert(!ierr); hypre_ParCSRMatrixDestroy(hypreAD); 
hypre_ParCSRMatrixDestroy(hypreAD2); } @@ -1808,23 +1807,23 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ierr = HYPRE_IJMatrixCreate(comm,startRow,startRow+localNRows-1, AccStartRow,AccStartRow+AccNRows-1,&IJP); ierr = HYPRE_IJMatrixSetObjectType(IJP, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); rowLengs = new int[localNRows]; maxRowLeng = 0; ncount = 0; for (irow = 0; irow < localNRows; irow++) { if (indepSet[irow] == 1) rowLengs[irow] = 1; - else + else { rowLengs[irow] = tPDiagI[ncount+1] - tPDiagI[ncount]; ncount++; } - if (rowLengs[irow] > maxRowLeng) maxRowLeng = rowLengs[irow]; + if (rowLengs[irow] > maxRowLeng) maxRowLeng = rowLengs[irow]; } ierr = HYPRE_IJMatrixSetRowSizes(IJP, rowLengs); ierr = HYPRE_IJMatrixInitialize(IJP); - assert(!ierr); + hypre_assert(!ierr); delete [] rowLengs; fCount = 0; cCount = 0; @@ -1839,14 +1838,14 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, newColInd[0] = AccStartRow + cCount; newColVal[0] = 1.0; cCount++; - } + } else { newRowSize = 0; for (jcol = tPDiagI[fCount]; jcol < tPDiagI[fCount+1]; jcol++) { - newColInd[newRowSize] = tPDiagJ[jcol] + AccStartRow; - newColVal[newRowSize++] = tPDiagA[jcol]; + newColInd[newRowSize] = tPDiagJ[jcol] + AccStartRow; + newColVal[newRowSize++] = tPDiagA[jcol]; } fCount++; } @@ -1860,7 +1859,7 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ncount = 0; for (jcol = 0; jcol < newRowSize; jcol++) { - if (habs(newColVal[jcol]) > dtemp) + if (habs(newColVal[jcol]) > dtemp) { newColInd[ncount] = newColInd[jcol]; newColVal[ncount++] = newColVal[jcol]; @@ -1878,17 +1877,17 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, ierr = HYPRE_IJMatrixSetValues(IJP, 1, &newRowSize, (const int *) &rowIndex, (const int *) newColInd, (const double *) newColVal); - assert(!ierr); + hypre_assert(!ierr); } delete [] newColInd; delete [] newColVal; ierr = 
HYPRE_IJMatrixAssemble(IJP); - assert( !ierr ); + hypre_assert( !ierr ); hypre_ParCSRMatrixDestroy(hypreP); HYPRE_IJMatrixGetObject(IJP, (void **) &hypreP); ierr += HYPRE_IJMatrixSetObjectType(IJP, -1); ierr += HYPRE_IJMatrixDestroy(IJP); - assert(!ierr); + hypre_assert(!ierr); /* ------------------------------------------------------ */ /* package the P matrix */ @@ -1901,4 +1900,3 @@ MLI_Matrix *MLI_Method_AMGRS::createPmat(int *indepSet, MLI_Matrix *mli_Amat, delete funcPtr; return mli_Pmat; } - diff --git a/src/FEI_mv/femli/mli_method_amgsa.cxx b/src/FEI_mv/femli/mli_method_amgsa.cxx index 5b5f1a7d7..b3f00bae5 100644 --- a/src/FEI_mv/femli/mli_method_amgsa.cxx +++ b/src/FEI_mv/femli/mli_method_amgsa.cxx @@ -12,7 +12,6 @@ #endif #include -#include #include "HYPRE.h" #include "HYPRE_IJ_mv.h" #include "mli_utils.h" @@ -24,15 +23,15 @@ #define MABS(x) (((x) > 0) ? (x) : -(x)) /* ********************************************************************* * - * functions external to MLI + * functions external to MLI * --------------------------------------------------------------------- */ #ifdef MLI_ARPACK extern "C" { /* ARPACK function to compute eigenvalues/eigenvectors */ - void dnstev_(int *n, int *nev, char *which, double *sigmar, - double *sigmai, int *colptr, int *rowind, double *nzvals, + void dnstev_(int *n, int *nev, char *which, double *sigmar, + double *sigmai, int *colptr, int *rowind, double *nzvals, double *dr, double *di, double *z, int *ldz, int *info, double *tol); } @@ -69,7 +68,7 @@ MLI_Method_AMGSA::MLI_Method_AMGSA( MPI_Comm comm ) : MLI_Method( comm ) saData_ = new int*[40]; /* node to aggregate data */ saDataAux_ = NULL; spectralNorms_ = new double[40]; /* calculated max eigen */ - for ( int i = 0; i < 40; i++ ) + for ( int i = 0; i < 40; i++ ) { saCounts_[i] = 0; saData_[i] = NULL; @@ -146,7 +145,7 @@ MLI_Method_AMGSA::~MLI_Method_AMGSA() if ( preSmootherWgt_ != NULL ) delete [] preSmootherWgt_; if ( postSmootherWgt_ != NULL ) delete [] 
postSmootherWgt_; if ( coarseSolverWgt_ != NULL ) delete [] coarseSolverWgt_ ; - if ( ddObj_!= NULL ) + if ( ddObj_!= NULL ) { if ( ddObj_->sendProcs != NULL ) delete [] ddObj_->sendProcs; if ( ddObj_->recvProcs != NULL ) delete [] ddObj_->recvProcs; @@ -157,12 +156,12 @@ MLI_Method_AMGSA::~MLI_Method_AMGSA() if ( ddObj_->SNodeEqnList != NULL ) delete [] ddObj_->SNodeEqnList; delete ddObj_; } - if ( ARPACKSuperLUExists_ ) + if ( ARPACKSuperLUExists_ ) { strcpy( paramString, "destroy" ); #ifdef MLI_ARPACK int info; - dnstev_(NULL, NULL, paramString, NULL, NULL, NULL, NULL, NULL, NULL, + dnstev_(NULL, NULL, paramString, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &info, &arpackTol_); #endif } @@ -185,7 +184,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) comm = getComm(); MPI_Comm_rank( comm, &mypid ); sscanf(in_name, "%s", param1); - if ( outputLevel_ > 1 && mypid == 0 ) + if ( outputLevel_ > 1 && mypid == 0 ) printf("\tMLI_Method_AMGSA::setParam = %s\n", in_name); if ( !strcmp(param1, "setOutputLevel" )) { @@ -224,7 +223,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) return ( setCoarsenScheme( MLI_METHOD_AMGSA_LOCAL ) ); else if ( !strcmp(param2, "hybrid" ) ) return ( setCoarsenScheme( MLI_METHOD_AMGSA_HYBRID ) ); - else + else { printf("MLI_Method_AMGSA::setParams ERROR : setCoarsenScheme not"); printf(" valid. 
Valid options are : local \n"); @@ -286,7 +285,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) printf(" argument[2] : total degree of freedom \n"); printf(" argument[3] : aggregate information \n"); return 1; - } + } level = *(int *) argv[0]; nAggr = *(int *) argv[1]; length = *(int *) argv[2]; @@ -308,7 +307,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) printf(" argument[0] : number of relaxation sweeps \n"); printf(" argument[1] : relaxation weights\n"); return 1; - } + } prePost = MLI_SMOOTHER_PRE; nSweeps = *(int *) argv[0]; weights = (double *) argv[1]; @@ -324,7 +323,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) printf(" argument[0] : number of relaxation sweeps \n"); printf(" argument[1] : relaxation weights\n"); return 1; - } + } prePost = MLI_SMOOTHER_POST; nSweeps = *(int *) argv[0]; weights = (double *) argv[1]; @@ -350,7 +349,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) printf(" argument[0] : number of relaxation sweeps \n"); printf(" argument[1] : relaxation weights\n"); return 1; - } + } else if ( strcmp(param2, "SuperLU") ) { nSweeps = *(int *) argv[0]; @@ -374,7 +373,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) printf(" argument[2] : null space information \n"); printf(" argument[3] : vector length \n"); return 1; - } + } nDOF = *(int *) argv[0]; numNS = *(int *) argv[1]; nullspace = (double *) argv[2]; @@ -389,7 +388,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) printf(" 1 argument.\n"); printf(" argument[0] : adjustment vectors \n"); return 1; - } + } nsAdjust = (double *) argv[0]; return ( adjustNullSpace( nsAdjust ) ); } @@ -403,7 +402,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) printf(" argument[1] : equation number offset \n"); printf(" argument[2] : list of equation numbers \n"); return 1; - } + } length = *(int *) argv[0]; offset = 
*(int *) argv[1]; indices = (int *) argv[2]; @@ -422,7 +421,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) printf(" argument[4] : null space dimension \n"); printf(" argument[5] : scalings (can be null) \n"); return 1; - } + } nnodes = *(int *) argv[0]; nDOF = *(int *) argv[1]; nsDim = *(int *) argv[2]; @@ -441,11 +440,11 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) printf(" argument[1] : level number \n"); printf(" argument[2] : label information \n"); return 1; - } + } length = *(int *) argv[0]; level = *(int *) argv[1]; labels = (int *) argv[2]; - if ( saLabels_ == NULL ) + if ( saLabels_ == NULL ) { saLabels_ = new int*[maxLevels_]; for ( is = 0; is < maxLevels_; is++ ) saLabels_[is] = NULL; @@ -468,7 +467,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) else if ( !strcmp(param1, "setParamFile" )) { param3 = (char *) argv[0]; - strcpy( paramFile_, param3 ); + strcpy( paramFile_, param3 ); return 0; } else if ( !strcmp(param1, "printNodalCoord" )) @@ -500,7 +499,7 @@ int MLI_Method_AMGSA::setParams(char *in_name, int argc, char *argv[]) } /***************************************************************************** - * get parameters + * get parameters *--------------------------------------------------------------------------*/ int MLI_Method_AMGSA::getParams(char *in_name, int *argc, char *argv[]) @@ -535,7 +534,7 @@ int MLI_Method_AMGSA::getParams(char *in_name, int *argc, char *argv[]) * generate multilevel structure * --------------------------------------------------------------------- */ -int MLI_Method_AMGSA::setup( MLI *mli ) +int MLI_Method_AMGSA::setup( MLI *mli ) { int level, mypid, nRows, nullspaceDimKeep, ii, jj; double startTime, elapsedTime, maxEigen, maxEigenT, dtemp=0.0; @@ -550,7 +549,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) #define DEBUG #ifdef DEBUG - int *partition, ANRows, AStart, AEnd; + int *partition, ANRows, AStart, AEnd; double *XData, rnorm; 
HYPRE_IJVector IJX, IJY; hypre_ParCSRMatrix *hypreA; @@ -603,11 +602,11 @@ int MLI_Method_AMGSA::setup( MLI *mli ) /* --------------------------------------------------------------- */ #if 1 - if (useSAMGDDFlag_ == 2) + if (useSAMGDDFlag_ == 2) { return(setupExtendedDomainDecomp(mli)); } - if (useSAMGDDFlag_ == 3) + if (useSAMGDDFlag_ == 3) { return(setupExtendedDomainDecomp2(mli)); } @@ -633,7 +632,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) /* matrices */ /* --------------------------------------------------------------- */ - if (useSAMGeFlag_) + if (useSAMGeFlag_) { level = 0; fedata = mli->getFEData( level ); @@ -651,7 +650,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) /* subdomains */ /* --------------------------------------------------------------- */ - if (useSAMGDDFlag_ == 1) + if (useSAMGDDFlag_ == 1) { level = 0; fedata = mli->getFEData( level ); @@ -685,13 +684,13 @@ int MLI_Method_AMGSA::setup( MLI *mli ) /* --------------------------------------------------------------- */ if (calibrationSize_ > 0) return(setupCalibration(mli)); - + /* --------------------------------------------------------------- */ /* if no null spaces have been provided nor computed, set null */ /* space dimension equal to node degree of freedom */ /* --------------------------------------------------------------- */ - if (nullspaceDim_ != nodeDofs_ && nullspaceVec_ == NULL + if (nullspaceDim_ != nodeDofs_ && nullspaceVec_ == NULL && numSmoothVec_ == 0) nullspaceDim_ = nodeDofs_; @@ -709,7 +708,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) #if HAVE_LOBPCG relaxNullSpaces(mli_Amat); #endif - + for (level = 0; level < numLevels_; level++ ) { if (mypid == 0 && outputLevel_ > 0) @@ -726,7 +725,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) /* -------------------------------------------------- */ mli_Amat = mli->getSystemMatrix(level); - assert (mli_Amat != NULL); + hypre_assert (mli_Amat != NULL); /* -------------------------------------------------- */ /* perform coarsening */ @@ 
-736,16 +735,16 @@ int MLI_Method_AMGSA::setup( MLI *mli ) { case MLI_METHOD_AMGSA_LOCAL : if (level == 0) - maxEigen = genP(mli_Amat,&mli_Pmat,saCounts_[0],saData_[0]); + maxEigen = genP(mli_Amat,&mli_Pmat,saCounts_[0],saData_[0]); else - maxEigen = genP(mli_Amat, &mli_Pmat, 0, NULL); + maxEigen = genP(mli_Amat, &mli_Pmat, 0, NULL); break; case MLI_METHOD_AMGSA_HYBRID : if (level == 0) - maxEigen = genP(mli_Amat,&mli_Pmat,saCounts_[0],saData_[0]); + maxEigen = genP(mli_Amat,&mli_Pmat,saCounts_[0],saData_[0]); else - maxEigen = genP(mli_Amat, &mli_Pmat, 0, NULL); + maxEigen = genP(mli_Amat, &mli_Pmat, 0, NULL); break; } if (maxEigen != 0.0) spectralNorms_[level] = maxEigen; @@ -761,7 +760,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) mli->setSystemMatrix(level+1, mli_cAmat); elapsedTime = (MLI_Utils_WTime() - startTime); RAPTime_ += elapsedTime; - if (mypid == 0 && outputLevel_ > 0) + if (mypid == 0 && outputLevel_ > 0) printf("\tRAP computed, time = %e seconds.\n", elapsedTime); #if 0 @@ -787,16 +786,16 @@ int MLI_Method_AMGSA::setup( MLI *mli ) switch (coarsenScheme_) { case MLI_METHOD_AMGSA_LOCAL : - maxEigenT = genP(mli_ATmat, &mli_Rmat, saCounts_[level], - saData_[level]); - if ( maxEigenT < 0.0 ) + maxEigenT = genP(mli_ATmat, &mli_Rmat, saCounts_[level], + saData_[level]); + if ( maxEigenT < 0.0 ) printf("MLI_Method_AMGSA::setup ERROR : maxEigenT < 0.\n"); break; case MLI_METHOD_AMGSA_HYBRID : - maxEigenT = genP(mli_ATmat, &mli_Rmat, saCounts_[level], - saData_[level]); - if ( maxEigenT < 0.0 ) + maxEigenT = genP(mli_ATmat, &mli_Rmat, saCounts_[level], + saData_[level]); + if ( maxEigenT < 0.0 ) printf("MLI_Method_AMGSA::setup ERROR : maxEigenT < 0.\n"); break; } @@ -806,7 +805,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) sprintf(paramString, "HYPRE_ParCSRT"); funcPtr = new MLI_Function(); MLI_Utils_HypreParCSRMatrixGetDestroyFunc(funcPtr); - sprintf(paramString, "HYPRE_ParCSRT" ); + sprintf(paramString, "HYPRE_ParCSRT" ); mli_Rmat = new MLI_Matrix( (void *) 
hypreRT, paramString, funcPtr ); delete funcPtr; } @@ -822,7 +821,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) /* is the coarsest grid. So quit. */ /* -------------------------------------------------- */ - if (spectralNorms_[level] == 1.0e39) + if (spectralNorms_[level] == 1.0e39) { spectralNorms_[level] = 0.0; level++; @@ -839,7 +838,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) /* perform special treatment.) */ /* -------------------------------------------------- */ - if ( useSAMGDDFlag_ == 1 && numLevels_ == 2 && + if ( useSAMGDDFlag_ == 1 && numLevels_ == 2 && !strcmp(preSmoother_, "ARPACKSuperLU") ) { setupFEDataBasedSuperLUSmoother(mli, level); @@ -857,14 +856,14 @@ int MLI_Method_AMGSA::setup( MLI *mli ) #endif continue; } - else if ( useSAMGDDFlag_ == 1 && numLevels_ == 2 && + else if ( useSAMGDDFlag_ == 1 && numLevels_ == 2 && !strcmp(preSmoother_, "SeqSuperLU") && saDataAux_ != NULL) { smootherPtr = MLI_Solver_CreateFromName(preSmoother_); sprintf( paramString, "setSubProblems" ); - targv[0] = (char *) &(saDataAux_[0][0]); - targv[1] = (char *) &(saDataAux_[0][1]); - targv[2] = (char *) &(saDataAux_[1]); + targv[0] = (char *) &(saDataAux_[0][0]); + targv[1] = (char *) &(saDataAux_[0][1]); + targv[2] = (char *) &(saDataAux_[1]); smootherPtr->setParams(paramString, 3, targv); smootherPtr->setup(mli_Amat); mli->setSmoother(level, MLI_SMOOTHER_PRE, smootherPtr); @@ -877,12 +876,12 @@ int MLI_Method_AMGSA::setup( MLI *mli ) targv[1] = (char *) preSmootherWgt_; sprintf( paramString, "relaxWeight" ); smootherPtr->setParams(paramString, 2, targv); - if ( !strcmp(preSmoother_, "Jacobi") ) + if ( !strcmp(preSmoother_, "Jacobi") ) { sprintf( paramString, "setModifiedDiag" ); smootherPtr->setParams(paramString, 0, NULL); } - if ( !strcmp(preSmoother_, "MLS") ) + if ( !strcmp(preSmoother_, "MLS") ) { sprintf( paramString, "maxEigen" ); targv[0] = (char *) &maxEigen; @@ -908,7 +907,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) targv[1] = (char *) postSmootherWgt_; 
sprintf( paramString, "relaxWeight" ); smootherPtr->setParams(paramString, 2, targv); - if ( !strcmp(postSmoother_, "MLS") ) + if ( !strcmp(postSmoother_, "MLS") ) { sprintf( paramString, "maxEigen" ); targv[0] = (char *) &maxEigen; @@ -941,7 +940,7 @@ int MLI_Method_AMGSA::setup( MLI *mli ) if (nRows > 10000) { if ( outputLevel_ > 1 && mypid == 0 ) - printf("ML_Method_AMGSA::message - nCoarse too large => GMRESSGS.\n"); + printf("ML_Method_AMGSA::message - nCoarse too large => GMRESSGS.\n"); strcpy(coarseSolver_, "GMRESSGS"); csolvePtr = MLI_Solver_CreateFromName( coarseSolver_ ); sprintf(paramString, "maxIterations %d", coarseSolverNum_); @@ -993,7 +992,7 @@ int MLI_Method_AMGSA::setOutputLevel( int level ) } /* ********************************************************************* * - * set number of levels + * set number of levels * --------------------------------------------------------------------- */ int MLI_Method_AMGSA::setNumLevels( int nlevels ) @@ -1006,7 +1005,7 @@ int MLI_Method_AMGSA::setNumLevels( int nlevels ) * set smoother * --------------------------------------------------------------------- */ -int MLI_Method_AMGSA::setSmoother(int prePost, char *stype, int num, +int MLI_Method_AMGSA::setSmoother(int prePost, char *stype, int num, double *wgt) { int i; @@ -1047,7 +1046,7 @@ int MLI_Method_AMGSA::setSmoother(int prePost, char *stype, int num, } /* ********************************************************************* * - * set coarse solver + * set coarse solver * --------------------------------------------------------------------- */ int MLI_Method_AMGSA::setCoarseSolver( char *stype, int num, double *wgt ) @@ -1063,7 +1062,7 @@ int MLI_Method_AMGSA::setCoarseSolver( char *stype, int num, double *wgt ) delete [] coarseSolverWgt_ ; if ( wgt != NULL && strcmp(coarseSolver_, "SuperLU") ) { - coarseSolverWgt_ = new double[coarseSolverNum_]; + coarseSolverWgt_ = new double[coarseSolverNum_]; for (i = 0; i < coarseSolverNum_; i++) coarseSolverWgt_ 
[i] = wgt[i]; } else coarseSolverWgt_ = NULL; @@ -1071,17 +1070,17 @@ int MLI_Method_AMGSA::setCoarseSolver( char *stype, int num, double *wgt ) } /* ********************************************************************* * - * set coarsening scheme + * set coarsening scheme * --------------------------------------------------------------------- */ int MLI_Method_AMGSA::setCoarsenScheme( int scheme ) { - if ( scheme == MLI_METHOD_AMGSA_LOCAL ) + if ( scheme == MLI_METHOD_AMGSA_LOCAL ) { coarsenScheme_ = MLI_METHOD_AMGSA_LOCAL; return 0; } - else if ( scheme == MLI_METHOD_AMGSA_HYBRID ) + else if ( scheme == MLI_METHOD_AMGSA_HYBRID ) { coarsenScheme_ = MLI_METHOD_AMGSA_HYBRID; return 0; @@ -1175,11 +1174,11 @@ int MLI_Method_AMGSA::setCalcSpectralNorm() } /* ********************************************************************* * - * load the initial aggregate information + * load the initial aggregate information * --------------------------------------------------------------------- */ int MLI_Method_AMGSA::setAggregateInfo(int level, int aggrCnt, int length, - int *aggrInfo) + int *aggrInfo) { if ( level != 0 ) { @@ -1198,8 +1197,8 @@ int MLI_Method_AMGSA::setAggregateInfo(int level, int aggrCnt, int length, * load the null space * --------------------------------------------------------------------- */ -int MLI_Method_AMGSA::setNullSpace( int nDOF, int ndim, double *nullvec, - int length ) +int MLI_Method_AMGSA::setNullSpace( int nDOF, int ndim, double *nullvec, + int length ) { #if 0 if ( (nullvec == NULL) && (nDOF != ndim) ) @@ -1266,7 +1265,7 @@ int MLI_Method_AMGSA::resetNullSpaceComponents(int length, int start, * (abridged from similar function in ML) * --------------------------------------------------------------------- */ -int MLI_Method_AMGSA::setNodalCoordinates(int num_nodes,int nDOF,int nsDim, +int MLI_Method_AMGSA::setNodalCoordinates(int num_nodes,int nDOF,int nsDim, double *coords, int numNS, double *scalings) { int i, j, k, offset, voffset, mypid; 
@@ -1302,12 +1301,12 @@ int MLI_Method_AMGSA::setNodalCoordinates(int num_nodes,int nDOF,int nsDim, if ((printToFile_ & 2) != 0 && nodeDofs_ == 3 ) { - sprintf(fname, "nodalCoord.%d", mypid); + sprintf(fname, "nodalCoord.%d", mypid); fp = fopen(fname, "w"); fprintf(fp, "%d\n", num_nodes); - for ( i = 0 ; i < num_nodes; i++ ) + for ( i = 0 ; i < num_nodes; i++ ) { - for ( j = 0 ; j < nodeDofs_; j++ ) + for ( j = 0 ; j < nodeDofs_; j++ ) fprintf(fp," %25.16e", coords[i*nodeDofs_+j]); fprintf(fp,"\n"); } @@ -1317,14 +1316,14 @@ int MLI_Method_AMGSA::setNodalCoordinates(int num_nodes,int nDOF,int nsDim, nullspaceVec_ = new double[nullspaceLen_ * nullspaceDim_]; for( i = 0 ; i < nullspaceLen_*nullspaceDim_; i++ ) nullspaceVec_[i] = 0.0; - for( i = 0 ; i < num_nodes; i++ ) + for( i = 0 ; i < num_nodes; i++ ) { if ( nodeDofs_ == 1 ) { for( k = 0; k < nsDim; k++ ) nullspaceVec_[k*nullspaceLen_+i] = 0.0; nullspaceVec_[i] = 1.0; - if ( nullspaceDim_ == 4 ) + if ( nullspaceDim_ == 4 ) { for( k = 0; k < nsDim; k++ ) nullspaceVec_[(k+1)*nullspaceLen_+i] = coords[i*nsDim+k]; @@ -1332,7 +1331,7 @@ int MLI_Method_AMGSA::setNodalCoordinates(int num_nodes,int nDOF,int nsDim, } else if ( nodeDofs_ == 3 ) { - if ( nullspaceDim_ == 6 ) + if ( nullspaceDim_ == 6 ) { voffset = i * nodeDofs_; for ( j = 0; j < 3; j++ ) @@ -1350,7 +1349,7 @@ int MLI_Method_AMGSA::setNodalCoordinates(int num_nodes,int nDOF,int nsDim, { offset = k * nullspaceLen_ + voffset + j; if ( j == k-3 ) nullspaceVec_[offset] = 0.0; - else + else { if (j+k == 4) nullspaceVec_[offset] = coords[i*3+2]; else if (j+k == 5) nullspaceVec_[offset] = coords[i*3+1]; @@ -1359,11 +1358,11 @@ int MLI_Method_AMGSA::setNodalCoordinates(int num_nodes,int nDOF,int nsDim, } } } - j = 0; k = 5; offset = k * nullspaceLen_ + voffset + j; + j = 0; k = 5; offset = k * nullspaceLen_ + voffset + j; nullspaceVec_[offset] *= -1.0; - j = 1; k = 3; offset = k * nullspaceLen_ + voffset + j; + j = 1; k = 3; offset = k * nullspaceLen_ + voffset + j; 
nullspaceVec_[offset] *= -1.0; - j = 2; k = 4; offset = k * nullspaceLen_ + voffset + j; + j = 2; k = 4; offset = k * nullspaceLen_ + voffset + j; nullspaceVec_[offset] *= -1.0; } else if ( (nullspaceDim_ == 12 || nullspaceDim_ == 21 || nullspaceDim_ == 24) && @@ -1444,15 +1443,15 @@ int MLI_Method_AMGSA::setNodalCoordinates(int num_nodes,int nDOF,int nsDim, } if ( scalings != NULL ) { - for ( i = 0 ; i < nullspaceDim_; i++ ) - for ( j = 0 ; j < nullspaceLen_; j++ ) + for ( i = 0 ; i < nullspaceDim_; i++ ) + for ( j = 0 ; j < nullspaceLen_; j++ ) nullspaceVec_[i*nullspaceLen_+j] /= scalings[j]; } return 0; } /* ********************************************************************* * - * set parameter for calibration AMG + * set parameter for calibration AMG * --------------------------------------------------------------------- */ int MLI_Method_AMGSA::setCalibrationSize( int size ) @@ -1487,12 +1486,12 @@ int MLI_Method_AMGSA::print() printf("\t*** drop tolerance for P = %e\n", dropTolForP_); printf("\t*** A-norm scheme = %d\n", calcNormScheme_); printf("\t*** minimum coarse size = %d\n", minCoarseSize_); - printf("\t*** pre smoother type = %s\n", preSmoother_); + printf("\t*** pre smoother type = %s\n", preSmoother_); printf("\t*** pre smoother nsweeps = %d\n", preSmootherNum_); - printf("\t*** post smoother type = %s\n", postSmoother_); + printf("\t*** post smoother type = %s\n", postSmoother_); printf("\t*** post smoother nsweeps = %d\n", postSmootherNum_); - printf("\t*** coarse solver type = %s\n", coarseSolver_); - printf("\t*** coarse solver nsweeps = %d\n", coarseSolverNum_); + printf("\t*** coarse solver type = %s\n", coarseSolver_); + printf("\t*** coarse solver nsweeps = %d\n", coarseSolverNum_); printf("\t*** calibration size = %d\n", calibrationSize_); printf("\t********************************************************\n"); } @@ -1565,7 +1564,7 @@ int MLI_Method_AMGSA::printStatistics(MLI *mli) printf("\t*%3d %10d %5d %5d %11d %8.3e %8.3e *\n",level, 
globalNRows, maxNnz, minNnz, thisNnz, maxVal, minVal); } - if ( level == 0 ) + if ( level == 0 ) { fineNnz = thisNnz; dfineNnz = dthisNnz; @@ -1631,7 +1630,7 @@ int MLI_Method_AMGSA::printStatistics(MLI *mli) * --------------------------------------------------------------------- */ int MLI_Method_AMGSA::getNullSpace(int &nDOF,int &ndim,double *&nullvec, - int &leng) + int &leng) { nDOF = currNodeDofs_; ndim = nullspaceDim_; @@ -1654,12 +1653,12 @@ int MLI_Method_AMGSA::copy( MLI_Method *new_obj ) new_amgsa->maxLevels_ = maxLevels_; new_amgsa->setOutputLevel( outputLevel_ ); new_amgsa->setNumLevels( numLevels_ ); - new_amgsa->setSmoother( MLI_SMOOTHER_PRE, preSmoother_, + new_amgsa->setSmoother( MLI_SMOOTHER_PRE, preSmoother_, preSmootherNum_, preSmootherWgt_ ); - new_amgsa->setSmoother( MLI_SMOOTHER_POST, postSmoother_, + new_amgsa->setSmoother( MLI_SMOOTHER_POST, postSmoother_, postSmootherNum_, postSmootherWgt_ ); new_amgsa->setCoarseSolver(coarseSolver_,coarseSolverNum_, - coarseSolverWgt_ ); + coarseSolverWgt_ ); new_amgsa->setCoarsenScheme( coarsenScheme_ ); new_amgsa->setMinCoarseSize( minCoarseSize_ ); if ( calcNormScheme_ ) new_amgsa->setCalcSpectralNorm(); @@ -1699,13 +1698,15 @@ HYPRE_ParCSRMatrix lobHYPREA; int Funct_Solve(HYPRE_ParVector b,HYPRE_ParVector x) { int ierr=0; - ierr=HYPRE_ParCSRPCGSolve(lobHYPRESolver,lobHYPREA,b,x);assert2(ierr); + ierr=HYPRE_ParCSRPCGSolve(lobHYPRESolver,lobHYPREA,b,x); + hypre_assert(ierr); return 0; } int Func_Matvec(HYPRE_ParVector x,HYPRE_ParVector y) { int ierr=0; - ierr=HYPRE_ParCSRMatrixMatvec(1.0,lobHYPREA,x,0.0,y);assert2(ierr); + ierr=HYPRE_ParCSRMatrixMatvec(1.0,lobHYPREA,x,0.0,y); + hypre_assert(ierr); return 0; } #ifdef __cplusplus @@ -1714,7 +1715,7 @@ int Func_Matvec(HYPRE_ParVector x,HYPRE_ParVector y) #endif /* ********************************************************************* * - * relax null spaces + * relax null spaces * --------------------------------------------------------------------- */ int 
MLI_Method_AMGSA::relaxNullSpaces(MLI_Matrix *mli_Amat) @@ -1734,7 +1735,7 @@ int MLI_Method_AMGSA::relaxNullSpaces(MLI_Matrix *mli_Amat) comm = getComm(); MPI_Comm_rank( comm, &mypid ); hypreA = (hypre_ParCSRMatrix *) mli_Amat->getMatrix(); - HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, + HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA, &partitioning ); startRow = partitioning[mypid]; endRow = partitioning[mypid+1] - 1; @@ -1770,7 +1771,7 @@ int MLI_Method_AMGSA::relaxNullSpaces(MLI_Matrix *mli_Amat) HYPRE_ParCSRPCGCreate(comm, &lobHYPRESolver); HYPRE_ParCSRPCGSetMaxIter(lobHYPRESolver, 10); HYPRE_ParCSRPCGSetTol(lobHYPRESolver, 1.0e-1); - HYPRE_ParCSRPCGSetup(lobHYPRESolver, lobHYPREA, + HYPRE_ParCSRPCGSetup(lobHYPRESolver, lobHYPREA, (HYPRE_ParVector) lobVecs[0], (HYPRE_ParVector) lobVecs[1]); HYPRE_ParCSRPCGSetPrecond(lobHYPRESolver, HYPRE_ParCSRDiagScale, HYPRE_ParCSRDiagScaleSetup, HYPrecon); diff --git a/src/FEI_mv/femli/mli_solver_seqsuperlu.cxx b/src/FEI_mv/femli/mli_solver_seqsuperlu.cxx index f149400ca..8767f944f 100644 --- a/src/FEI_mv/femli/mli_solver_seqsuperlu.cxx +++ b/src/FEI_mv/femli/mli_solver_seqsuperlu.cxx @@ -15,7 +15,6 @@ #include #include -#include #include "mli_solver_seqsuperlu.h" #include "HYPRE.h" #include "_hypre_parcsr_mv.h" diff --git a/src/FEI_mv/femli/mli_utils.c b/src/FEI_mv/femli/mli_utils.c index e79c503b5..51adae712 100644 --- a/src/FEI_mv/femli/mli_utils.c +++ b/src/FEI_mv/femli/mli_utils.c @@ -7,15 +7,14 @@ /****************************************************************************** * - * Utilities functions + * Utilities functions * *****************************************************************************/ /*-------------------------------------------------------------------------- - * include files + * include files *--------------------------------------------------------------------------*/ -#include #include #include #include "HYPRE.h" @@ -25,12 +24,12 @@ #include 
"_hypre_lapack.h" /*-------------------------------------------------------------------------- - * external function + * external function *--------------------------------------------------------------------------*/ #ifdef __cplusplus extern "C" { -#else +#else extern #endif int hypre_BoomerAMGBuildCoarseOperator(hypre_ParCSRMatrix*,hypre_ParCSRMatrix*, @@ -46,7 +45,7 @@ int MLI_Utils_IntTreeUpdate(int treeLeng, int *tree,int *treeInd); #define habs(x) (((x) > 0) ? x : -(x)) /***************************************************************************** - * destructor for hypre_ParCSRMatrix conforming to MLI requirements + * destructor for hypre_ParCSRMatrix conforming to MLI requirements *--------------------------------------------------------------------------*/ int MLI_Utils_HypreParCSRMatrixGetDestroyFunc(MLI_Function *funcPtr) @@ -56,7 +55,7 @@ int MLI_Utils_HypreParCSRMatrixGetDestroyFunc(MLI_Function *funcPtr) } /***************************************************************************** - * destructor for hypre_CSRMatrix conforming to MLI requirements + * destructor for hypre_CSRMatrix conforming to MLI requirements *--------------------------------------------------------------------------*/ int MLI_Utils_HypreCSRMatrixGetDestroyFunc( MLI_Function *funcPtr ) @@ -66,7 +65,7 @@ int MLI_Utils_HypreCSRMatrixGetDestroyFunc( MLI_Function *funcPtr ) } /***************************************************************************** - * destructor for hypre_ParVector conforming to MLI requirements + * destructor for hypre_ParVector conforming to MLI requirements *--------------------------------------------------------------------------*/ int MLI_Utils_HypreParVectorGetDestroyFunc( MLI_Function *funcPtr ) @@ -76,7 +75,7 @@ int MLI_Utils_HypreParVectorGetDestroyFunc( MLI_Function *funcPtr ) } /***************************************************************************** - * destructor for hypre_Vector conforming to MLI requirements + * destructor for hypre_Vector 
conforming to MLI requirements *--------------------------------------------------------------------------*/ int MLI_Utils_HypreVectorGetDestroyFunc( MLI_Function *funcPtr ) @@ -116,20 +115,20 @@ int MLI_Utils_HypreMatrixFormJacobi(void *A, double alpha, void **J) * initialize new matrix * ----------------------------------------------------------------------*/ - ierr = HYPRE_IJMatrixCreate(comm, startRow, startRow+localNRows-1, + ierr = HYPRE_IJMatrixCreate(comm, startRow, startRow+localNRows-1, startRow, startRow+localNRows-1, &IJmat); ierr += HYPRE_IJMatrixSetObjectType(IJmat, HYPRE_PARCSR); - assert( !ierr ); + hypre_assert( !ierr ); maxnnz = 0; rowLengths = hypre_CTAlloc(int, localNRows, HYPRE_MEMORY_HOST); - if ( rowLengths == NULL ) + if ( rowLengths == NULL ) { printf("FormJacobi ERROR : memory allocation.\n"); exit(1); } for ( irow = 0; irow < localNRows; irow++ ) { - rownum = startRow + irow; + rownum = startRow + irow; hypre_ParCSRMatrixGetRow(Amat, rownum, &rowSize, &colInd, NULL); rowLengths[irow] = rowSize; if ( rowSize <= 0 ) @@ -144,11 +143,11 @@ int MLI_Utils_HypreMatrixFormJacobi(void *A, double alpha, void **J) maxnnz = ( rowLengths[irow] > maxnnz ) ? 
rowLengths[irow] : maxnnz; } ierr = HYPRE_IJMatrixSetRowSizes(IJmat, rowLengths); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixInitialize(IJmat); /* ----------------------------------------------------------------------- - * load the new matrix + * load the new matrix * ----------------------------------------------------------------------*/ newColInd = hypre_CTAlloc(int, maxnnz, HYPRE_MEMORY_HOST); @@ -156,7 +155,7 @@ int MLI_Utils_HypreMatrixFormJacobi(void *A, double alpha, void **J) for ( irow = 0; irow < localNRows; irow++ ) { - rownum = startRow + irow; + rownum = startRow + irow; hypre_ParCSRMatrixGetRow(Amat, rownum, &rowSize, &colInd, &colVal); dtemp = 1.0; for ( icol = 0; icol < rowSize; icol++ ) @@ -168,9 +167,9 @@ int MLI_Utils_HypreMatrixFormJacobi(void *A, double alpha, void **J) newColInd[icol] = colInd[icol]; newColVal[icol] = - alpha * colVal[icol] * dtemp; if ( colInd[icol] == rownum ) newColVal[icol] += 1.0; - } + } newRowSize = rowSize; - if ( rowLengths[irow] == rowSize+1 ) + if ( rowLengths[irow] == rowSize+1 ) { newColInd[newRowSize] = rownum; newColVal[newRowSize++] = 1.0; @@ -191,21 +190,21 @@ int MLI_Utils_HypreMatrixFormJacobi(void *A, double alpha, void **J) hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) Jmat); (*J) = (void *) Jmat; - free( newColInd ); - free( newColVal ); - free( rowLengths ); - free( rowPart ); + hypre_TFree(newColInd , HYPRE_MEMORY_HOST); + hypre_TFree(newColVal , HYPRE_MEMORY_HOST); + hypre_TFree(rowLengths , HYPRE_MEMORY_HOST); + hypre_TFree(rowPart , HYPRE_MEMORY_HOST); return 0; } /*************************************************************************** * Given a local degree of freedom, construct an array for that for all *--------------------------------------------------------------------------*/ - + int MLI_Utils_GenPartition(MPI_Comm comm, int nlocal, int **rowPart) { int i, nprocs, mypid, *garray, count=0, count2; - + MPI_Comm_rank(comm, &mypid); MPI_Comm_size(comm, &nprocs); garray = 
hypre_CTAlloc(int, nprocs+1, HYPRE_MEMORY_HOST); @@ -255,11 +254,11 @@ int MLI_Utils_ScaleVec(hypre_ParCSRMatrix *Amat, hypre_ParVector *vec) * ----------------------------------------------------------------*/ norm2 = hypre_ParVectorInnerProd(vec, vec); hypre_ParVectorScale(1./sqrt(norm2), vec); - + /* ----------------------------------------------------------------- * multiply by matrix, perform inner product, and scale * ----------------------------------------------------------------*/ - + norm1 = hypre_ParVectorInnerProd(vec, vec); hypre_ParCSRMatrixMatvec(1.0, Amat, vec, 0.0, temp); norm2 = hypre_ParVectorInnerProd(vec, temp); @@ -268,7 +267,7 @@ int MLI_Utils_ScaleVec(hypre_ParCSRMatrix *Amat, hypre_ParVector *vec) hypre_ParVectorDestroy(temp); return 0; -} +} /*************************************************************************** * Given a matrix, find its maximum eigenvalue @@ -293,7 +292,7 @@ int MLI_Utils_ComputeSpectralRadius(hypre_ParCSRMatrix *Amat, double *maxEigen) HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix)Amat,&partition); startRow = partition[mypid]; endRow = partition[mypid+1]; - free( partition ); + hypre_TFree(partition, HYPRE_MEMORY_HOST); /* ----------------------------------------------------------------- * create two temporary vectors @@ -311,10 +310,10 @@ int MLI_Utils_ComputeSpectralRadius(hypre_ParCSRMatrix *Amat, double *maxEigen) /* ----------------------------------------------------------------- * perform the power iterations * ----------------------------------------------------------------*/ - + ierr += HYPRE_IJVectorGetObject(IJvec1, (void **) &vec1); ierr += HYPRE_IJVectorGetObject(IJvec2, (void **) &vec2); - assert(!ierr); + hypre_assert(!ierr); HYPRE_ParVectorSetRandomValues( vec1, 2934731 ); HYPRE_ParCSRMatrixMatvec(1.0,(HYPRE_ParCSRMatrix) Amat,vec1,0.0,vec2 ); HYPRE_ParVectorInnerProd( vec2, vec2, &norm2); @@ -331,13 +330,13 @@ int MLI_Utils_ComputeSpectralRadius(hypre_ParCSRMatrix *Amat, double 
*maxEigen) HYPRE_IJVectorDestroy(IJvec1); HYPRE_IJVectorDestroy(IJvec2); return 0; -} +} /****************************************************************************** * compute Ritz Values that approximates extreme eigenvalues *--------------------------------------------------------------------------*/ -int MLI_Utils_ComputeExtremeRitzValues(hypre_ParCSRMatrix *A, double *ritz, +int MLI_Utils_ComputeExtremeRitzValues(hypre_ParCSRMatrix *A, double *ritz, int scaleFlag) { int i, j, k, its, maxIter, nprocs, mypid, localNRows, globalNRows; @@ -353,12 +352,12 @@ int MLI_Utils_ComputeExtremeRitzValues(hypre_ParCSRMatrix *A, double *ritz, double *pData, *apData; /*----------------------------------------------------------------- - * fetch matrix information + * fetch matrix information *-----------------------------------------------------------------*/ comm = hypre_ParCSRMatrixComm(A); - MPI_Comm_rank(comm,&mypid); - MPI_Comm_size(comm,&nprocs); + MPI_Comm_rank(comm,&mypid); + MPI_Comm_size(comm,&nprocs); ADiag = hypre_ParCSRMatrixDiag(A); ADiagA = hypre_CSRMatrixData(ADiag); @@ -418,13 +417,13 @@ int MLI_Utils_ComputeExtremeRitzValues(hypre_ParCSRMatrix *A, double *ritz, } /*----------------------------------------------------------------- - * compute initial residual vector norm + * compute initial residual vector norm *-----------------------------------------------------------------*/ hypre_ParVectorSetRandomValues(rVec, 1209837); hypre_ParVectorSetConstantValues(pVec, 0.0); hypre_ParVectorSetConstantValues(zVec, 0.0); - rho = hypre_ParVectorInnerProd(rVec, rVec); + rho = hypre_ParVectorInnerProd(rVec, rVec); rnorm = sqrt(rho); rnormArray[0] = rnorm; if ( rnorm == 0.0 ) @@ -438,15 +437,15 @@ int MLI_Utils_ComputeExtremeRitzValues(hypre_ParCSRMatrix *A, double *ritz, } /*----------------------------------------------------------------- - * main loop + * main loop *-----------------------------------------------------------------*/ for ( its = 0; its < maxIter; 
its++ ) { rhom1 = rho; - rho = hypre_ParVectorInnerProd(rVec, rVec); + rho = hypre_ParVectorInnerProd(rVec, rVec); if (its == 0) beta = 0.0; - else + else { beta = rho / rhom1; Tmat[its-1][its] = -beta; @@ -466,11 +465,11 @@ int MLI_Utils_ComputeExtremeRitzValues(hypre_ParCSRMatrix *A, double *ritz, else for ( i = 0; i < localNRows; i++ ) apData[i] = zData[i]; - sigma = hypre_ParVectorInnerProd(pVec, apVec); + sigma = hypre_ParVectorInnerProd(pVec, apVec); alpha = rho / sigma; alphaArray[its] = sigma; hypre_ParVectorAxpy( -alpha, apVec, rVec ); - rnorm = sqrt(hypre_ParVectorInnerProd(rVec, rVec)); + rnorm = sqrt(hypre_ParVectorInnerProd(rVec, rVec)); rnormArray[its+1] = rnorm; if ( rnorm < 1.0E-8 * rnormArray[0] ) { @@ -480,7 +479,7 @@ int MLI_Utils_ComputeExtremeRitzValues(hypre_ParCSRMatrix *A, double *ritz, } /*----------------------------------------------------------------- - * construct T + * construct T *-----------------------------------------------------------------*/ Tmat[0][0] = alphaArray[0]; @@ -568,11 +567,12 @@ int MLI_Utils_ComputeExtremeRitzValues(hypre_ParCSRMatrix *A, double *ritz, hypre_ParVectorDestroy( pVec ); hypre_ParVectorDestroy( apVec ); } - free(alphaArray); - free(rnormArray); - for (i = 0; i <= maxIter; i++) if ( Tmat[i] != NULL ) free( Tmat[i] ); - free(Tmat); - free(srdiag); + hypre_TFree(alphaArray, HYPRE_MEMORY_HOST); + hypre_TFree(rnormArray, HYPRE_MEMORY_HOST); + for (i = 0; i <= maxIter; i++) + hypre_TFree(Tmat[i], HYPRE_MEMORY_HOST); + hypre_TFree(Tmat, HYPRE_MEMORY_HOST); + hypre_TFree(srdiag, HYPRE_MEMORY_HOST); return 0; } @@ -601,8 +601,8 @@ int MLI_Utils_ComputeMatrixMaxNorm(hypre_ParCSRMatrix *A, double *norm, AOffdI = hypre_CSRMatrixI(AOffd); localNRows = hypre_CSRMatrixNumRows(ADiag); comm = hypre_ParCSRMatrixComm(A); - MPI_Comm_rank(comm,&mypid); - + MPI_Comm_rank(comm,&mypid); + maxVal = 0.0; for (i = 0; i < localNRows; i++) { @@ -629,7 +629,7 @@ int MLI_Utils_ComputeMatrixMaxNorm(hypre_ParCSRMatrix *A, double *norm, 
/*************************************************************************** * Given a local degree of freedom, construct an array for that for all *--------------------------------------------------------------------------*/ - + double MLI_Utils_WTime() { clock_t ticks; @@ -642,7 +642,7 @@ double MLI_Utils_WTime() /*************************************************************************** * Given a Hypre ParCSR matrix, output the matrix to a file *--------------------------------------------------------------------------*/ - + int MLI_Utils_HypreMatrixPrint(void *in_mat, char *name) { MPI_Comm comm; @@ -656,12 +656,12 @@ int MLI_Utils_HypreMatrixPrint(void *in_mat, char *name) mat = (hypre_ParCSRMatrix *) in_mat; hypre_mat = (HYPRE_ParCSRMatrix) mat; - comm = hypre_ParCSRMatrixComm(mat); + comm = hypre_ParCSRMatrixComm(mat); MPI_Comm_rank( comm, &mypid ); HYPRE_ParCSRMatrixGetRowPartitioning( hypre_mat, &rowPart); localNRows = rowPart[mypid+1] - rowPart[mypid]; startRow = rowPart[mypid]; - free( rowPart ); + hypre_TFree(rowPart, HYPRE_MEMORY_HOST); sprintf(fname, "%s.%d", name, mypid); fp = fopen( fname, "w"); @@ -685,10 +685,10 @@ int MLI_Utils_HypreMatrixPrint(void *in_mat, char *name) } /*************************************************************************** - * Given 2 Hypre ParCSR matrix A and P, create trans(P) * A * P + * Given 2 Hypre ParCSR matrix A and P, create trans(P) * A * P *--------------------------------------------------------------------------*/ - -int MLI_Utils_HypreMatrixComputeRAP(void *Pmat, void *Amat, void **RAPmat) + +int MLI_Utils_HypreMatrixComputeRAP(void *Pmat, void *Amat, void **RAPmat) { hypre_ParCSRMatrix *hypreP, *hypreA, *hypreRAP; hypreP = (hypre_ParCSRMatrix *) Pmat; @@ -699,9 +699,9 @@ int MLI_Utils_HypreMatrixComputeRAP(void *Pmat, void *Amat, void **RAPmat) } /*************************************************************************** - * Get matrix information of a Hypre ParCSR matrix + * Get matrix information of a 
Hypre ParCSR matrix *--------------------------------------------------------------------------*/ - + int MLI_Utils_HypreMatrixGetInfo(void *Amat, int *matInfo, double *valInfo) { int mypid, nprocs, icol, isum[4], ibuf[4], *partition, thisNnz; @@ -719,7 +719,7 @@ int MLI_Utils_HypreMatrixGetInfo(void *Amat, int *matInfo, double *valInfo) localNRows = partition[mypid+1] - partition[mypid]; startrow = partition[mypid]; globalNRows = partition[nprocs]; - free( partition ); + hypre_TFree(partition, HYPRE_MEMORY_HOST); maxVal = -1.0E-30; minVal = +1.0E30; maxNnz = 0; @@ -766,8 +766,8 @@ int MLI_Utils_HypreMatrixGetInfo(void *Amat, int *matInfo, double *valInfo) /*************************************************************************** * Given a Hypre ParCSR matrix, compress it *--------------------------------------------------------------------------*/ - -int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) + +int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) { int mypid, *partition, startRow, localNRows; int newLNRows, newStartRow, blksize2; @@ -781,7 +781,7 @@ int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) /* ---------------------------------------------------------------- * fetch information about incoming matrix * ----------------------------------------------------------------*/ - + hypreA = (hypre_ParCSRMatrix *) Amat; mpiComm = hypre_ParCSRMatrixComm(hypreA); MPI_Comm_rank(mpiComm, &mypid); @@ -789,7 +789,7 @@ int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,&partition); startRow = partition[mypid]; localNRows = partition[mypid+1] - startRow; - free( partition ); + hypre_TFree(partition, HYPRE_MEMORY_HOST); if ( blksize < 0 ) blksize2 = - blksize; else blksize2 = blksize; if ( localNRows % blksize2 != 0 ) @@ -805,11 +805,11 @@ int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) 
newLNRows = localNRows / blksize2; newStartRow = startRow / blksize2; - ierr = HYPRE_IJMatrixCreate(mpiComm, newStartRow, + ierr = HYPRE_IJMatrixCreate(mpiComm, newStartRow, newStartRow+newLNRows-1, newStartRow, newStartRow+newLNRows-1, &IJAmat2); ierr += HYPRE_IJMatrixSetObjectType(IJAmat2, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); /* ---------------------------------------------------------------- * compute the row lengths of the new matrix @@ -831,7 +831,7 @@ int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) } ierr = HYPRE_IJMatrixSetRowSizes(IJAmat2, rowLengths); ierr += HYPRE_IJMatrixInitialize(IJAmat2); - assert(!ierr); + hypre_assert(!ierr); /* ---------------------------------------------------------------- * load the compressed matrix @@ -864,7 +864,7 @@ int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) newVal[k] = newVal[k] * newVal[k]; for ( j = 1; j < newSize; j++ ) { - if (newInd[j] == newInd[k]) + if (newInd[j] == newInd[k]) newVal[k] += (newVal[j] * newVal[j]); else { @@ -882,7 +882,7 @@ int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) newVal2[k] = newVal[k]; for ( j = 1; j < newSize; j++ ) { - if (newInd[j] == newInd[k]) + if (newInd[j] == newInd[k]) { newVal2[k] += newVal[j]; if ( habs(newVal[j]) > habs(newVal[k]) ) @@ -898,7 +898,7 @@ int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) newSize = k + 1; for ( j = 0; j < newSize; j++ ) { - if ( newInd[j] == newStartRow+irow ) + if ( newInd[j] == newStartRow+irow ) newVal[j] = (newVal[j])/((double) blksize2); else newVal[j] = (newVal[j])/((double) blksize2); @@ -914,17 +914,17 @@ int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) rowNum = newStartRow + irow; HYPRE_IJMatrixSetValues(IJAmat2, 1, &newSize,(const int *) &rowNum, (const int *) newInd, (const double *) newVal); - free( newInd ); - free( newVal ); - free( newVal2 ); + hypre_TFree(newInd, HYPRE_MEMORY_HOST); + 
hypre_TFree(newVal, HYPRE_MEMORY_HOST); + hypre_TFree(newVal2, HYPRE_MEMORY_HOST); } ierr = HYPRE_IJMatrixAssemble(IJAmat2); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJAmat2, (void **) &hypreA2); /*hypre_MatvecCommPkgCreate((hypre_ParCSRMatrix *) hypreA2);*/ HYPRE_IJMatrixSetObjectType( IJAmat2, -1 ); HYPRE_IJMatrixDestroy( IJAmat2 ); - if ( rowLengths != NULL ) free( rowLengths ); + hypre_TFree(rowLengths, HYPRE_MEMORY_HOST); (*Amat2) = (void *) hypreA2; return 0; } @@ -932,11 +932,11 @@ int MLI_Utils_HypreMatrixCompress(void *Amat, int blksize, void **Amat2) /*************************************************************************** * Given a Hypre ParCSR matrix, compress it *--------------------------------------------------------------------------*/ - -int MLI_Utils_HypreBoolMatrixDecompress(void *Smat, int blkSize, - void **Smat2, void *Amat) + +int MLI_Utils_HypreBoolMatrixDecompress(void *Smat, int blkSize, + void **Smat2, void *Amat) { - int mypid, *partition, startRow, localNRows, newLNRows; + int mypid, *partition, startRow, localNRows, newLNRows; int newStartRow, maxRowLeng, index, ierr, irow, sRowNum; int *rowLengths=NULL, rowNum, rowSize, *colInd, *sInd=NULL; int *newInd=NULL, newSize, j, k, nprocs, searchInd; @@ -949,7 +949,7 @@ int MLI_Utils_HypreBoolMatrixDecompress(void *Smat, int blkSize, /* ---------------------------------------------------------------- * fetch information about incoming matrix * ----------------------------------------------------------------*/ - + hypreS = (hypre_ParCSRMatrix *) Smat; hypreA = (hypre_ParCSRMatrix *) Amat; mpiComm = hypre_ParCSRMatrixComm(hypreA); @@ -958,7 +958,7 @@ int MLI_Utils_HypreBoolMatrixDecompress(void *Smat, int blkSize, HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) hypreA,&partition); startRow = partition[mypid]; localNRows = partition[mypid+1] - startRow; - free( partition ); + hypre_TFree(partition, HYPRE_MEMORY_HOST); if ( localNRows % blkSize != 0 ) { 
printf("MLI_DecompressMatrix ERROR : nrows not divisible by blksize.\n"); @@ -972,11 +972,11 @@ int MLI_Utils_HypreBoolMatrixDecompress(void *Smat, int blkSize, newLNRows = localNRows / blkSize; newStartRow = startRow / blkSize; - ierr = HYPRE_IJMatrixCreate(mpiComm, startRow, + ierr = HYPRE_IJMatrixCreate(mpiComm, startRow, startRow+localNRows-1, startRow, startRow+localNRows-1, &IJSmat2); ierr += HYPRE_IJMatrixSetObjectType(IJSmat2, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); /* ---------------------------------------------------------------- * compute the row lengths of the new matrix @@ -995,8 +995,8 @@ int MLI_Utils_HypreBoolMatrixDecompress(void *Smat, int blkSize, } ierr = HYPRE_IJMatrixSetRowSizes(IJSmat2, rowLengths); ierr += HYPRE_IJMatrixInitialize(IJSmat2); - assert(!ierr); - if ( rowLengths != NULL ) free( rowLengths ); + hypre_assert(!ierr); + hypre_TFree(rowLengths, HYPRE_MEMORY_HOST); /* ---------------------------------------------------------------- * load the decompressed matrix @@ -1024,10 +1024,10 @@ int MLI_Utils_HypreBoolMatrixDecompress(void *Smat, int blkSize, { index = colInd[k] / blkSize; searchInd = MLI_Utils_BinarySearch(index, sInd, sRowSize); - if ( searchInd >= 0 && colInd[k] == index*blkSize+j ) + if ( searchInd >= 0 && colInd[k] == index*blkSize+j ) newInd[k] = colInd[k]; else newInd[k] = -1; - } + } newSize = 0; for ( k = 0; k < rowSize; k++ ) if ( newInd[k] >= 0 ) newInd[newSize++] = newInd[k]; @@ -1036,11 +1036,11 @@ int MLI_Utils_HypreBoolMatrixDecompress(void *Smat, int blkSize, (const int *) newInd, (const double *) newVal); } } - if ( newInd != NULL ) free( newInd ); - if ( newVal != NULL ) free( newVal ); - if ( sInd != NULL ) free( sInd ); + hypre_TFree(newInd, HYPRE_MEMORY_HOST); + hypre_TFree(newVal, HYPRE_MEMORY_HOST); + hypre_TFree(sInd, HYPRE_MEMORY_HOST); ierr = HYPRE_IJMatrixAssemble(IJSmat2); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJSmat2, (void **) &hypreS2); 
HYPRE_IJMatrixSetObjectType( IJSmat2, -1 ); HYPRE_IJMatrixDestroy( IJSmat2 ); @@ -1051,7 +1051,7 @@ int MLI_Utils_HypreBoolMatrixDecompress(void *Smat, int blkSize, /*************************************************************************** * perform QR factorization *--------------------------------------------------------------------------*/ - + int MLI_Utils_QR(double *qArray, double *rArray, int nrows, int ncols) { int icol, irow, pcol, retFlag=0; @@ -1075,26 +1075,26 @@ int MLI_Utils_QR(double *qArray, double *rArray, int nrows, int ncols) prevQ = &qArray[pcol*nrows]; alpha = 0.0; for ( irow = 0; irow < nrows; irow++ ) - alpha += (currQ[irow] * prevQ[irow]); + alpha += (currQ[irow] * prevQ[irow]); currR[pcol] = alpha; for ( irow = 0; irow < nrows; irow++ ) - currQ[irow] -= ( alpha * prevQ[irow] ); + currQ[irow] -= ( alpha * prevQ[irow] ); } for ( pcol = icol; pcol < ncols; pcol++ ) currR[pcol] = 0.0; innerProd = 0.0; for ( irow = 0; irow < nrows; irow++ ) - innerProd += (currQ[irow] * currQ[irow]); + innerProd += (currQ[irow] * currQ[irow]); innerProd = sqrt( innerProd ); - if ( innerProd < 1.0e-18 ) + if ( innerProd < 1.0e-18 ) { return icol + 1; - } + } else { currR[icol] = innerProd; alpha = 1.0 / innerProd; for ( irow = 0; irow < nrows; irow++ ) - currQ[irow] = alpha * currQ[irow]; + currQ[irow] = alpha * currQ[irow]; } } #ifdef MLI_DEBUG_DETAILED @@ -1119,24 +1119,24 @@ int MLI_Utils_QR(double *qArray, double *rArray, int nrows, int ncols) /*************************************************************************** * perform SVD factorization * - * Inputs: + * Inputs: * uArray = input matrix (array of length m*n) * m = number of rows of input matrix * n = number of cols of input matrix * - * Outputs: + * Outputs: * uArray = min(m,n) by m; left singular vectors * sArray = min(m,n) singular values (decreasing order) - * vtArray = min(m,n) rows of transpose of + * vtArray = min(m,n) rows of transpose of * - * Work space: + * Work space: * workArray = array 
of length workLen * workLen = suggest 5*(m+n) *--------------------------------------------------------------------------*/ #include "fortran.h" - -int MLI_Utils_SVD(double *uArray, double *sArray, double *vtArray, + +int MLI_Utils_SVD(double *uArray, double *sArray, double *vtArray, double *workArray, int m, int n, int workLen) { #ifndef MIN @@ -1154,7 +1154,7 @@ int MLI_Utils_SVD(double *uArray, double *sArray, double *vtArray, int info; hypre_dgesvd(&jobu, &jobvt, &m, &n, uArray, - &m, sArray, (double *) NULL, &m, vtArray, &dim, workArray, + &m, sArray, (double *) NULL, &m, vtArray, &dim, workArray, &workLen, &info); #endif @@ -1181,8 +1181,8 @@ int MLI_Utils_singular_vectors(int n, double *uArray) hypre_dgesvd(&jobu, &jobvt, &n, &n, uArray, &n, sArray, NULL, &n, NULL, &n, workArray, &workLen, &info); - free(workArray); - free(sArray); + hypre_TFree(workArray, HYPRE_MEMORY_HOST); + hypre_TFree(sArray, HYPRE_MEMORY_HOST); #endif return info; @@ -1197,7 +1197,7 @@ int MLI_Utils_singular_vectors(int n, double *uArray) * le_vectors = pointer to storage space where vectors will be returned *--------------------------------------------------------------------------*/ -int MLI_Utils_ComputeLowEnergyLanczos(hypre_ParCSRMatrix *A, +int MLI_Utils_ComputeLowEnergyLanczos(hypre_ParCSRMatrix *A, int maxIter, int num_vecs_to_return, double *le_vectors) { int i, j, k, its, nprocs, mypid, localNRows, globalNRows; @@ -1211,12 +1211,12 @@ int MLI_Utils_ComputeLowEnergyLanczos(hypre_ParCSRMatrix *A, double rVecNorm; /*----------------------------------------------------------------- - * fetch matrix information + * fetch matrix information *-----------------------------------------------------------------*/ comm = hypre_ParCSRMatrixComm(A); - MPI_Comm_rank(comm,&mypid); - MPI_Comm_size(comm,&nprocs); + MPI_Comm_rank(comm,&mypid); + MPI_Comm_size(comm,&nprocs); HYPRE_ParCSRMatrixGetRowPartitioning((HYPRE_ParCSRMatrix) A, &partition); startRow = partition[mypid]; @@ -1265,13 
+1265,13 @@ int MLI_Utils_ComputeLowEnergyLanczos(hypre_ParCSRMatrix *A, } /*----------------------------------------------------------------- - * compute initial residual vector norm + * compute initial residual vector norm *-----------------------------------------------------------------*/ hypre_ParVectorSetRandomValues(rVec, 1209837); hypre_ParVectorSetConstantValues(pVec, 0.0); hypre_ParVectorSetConstantValues(zVec, 0.0); - rho = hypre_ParVectorInnerProd(rVec, rVec); + rho = hypre_ParVectorInnerProd(rVec, rVec); rnorm = sqrt(rho); rnormArray[0] = rnorm; if ( rnorm == 0.0 ) @@ -1290,23 +1290,23 @@ int MLI_Utils_ComputeLowEnergyLanczos(hypre_ParCSRMatrix *A, lanczos_p = lanczos; /*----------------------------------------------------------------- - * main loop + * main loop *-----------------------------------------------------------------*/ for ( its = 0; its < maxIter; its++ ) { - for ( i = 0; i < localNRows; i++ ) + for ( i = 0; i < localNRows; i++ ) zData[i] = rData[i]; /* scale copy lanczos vector r for use later */ - rVecNorm = sqrt(hypre_ParVectorInnerProd(rVec, rVec)); - for ( i = 0; i < localNRows; i++ ) + rVecNorm = sqrt(hypre_ParVectorInnerProd(rVec, rVec)); + for ( i = 0; i < localNRows; i++ ) *lanczos_p++ = rData[i] / rVecNorm; rhom1 = rho; - rho = hypre_ParVectorInnerProd(rVec, zVec); + rho = hypre_ParVectorInnerProd(rVec, zVec); if (its == 0) beta = 0.0; - else + else { beta = rho / rhom1; Tmat[its-1][its] = -beta; @@ -1314,11 +1314,11 @@ int MLI_Utils_ComputeLowEnergyLanczos(hypre_ParCSRMatrix *A, HYPRE_ParVectorScale( beta, (HYPRE_ParVector) pVec ); hypre_ParVectorAxpy( one, zVec, pVec ); hypre_ParCSRMatrixMatvec(one, A, pVec, 0.0, apVec); - sigma = hypre_ParVectorInnerProd(pVec, apVec); + sigma = hypre_ParVectorInnerProd(pVec, apVec); alpha = rho / sigma; alphaArray[its] = sigma; hypre_ParVectorAxpy( -alpha, apVec, rVec ); - rnorm = sqrt(hypre_ParVectorInnerProd(rVec, rVec)); + rnorm = sqrt(hypre_ParVectorInnerProd(rVec, rVec)); 
rnormArray[its+1] = rnorm; if ( rnorm < 1.0E-8 * rnormArray[0] ) { @@ -1331,7 +1331,7 @@ int MLI_Utils_ComputeLowEnergyLanczos(hypre_ParCSRMatrix *A, } /*----------------------------------------------------------------- - * construct T + * construct T *-----------------------------------------------------------------*/ Tmat[0][0] = alphaArray[0]; @@ -1394,8 +1394,8 @@ int MLI_Utils_ComputeLowEnergyLanczos(hypre_ParCSRMatrix *A, } } - free(Umat); - free(lanczos); + hypre_TFree(Umat, HYPRE_MEMORY_HOST); + hypre_TFree(lanczos, HYPRE_MEMORY_HOST); /* ----------------------------------------------------------------* * de-allocate storage for temporary vectors @@ -1408,18 +1408,19 @@ int MLI_Utils_ComputeLowEnergyLanczos(hypre_ParCSRMatrix *A, hypre_ParVectorDestroy( pVec ); hypre_ParVectorDestroy( apVec ); } - free(alphaArray); - free(rnormArray); - for (i = 0; i <= maxIter; i++) if ( Tmat[i] != NULL ) free( Tmat[i] ); - free(Tmat); + hypre_TFree(alphaArray, HYPRE_MEMORY_HOST); + hypre_TFree(rnormArray, HYPRE_MEMORY_HOST); + for (i = 0; i <= maxIter; i++) + hypre_TFree(Tmat[i], HYPRE_MEMORY_HOST); + hypre_TFree(Tmat, HYPRE_MEMORY_HOST); return 0; } /*************************************************************************** * read a matrix file and create a hypre_ParCSRMatrix from it *--------------------------------------------------------------------------*/ - -int MLI_Utils_HypreMatrixReadTuminFormat(char *filename, MPI_Comm mpiComm, + +int MLI_Utils_HypreMatrixReadTuminFormat(char *filename, MPI_Comm mpiComm, int blksize, void **Amat, int scaleFlag, double **scaleVec) { int mypid, nprocs, currProc, globalNRows, localNRows, startRow; @@ -1470,8 +1471,8 @@ int MLI_Utils_HypreMatrixReadTuminFormat(char *filename, MPI_Comm mpiComm, fscanf( fp, "%lg", &colVal ); fscanf( fp, "%d", &colNum ); if ( scaleFlag && colNum == irow ) diag[irow] = colVal; - } - } + } + } currBufSize = localNRows * 27; matIA = hypre_TAlloc(int, (localNRows+1) , HYPRE_MEMORY_HOST); @@ -1500,11 
+1501,11 @@ int MLI_Utils_HypreMatrixReadTuminFormat(char *filename, MPI_Comm mpiComm, matJA[j] = tempJA[j]; matAA[j] = tempAA[j]; } - free( tempJA ); - free( tempAA ); + hypre_TFree(tempJA , HYPRE_MEMORY_HOST); + hypre_TFree(tempAA , HYPRE_MEMORY_HOST); } fscanf( fp, "%d", &colNum ); - } + } matIA[irow-startRow+1] = nnz; } for ( irow = startRow+localNRows; irow < globalNRows; irow++ ) @@ -1515,14 +1516,14 @@ int MLI_Utils_HypreMatrixReadTuminFormat(char *filename, MPI_Comm mpiComm, fscanf( fp, "%lg", &colVal ); fscanf( fp, "%d", &colNum ); if ( scaleFlag && colNum == irow ) diag[irow] = colVal; - } - } + } + } fclose( fp ); } MPI_Barrier( mpiComm ); currProc++; } - printf("%5d : MLI_Utils_HypreMatrixReadTuminFormat : nlocal, nnz = %d %d\n", + printf("%5d : MLI_Utils_HypreMatrixReadTuminFormat : nlocal, nnz = %d %d\n", mypid, localNRows, nnz); rowLengths = hypre_TAlloc(int, localNRows , HYPRE_MEMORY_HOST); for ( irow = 0; irow < localNRows; irow++ ) @@ -1531,17 +1532,17 @@ int MLI_Utils_HypreMatrixReadTuminFormat(char *filename, MPI_Comm mpiComm, ierr = HYPRE_IJMatrixCreate(mpiComm, startRow, startRow+localNRows-1, startRow, startRow+localNRows-1, &IJmat); ierr = HYPRE_IJMatrixSetObjectType(IJmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixSetRowSizes(IJmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJmat); - assert(!ierr); + hypre_assert(!ierr); for ( irow = 0; irow < localNRows; irow++ ) { length = rowLengths[irow]; rowNum = irow + startRow; inds = &(matJA[matIA[irow]]); vals = &(matAA[matIA[irow]]); - if ( scaleFlag ) + if ( scaleFlag ) { scale = 1.0 / sqrt( diag[irow] ); for ( j = 0; j < length; j++ ) @@ -1549,15 +1550,15 @@ int MLI_Utils_HypreMatrixReadTuminFormat(char *filename, MPI_Comm mpiComm, } ierr = HYPRE_IJMatrixSetValues(IJmat, 1, &length,(const int *) &rowNum, (const int *) inds, (const double *) vals); - assert( !ierr ); + hypre_assert( !ierr ); } - free( rowLengths ); - free( matIA ); - free( matJA ); - free( 
matAA ); + hypre_TFree(rowLengths , HYPRE_MEMORY_HOST); + hypre_TFree(matIA , HYPRE_MEMORY_HOST); + hypre_TFree(matJA , HYPRE_MEMORY_HOST); + hypre_TFree(matAA , HYPRE_MEMORY_HOST); ierr = HYPRE_IJMatrixAssemble(IJmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJmat, (void**) &hypreA); HYPRE_IJMatrixSetObjectType(IJmat, -1); HYPRE_IJMatrixDestroy(IJmat); @@ -1567,7 +1568,7 @@ int MLI_Utils_HypreMatrixReadTuminFormat(char *filename, MPI_Comm mpiComm, diag2 = hypre_TAlloc(double, localNRows, HYPRE_MEMORY_HOST); for ( irow = 0; irow < localNRows; irow++ ) diag2[irow] = diag[startRow+irow]; - free( diag ); + hypre_TFree(diag, HYPRE_MEMORY_HOST); } (*scaleVec) = diag2; return ierr; @@ -1576,8 +1577,8 @@ int MLI_Utils_HypreMatrixReadTuminFormat(char *filename, MPI_Comm mpiComm, /*************************************************************************** * read a matrix file and create a hypre_ParCSRMatrix from it *--------------------------------------------------------------------------*/ - -int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, + +int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, int blksize, void **Amat, int scaleFlag, double **scaleVec) { int mypid, nprocs, currProc, globalNRows, localNRows, startRow; @@ -1628,13 +1629,13 @@ int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, matJA = hypre_TAlloc(int, currBufSize , HYPRE_MEMORY_HOST); matAA = hypre_TAlloc(double, currBufSize , HYPRE_MEMORY_HOST); - if (scaleFlag == 1) + if (scaleFlag == 1) diag = hypre_TAlloc(double, globalNRows, HYPRE_MEMORY_HOST); for ( irow = 0; irow < globalNnz; irow++ ) { fscanf( fp, "%d %d %lg", &rowNum, &colNum, &colVal ); rowNum--; - if ( scaleFlag == 1 && rowNum == colNum-1 ) + if ( scaleFlag == 1 && rowNum == colNum-1 ) diag[rowNum] = colVal; if ( rowNum >= startRow ) break; } @@ -1648,7 +1649,7 @@ int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, { fscanf( fp, 
"%d %d %lg", &rowNum, &colNum, &colVal ); rowNum--; - if ( scaleFlag == 1 && rowNum == colNum-1 ) + if ( scaleFlag == 1 && rowNum == colNum-1 ) diag[rowNum] = colVal; if ( rowNum >= startRow+localNRows ) break; if ( rowNum != currRow ) @@ -1658,7 +1659,7 @@ int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, } matJA[nnz] = colNum - 1; matAA[nnz++] = colVal; - } + } if ( j == globalNnz ) matIA[rowNum+1-startRow] = nnz; else matIA[rowNum-startRow] = nnz; @@ -1666,16 +1667,16 @@ int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, { fscanf( fp, "%d %d %lg", &rowNum, &colNum, &colVal ); rowNum--; - if ( scaleFlag == 1 && rowNum == colNum-1 ) + if ( scaleFlag == 1 && rowNum == colNum-1 ) diag[rowNum] = colVal; - } + } fclose( fp ); printf("Processor %d finished reading matrix file.\n", mypid); } MPI_Barrier( mpiComm ); currProc++; } - printf("%5d : MLI_Utils_HypreMatrixRead : nlocal, nnz = %d %d\n", + printf("%5d : MLI_Utils_HypreMatrixRead : nlocal, nnz = %d %d\n", mypid, localNRows, nnz); rowLengths = hypre_TAlloc(int, localNRows , HYPRE_MEMORY_HOST); for ( irow = 0; irow < localNRows; irow++ ) @@ -1684,17 +1685,17 @@ int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, ierr = HYPRE_IJMatrixCreate(mpiComm, startRow, startRow+localNRows-1, startRow, startRow+localNRows-1, &IJmat); ierr = HYPRE_IJMatrixSetObjectType(IJmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixSetRowSizes(IJmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJmat); - assert(!ierr); + hypre_assert(!ierr); for ( irow = 0; irow < localNRows; irow++ ) { length = rowLengths[irow]; rowNum = irow + startRow; inds = &(matJA[matIA[irow]]); vals = &(matAA[matIA[irow]]); - if ( scaleFlag == 1 ) + if ( scaleFlag == 1 ) { scale = 1.0 / sqrt( diag[rowNum] ); for ( j = 0; j < length; j++ ) @@ -1709,15 +1710,15 @@ int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, } ierr = 
HYPRE_IJMatrixSetValues(IJmat, 1, &length,(const int *) &rowNum, (const int *) inds, (const double *) vals); - assert( !ierr ); + hypre_assert( !ierr ); } - free( rowLengths ); - free( matIA ); - free( matJA ); - free( matAA ); + hypre_TFree(rowLengths , HYPRE_MEMORY_HOST); + hypre_TFree(matIA , HYPRE_MEMORY_HOST); + hypre_TFree(matJA , HYPRE_MEMORY_HOST); + hypre_TFree(matAA , HYPRE_MEMORY_HOST); ierr = HYPRE_IJMatrixAssemble(IJmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJmat, (void**) &hypreA); HYPRE_IJMatrixSetObjectType(IJmat, -1); HYPRE_IJMatrixDestroy(IJmat); @@ -1727,7 +1728,7 @@ int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, diag2 = hypre_TAlloc(double, localNRows, HYPRE_MEMORY_HOST); for ( irow = 0; irow < localNRows; irow++ ) diag2[irow] = diag[startRow+irow]; - free( diag ); + hypre_TFree(diag, HYPRE_MEMORY_HOST); } (*scaleVec) = diag2; #if 0 @@ -1735,7 +1736,7 @@ int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, fp = fopen(fname, "w"); for ( irow = 0; irow < localNRows; irow++ ) { - rowNum = startRow + irow; + rowNum = startRow + irow; hypre_ParCSRMatrixGetRow(hypreA, rowNum, &length, &inds, &vals); for ( colNum = 0; colNum < length; colNum++ ) fprintf(fp, "%d %d %e\n", rowNum, inds[colNum], vals[colNum]); @@ -1750,8 +1751,8 @@ int MLI_Utils_HypreMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, /*************************************************************************** * read matrix files and create a hypre_ParCSRMatrix from them *--------------------------------------------------------------------------*/ - -int MLI_Utils_HypreParMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, + +int MLI_Utils_HypreParMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, void **Amat, int scaleFlag, double **scaleVec) { int mypid, nprocs, globalNRows, localNRows, localNnz, startRow; @@ -1775,7 +1776,7 @@ int MLI_Utils_HypreParMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, 
exit(1); } fscanf( fp, "%d %d", &localNRows, &localNnz ); - printf("%5d : MLI_Utils_HypreParMatrixRead : nlocal, nnz = %d %d\n", + printf("%5d : MLI_Utils_HypreParMatrixRead : nlocal, nnz = %d %d\n", mypid, localNRows, localNnz); fflush(stdout); if ( localNRows < 0 || localNnz > 1000000000 ) @@ -1792,12 +1793,12 @@ int MLI_Utils_HypreParMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, if ( j == mypid ) startRow = globalNRows; globalNRows += rowsArray[j]; } - free( rowsArray ); + hypre_TFree(rowsArray, HYPRE_MEMORY_HOST); matIA = hypre_TAlloc(int, (localNRows+1) , HYPRE_MEMORY_HOST); matJA = hypre_TAlloc(int, localNnz , HYPRE_MEMORY_HOST); matAA = hypre_TAlloc(double, localNnz , HYPRE_MEMORY_HOST); - if (scaleFlag == 1) + if (scaleFlag == 1) { diag = hypre_TAlloc(double, globalNRows, HYPRE_MEMORY_HOST); diag2 = hypre_TAlloc(double, globalNRows, HYPRE_MEMORY_HOST); @@ -1818,7 +1819,7 @@ int MLI_Utils_HypreParMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, } matJA[index] = colNum - 1; matAA[index++] = colVal; - } + } matIA[localNRows] = index; fclose(fp); @@ -1835,17 +1836,17 @@ int MLI_Utils_HypreParMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, ierr = HYPRE_IJMatrixCreate(mpiComm, startRow, startRow+localNRows-1, startRow, startRow+localNRows-1, &IJmat); ierr = HYPRE_IJMatrixSetObjectType(IJmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixSetRowSizes(IJmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJmat); - assert(!ierr); + hypre_assert(!ierr); for ( irow = 0; irow < localNRows; irow++ ) { length = rowLengths[irow]; rowNum = irow + startRow; inds = &(matJA[matIA[irow]]); vals = &(matAA[matIA[irow]]); - if ( scaleFlag == 1 ) + if ( scaleFlag == 1 ) { scale = 1.0 / sqrt( diag2[rowNum] ); for ( j = 0; j < length; j++ ) @@ -1860,26 +1861,26 @@ int MLI_Utils_HypreParMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, } ierr = HYPRE_IJMatrixSetValues(IJmat, 1, &length,(const int *) &rowNum, (const int *) inds, (const 
double *) vals); - assert( !ierr ); + hypre_assert( !ierr ); } - free( rowLengths ); - free( matIA ); - free( matJA ); - free( matAA ); + hypre_TFree(rowLengths, HYPRE_MEMORY_HOST); + hypre_TFree(matIA, HYPRE_MEMORY_HOST); + hypre_TFree(matJA, HYPRE_MEMORY_HOST); + hypre_TFree(matAA, HYPRE_MEMORY_HOST); ierr = HYPRE_IJMatrixAssemble(IJmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJmat, (void**) &hypreA); HYPRE_IJMatrixSetObjectType(IJmat, -1); HYPRE_IJMatrixDestroy(IJmat); (*Amat) = (void *) hypreA; if ( scaleFlag == 1 ) { - free( diag ); + hypre_TFree(diag, HYPRE_MEMORY_HOST); diag = hypre_TAlloc(double, localNRows, HYPRE_MEMORY_HOST); for ( irow = 0; irow < localNRows; irow++ ) diag[irow] = diag2[startRow+irow]; - free( diag2 ); + hypre_TFree(diag2, HYPRE_MEMORY_HOST); } (*scaleVec) = diag; @@ -1889,11 +1890,11 @@ int MLI_Utils_HypreParMatrixReadIJAFormat(char *filename, MPI_Comm mpiComm, /*************************************************************************** * read a matrix file in HB format (sequential) *--------------------------------------------------------------------------*/ - -int MLI_Utils_HypreMatrixReadHBFormat(char *filename, MPI_Comm mpiComm, + +int MLI_Utils_HypreMatrixReadHBFormat(char *filename, MPI_Comm mpiComm, void **Amat) { - int *matIA, *matJA, *rowLengths, length, rowNum, startRow,*inds; + int *matIA, *matJA, *rowLengths, length, rowNum, startRow,*inds; int irow, lineLeng=200, localNRows, localNCols, localNnz, ierr; int rhsl; double *matAA, *vals; @@ -1939,10 +1940,10 @@ int MLI_Utils_HypreMatrixReadHBFormat(char *filename, MPI_Comm mpiComm, ierr = HYPRE_IJMatrixCreate(mpiComm, startRow, startRow+localNRows-1, startRow, startRow+localNRows-1, &IJmat); ierr = HYPRE_IJMatrixSetObjectType(IJmat, HYPRE_PARCSR); - assert(!ierr); + hypre_assert(!ierr); ierr = HYPRE_IJMatrixSetRowSizes(IJmat, rowLengths); ierr = HYPRE_IJMatrixInitialize(IJmat); - assert(!ierr); + hypre_assert(!ierr); for (irow = 0; irow < 
localNRows; irow++) { length = rowLengths[irow]; @@ -1951,15 +1952,15 @@ int MLI_Utils_HypreMatrixReadHBFormat(char *filename, MPI_Comm mpiComm, vals = &(matAA[matIA[irow]]); ierr = HYPRE_IJMatrixSetValues(IJmat, 1, &length,(const int *) &rowNum, (const int *) inds, (const double *) vals); - assert( !ierr ); + hypre_assert( !ierr ); } - free(rowLengths); - free(matIA); - free(matJA); - free(matAA); + hypre_TFree(rowLengths, HYPRE_MEMORY_HOST); + hypre_TFree(matIA, HYPRE_MEMORY_HOST); + hypre_TFree(matJA, HYPRE_MEMORY_HOST); + hypre_TFree(matAA, HYPRE_MEMORY_HOST); ierr = HYPRE_IJMatrixAssemble(IJmat); - assert( !ierr ); + hypre_assert( !ierr ); HYPRE_IJMatrixGetObject(IJmat, (void**) &hypreA); HYPRE_IJMatrixSetObjectType(IJmat, -1); HYPRE_IJMatrixDestroy(IJmat); @@ -1968,10 +1969,10 @@ int MLI_Utils_HypreMatrixReadHBFormat(char *filename, MPI_Comm mpiComm, } /*************************************************************************** - * read a vector from a file + * read a vector from a file *--------------------------------------------------------------------------*/ - -int MLI_Utils_DoubleVectorRead(char *filename, MPI_Comm mpiComm, + +int MLI_Utils_DoubleVectorRead(char *filename, MPI_Comm mpiComm, int length, int start, double *vec) { int mypid, nprocs, currProc, globalNRows; @@ -2016,7 +2017,7 @@ int MLI_Utils_DoubleVectorRead(char *filename, MPI_Comm mpiComm, fscanf( fp, "%d", &k ); fscanf( fp, "%lg", &value ); if ( numparams == 3 ) fscanf( fp, "%d", &k2 ); - } + } for ( irow = start; irow < start+length; irow++ ) { fscanf( fp, "%d", &k ); @@ -2032,16 +2033,16 @@ int MLI_Utils_DoubleVectorRead(char *filename, MPI_Comm mpiComm, MPI_Barrier( mpiComm ); currProc++; } - printf("%5d : MLI_Utils_DoubleVectorRead : nlocal, start = %d %d\n", + printf("%5d : MLI_Utils_DoubleVectorRead : nlocal, start = %d %d\n", mypid, length, start); return 0; } /*************************************************************************** - * read a vector from a file + * read a 
vector from a file *--------------------------------------------------------------------------*/ - -int MLI_Utils_DoubleParVectorRead(char *filename, MPI_Comm mpiComm, + +int MLI_Utils_DoubleParVectorRead(char *filename, MPI_Comm mpiComm, int length, int start, double *vec) { int mypid, nprocs, localNRows; @@ -2056,7 +2057,7 @@ int MLI_Utils_DoubleParVectorRead(char *filename, MPI_Comm mpiComm, fp = fopen( fname, "r" ); if ( fp == NULL ) { - printf("MLI_Utils_DoubleParVectorRead ERROR : file %s not found.\n", + printf("MLI_Utils_DoubleParVectorRead ERROR : file %s not found.\n", fname); return -1; } @@ -2079,7 +2080,7 @@ int MLI_Utils_DoubleParVectorRead(char *filename, MPI_Comm mpiComm, /*************************************************************************** * conform to the preconditioner set up from HYPRE *--------------------------------------------------------------------------*/ - + int MLI_Utils_ParCSRMLISetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, HYPRE_ParVector b, HYPRE_ParVector x ) { @@ -2096,7 +2097,7 @@ int MLI_Utils_ParCSRMLISetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, /*************************************************************************** * conform to the preconditioner apply from HYPRE *--------------------------------------------------------------------------*/ - + int MLI_Utils_ParCSRMLISolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, HYPRE_ParVector b, HYPRE_ParVector x ) { @@ -2117,7 +2118,7 @@ int MLI_Utils_ParCSRMLISolve( HYPRE_Solver solver, HYPRE_ParCSRMatrix A, /*************************************************************************** * constructor for m-Jacobi preconditioner *--------------------------------------------------------------------------*/ - + int MLI_Utils_mJacobiCreate(MPI_Comm comm, HYPRE_Solver *solver) { HYPRE_MLI_mJacobi *jacobiPtr; @@ -2138,13 +2139,13 @@ int MLI_Utils_mJacobiCreate(MPI_Comm comm, HYPRE_Solver *solver) /*************************************************************************** * 
destructor for m-Jacobi preconditioner *--------------------------------------------------------------------------*/ - + int MLI_Utils_mJacobiDestroy(HYPRE_Solver solver) { HYPRE_MLI_mJacobi *jacobiPtr = (HYPRE_MLI_mJacobi *) solver; if (jacobiPtr == NULL) return 1; - if (jacobiPtr->diagonal_ != NULL) free(jacobiPtr->diagonal_); - if (jacobiPtr->hypreRes_ != NULL) + hypre_TFree(jacobiPtr->diagonal_, HYPRE_MEMORY_HOST); + if (jacobiPtr->hypreRes_ != NULL) HYPRE_ParVectorDestroy(jacobiPtr->hypreRes_); jacobiPtr->diagonal_ = NULL; jacobiPtr->hypreRes_ = NULL; @@ -2154,7 +2155,7 @@ int MLI_Utils_mJacobiDestroy(HYPRE_Solver solver) /*************************************************************************** * set polynomial degree *--------------------------------------------------------------------------*/ - + int MLI_Utils_mJacobiSetParams(HYPRE_Solver solver, int degree) { HYPRE_MLI_mJacobi *jacobiPtr = (HYPRE_MLI_mJacobi *) solver; @@ -2166,7 +2167,7 @@ int MLI_Utils_mJacobiSetParams(HYPRE_Solver solver, int degree) /*************************************************************************** * conform to the preconditioner set up from HYPRE *--------------------------------------------------------------------------*/ - + int MLI_Utils_mJacobiSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, HYPRE_ParVector b, HYPRE_ParVector x) { @@ -2178,7 +2179,7 @@ int MLI_Utils_mJacobiSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, jacobiPtr = (HYPRE_MLI_mJacobi *) solver; if (jacobiPtr == NULL) return 1; - if (jacobiPtr->diagonal_ != NULL) free(jacobiPtr->diagonal_); + hypre_TFree(jacobiPtr->diagonal_, HYPRE_MEMORY_HOST); hypreX = (hypre_ParVector *) x; nrows = hypre_VectorSize(hypre_ParVectorLocalVector(hypreX)); jacobiPtr->diagonal_ = hypre_TAlloc(double, nrows , HYPRE_MEMORY_HOST); @@ -2211,14 +2212,14 @@ int MLI_Utils_mJacobiSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, } jacobiPtr->diagonal_[i] = 1.0 / jacobiPtr->diagonal_[i]; } - if (jacobiPtr->hypreRes_ != NULL) + if 
(jacobiPtr->hypreRes_ != NULL) HYPRE_ParVectorDestroy(jacobiPtr->hypreRes_); gnrows = hypre_ParVectorGlobalSize(hypreX); partition = hypre_ParVectorPartitioning(hypreX); MPI_Comm_size(jacobiPtr->comm_, &nprocs); newPartition = hypre_TAlloc(int, (nprocs+1) , HYPRE_MEMORY_HOST); for (i = 0; i <= nprocs; i++) newPartition[i] = partition[i]; - HYPRE_ParVectorCreate(jacobiPtr->comm_, gnrows, newPartition, + HYPRE_ParVectorCreate(jacobiPtr->comm_, gnrows, newPartition, &(jacobiPtr->hypreRes_)); HYPRE_ParVectorInitialize(jacobiPtr->hypreRes_); return 0; @@ -2227,7 +2228,7 @@ int MLI_Utils_mJacobiSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, /*************************************************************************** * conform to the preconditioner apply from HYPRE *--------------------------------------------------------------------------*/ - + int MLI_Utils_mJacobiSolve(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, HYPRE_ParVector b, HYPRE_ParVector x) { @@ -2261,7 +2262,7 @@ int MLI_Utils_mJacobiSolve(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, /*************************************************************************** * solve the system using HYPRE pcg *--------------------------------------------------------------------------*/ - + int MLI_Utils_HyprePCGSolve( CMLI *cmli, HYPRE_Matrix A, HYPRE_Vector b, HYPRE_Vector x ) { @@ -2315,7 +2316,7 @@ int MLI_Utils_HyprePCGSolve( CMLI *cmli, HYPRE_Matrix A, /*************************************************************************** * solve the system using HYPRE gmres *--------------------------------------------------------------------------*/ - + int MLI_Utils_HypreGMRESSolve(void *precon, HYPRE_Matrix A, HYPRE_Vector b, HYPRE_Vector x, char *pname) { @@ -2418,7 +2419,7 @@ int MLI_Utils_HypreGMRESSolve(void *precon, HYPRE_Matrix A, /*************************************************************************** * solve the system using HYPRE fgmres *--------------------------------------------------------------------------*/ - 
+ int MLI_Utils_HypreFGMRESSolve(void *precon, HYPRE_Matrix A, HYPRE_Vector b, HYPRE_Vector x, char *pname) { @@ -2484,11 +2485,11 @@ int MLI_Utils_HypreFGMRESSolve(void *precon, HYPRE_Matrix A, gmresPrecond = (HYPRE_Solver) precon; HYPRE_ParCSRFGMRESSetMaxIter(gmresSolver, 5); /* change this in amgcr too */ HYPRE_ParCSRFGMRESSetLogging(gmresSolver, 0); - HYPRE_ParCSRFGMRESSetPrecond(gmresSolver, MLI_Utils_mJacobiSolve, + HYPRE_ParCSRFGMRESSetPrecond(gmresSolver, MLI_Utils_mJacobiSolve, MLI_Utils_mJacobiSetup, gmresPrecond); } setupTime = MLI_Utils_WTime(); - HYPRE_ParCSRFGMRESSetup(gmresSolver, hypreA, (HYPRE_ParVector) b, + HYPRE_ParCSRFGMRESSetup(gmresSolver, hypreA, (HYPRE_ParVector) b, (HYPRE_ParVector) x); solveTime = MLI_Utils_WTime(); setupTime = solveTime - setupTime; @@ -2515,7 +2516,7 @@ int MLI_Utils_HypreFGMRESSolve(void *precon, HYPRE_Matrix A, /*************************************************************************** * solve the system using HYPRE bicgstab *--------------------------------------------------------------------------*/ - + int MLI_Utils_HypreBiCGSTABSolve( CMLI *cmli, HYPRE_Matrix A, HYPRE_Vector b, HYPRE_Vector x ) { @@ -2735,7 +2736,7 @@ int MLI_Utils_DbleQSort2a(double *dlist, int *ilist, int left, int right) } /*************************************************************************** - * merge sort on integers + * merge sort on integers *--------------------------------------------------------------------------*/ int MLI_Utils_IntMergeSort(int nList, int *listLengs, int **lists, @@ -2752,28 +2753,28 @@ int MLI_Utils_IntMergeSort(int nList, int *listLengs, int **lists, if ( totalLeng <= 0 ) return 1; #if 0 - for ( i = 0; i < nList; i++ ) + for ( i = 0; i < nList; i++ ) { sortFlag = 0; - for ( j = 1; j < listLengs[i]; j++ ) + for ( j = 1; j < listLengs[i]; j++ ) if ( lists[i][j] < lists[i][j-1] ) { sortFlag = 1; break; - } + } if ( sortFlag == 1 ) MLI_Utils_IntQSort2(lists[i], lists2[i], 0, listLengs[i]-1); } #endif - newList = 
hypre_TAlloc(int, totalLeng , HYPRE_MEMORY_HOST); + newList = hypre_TAlloc(int, totalLeng , HYPRE_MEMORY_HOST); indices = hypre_TAlloc(int, nList , HYPRE_MEMORY_HOST); tree = hypre_TAlloc(int, nList , HYPRE_MEMORY_HOST); treeInd = hypre_TAlloc(int, nList , HYPRE_MEMORY_HOST); for ( i = 0; i < nList; i++ ) indices[i] = 0; - for ( i = 0; i < nList; i++ ) + for ( i = 0; i < nList; i++ ) { - if ( listLengs[i] > 0 ) + if ( listLengs[i] > 0 ) { tree[i] = lists[i][0]; treeInd[i] = i; @@ -2813,16 +2814,16 @@ int MLI_Utils_IntMergeSort(int nList, int *listLengs, int **lists, MLI_Utils_IntTreeUpdate(nList, tree, treeInd); parseCnt++; } - (*newListOut) = newList; - (*newNListOut) = newListCnt; - free( indices ); - free( tree ); - free( treeInd ); + (*newListOut) = newList; + (*newNListOut) = newListCnt; + hypre_TFree(indices, HYPRE_MEMORY_HOST); + hypre_TFree(tree, HYPRE_MEMORY_HOST); + hypre_TFree(treeInd, HYPRE_MEMORY_HOST); return 0; } /*************************************************************************** - * tree sort on integers + * tree sort on integers *--------------------------------------------------------------------------*/ int MLI_Utils_IntTreeUpdate(int treeLeng, int *tree, int *treeInd) @@ -2852,12 +2853,12 @@ int MLI_Utils_IntTreeUpdate(int treeLeng, int *tree, int *treeInd) nextp1 = next + 1; minInd = seed; minVal = tree[seed]; - if ( next < treeLeng && tree[next] < minVal ) + if ( next < treeLeng && tree[next] < minVal ) { minInd = next; minVal = tree[next]; } - if ( nextp1 < treeLeng && tree[nextp1] < minVal ) + if ( nextp1 < treeLeng && tree[nextp1] < minVal ) { minInd = next + 1; minVal = tree[nextp1]; diff --git a/src/IJ_mv/CMakeLists.txt b/src/IJ_mv/CMakeLists.txt index 74dabdd2e..5a7c4d5ec 100644 --- a/src/IJ_mv/CMakeLists.txt +++ b/src/IJ_mv/CMakeLists.txt @@ -4,8 +4,12 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) set(HDRS + aux_parcsr_matrix.h + aux_par_vector.h HYPRE_IJ_mv.h _hypre_IJ_mv.h + IJ_matrix.h + IJ_vector.h ) set(SRCS @@ -21,12 
+25,23 @@ set(SRCS IJMatrix_parcsr.c IJVector.c IJVector_parcsr.c + IJMatrix_parcsr_device.c + IJVector_parcsr_device.c ) - -convert_filenames_to_full_paths(HDRS) -convert_filenames_to_full_paths(SRCS) -set(HYPRE_HEADERS ${HYPRE_HEADERS} ${HDRS} PARENT_SCOPE) -set(HYPRE_SOURCES ${HYPRE_SOURCES} ${SRCS} PARENT_SCOPE) +target_sources(${PROJECT_NAME} + PRIVATE ${SRCS} + ${HDRS} +) +if (HYPRE_USING_CUDA) + set(CUDA_SRCS + IJMatrix_parcsr_device.c + IJVector_parcsr_device.c + ) + convert_filenames_to_full_paths(CUDA_SRCS) + set(HYPRE_CUDA_SOURCES ${HYPRE_CUDA_SOURCES} ${CUDA_SRCS} PARENT_SCOPE) +endif () +convert_filenames_to_full_paths(HDRS) +set(HYPRE_HEADERS ${HYPRE_HEADERS} ${HDRS} PARENT_SCOPE) diff --git a/src/IJ_mv/HYPRE_IJMatrix.c b/src/IJ_mv/HYPRE_IJMatrix.c index cce2e0ed3..59f7ace08 100644 --- a/src/IJ_mv/HYPRE_IJMatrix.c +++ b/src/IJ_mv/HYPRE_IJMatrix.c @@ -35,24 +35,18 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, hypre_IJMatrix *ijmatrix; -#ifdef HYPRE_NO_GLOBAL_PARTITION HYPRE_BigInt row0, col0, rowN, colN; -#else - HYPRE_BigInt *recv_buf; - HYPRE_Int i, i4; - HYPRE_Int square; -#endif ijmatrix = hypre_CTAlloc(hypre_IJMatrix, 1, HYPRE_MEMORY_HOST); - hypre_IJMatrixComm(ijmatrix) = comm; - hypre_IJMatrixObject(ijmatrix) = NULL; - hypre_IJMatrixTranslator(ijmatrix) = NULL; - hypre_IJMatrixAssumedPart(ijmatrix) = NULL; - hypre_IJMatrixObjectType(ijmatrix) = HYPRE_UNITIALIZED; - hypre_IJMatrixAssembleFlag(ijmatrix) = 0; - hypre_IJMatrixPrintLevel(ijmatrix) = 0; - hypre_IJMatrixOMPFlag(ijmatrix) = 0; + hypre_IJMatrixComm(ijmatrix) = comm; + hypre_IJMatrixObject(ijmatrix) = NULL; + hypre_IJMatrixTranslator(ijmatrix) = NULL; + hypre_IJMatrixAssumedPart(ijmatrix) = NULL; + hypre_IJMatrixObjectType(ijmatrix) = HYPRE_UNITIALIZED; + hypre_IJMatrixAssembleFlag(ijmatrix) = 0; + hypre_IJMatrixPrintLevel(ijmatrix) = 0; + hypre_IJMatrixOMPFlag(ijmatrix) = 0; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm, &myid); @@ -86,12 +80,10 @@ HYPRE_IJMatrixCreate( 
MPI_Comm comm, return hypre_error_flag; } -#ifdef HYPRE_NO_GLOBAL_PARTITION - info = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); - row_partitioning = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); - col_partitioning = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); + row_partitioning = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); + col_partitioning = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); row_partitioning[0] = ilower; row_partitioning[1] = iupper+1; @@ -102,7 +94,7 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, as the global first row and column index */ /* proc 0 has the first row and col */ - if (myid==0) + if (myid == 0) { info[0] = ilower; info[1] = jlower; @@ -129,100 +121,6 @@ HYPRE_IJMatrixCreate( MPI_Comm comm, hypre_TFree(info, HYPRE_MEMORY_HOST); - -#else - - info = hypre_CTAlloc(HYPRE_BigInt, 4, HYPRE_MEMORY_HOST); - recv_buf = hypre_CTAlloc(HYPRE_BigInt, 4*num_procs, HYPRE_MEMORY_HOST); - row_partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs+1, HYPRE_MEMORY_HOST); - - info[0] = ilower; - info[1] = iupper; - info[2] = jlower; - info[3] = jupper; - - /* Generate row- and column-partitioning through information exchange - across all processors, check whether the matrix is square, and - if the partitionings match. i.e. 
no overlaps or gaps, - if there are overlaps or gaps in the row partitioning or column - partitioning , ierr will be set to -9 or -10, respectively */ - - hypre_MPI_Allgather(info,4,HYPRE_MPI_BIG_INT,recv_buf,4,HYPRE_MPI_BIG_INT,comm); - - row_partitioning[0] = recv_buf[0]; - square = 1; - for (i=0; i < num_procs-1; i++) - { - i4 = 4*i; - if ( recv_buf[i4+1] != (recv_buf[i4+4]-1) ) - { - hypre_error(HYPRE_ERROR_GENERIC); - hypre_TFree(ijmatrix, HYPRE_MEMORY_HOST); - hypre_TFree(info, HYPRE_MEMORY_HOST); - hypre_TFree(recv_buf, HYPRE_MEMORY_HOST); - hypre_TFree(row_partitioning, HYPRE_MEMORY_HOST); - return hypre_error_flag; - } - else - { - row_partitioning[i+1] = recv_buf[i4+4]; - } - - if ((square && (recv_buf[i4] != recv_buf[i4+2])) || - (recv_buf[i4+1] != recv_buf[i4+3]) ) - { - square = 0; - } - } - i4 = (num_procs-1)*4; - row_partitioning[num_procs] = recv_buf[i4+1]+1; - - if ((recv_buf[i4] != recv_buf[i4+2]) || (recv_buf[i4+1] != recv_buf[i4+3])) - { - square = 0; - } - - if (square) - { - col_partitioning = row_partitioning; - } - else - { - col_partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs+1, HYPRE_MEMORY_HOST); - col_partitioning[0] = recv_buf[2]; - for (i=0; i < num_procs-1; i++) - { - i4 = 4*i; - if (recv_buf[i4+3] != recv_buf[i4+6]-1) - { - hypre_error(HYPRE_ERROR_GENERIC); - hypre_TFree(ijmatrix, HYPRE_MEMORY_HOST); - hypre_TFree(info, HYPRE_MEMORY_HOST); - hypre_TFree(recv_buf, HYPRE_MEMORY_HOST); - hypre_TFree(row_partitioning, HYPRE_MEMORY_HOST); - hypre_TFree(col_partitioning, HYPRE_MEMORY_HOST); - return hypre_error_flag; - } - else - { - col_partitioning[i+1] = recv_buf[i4+6]; - } - } - col_partitioning[num_procs] = recv_buf[num_procs*4-1]+1; - } - - hypre_IJMatrixGlobalFirstRow(ijmatrix) = row_partitioning[0]; - hypre_IJMatrixGlobalFirstCol(ijmatrix) = col_partitioning[0]; - hypre_IJMatrixGlobalNumRows(ijmatrix) = row_partitioning[num_procs] - - row_partitioning[0]; - hypre_IJMatrixGlobalNumCols(ijmatrix) = col_partitioning[num_procs] - 
- col_partitioning[0]; - - hypre_TFree(info, HYPRE_MEMORY_HOST); - hypre_TFree(recv_buf, HYPRE_MEMORY_HOST); - -#endif - hypre_IJMatrixRowPartitioning(ijmatrix) = row_partitioning; hypre_IJMatrixColPartitioning(ijmatrix) = col_partitioning; @@ -304,6 +202,30 @@ HYPRE_IJMatrixInitialize( HYPRE_IJMatrix matrix ) } +HYPRE_Int +HYPRE_IJMatrixInitialize_v2( HYPRE_IJMatrix matrix, HYPRE_MemoryLocation memory_location ) +{ + hypre_IJMatrix *ijmatrix = (hypre_IJMatrix *) matrix; + + if (!ijmatrix) + { + hypre_error_in_arg(1); + return hypre_error_flag; + } + + if ( hypre_IJMatrixObjectType(ijmatrix) == HYPRE_PARCSR ) + { + hypre_IJMatrixInitializeParCSR_v2( ijmatrix, memory_location ) ; + } + else + { + hypre_error_in_arg(1); + } + + return hypre_error_flag; + +} + /*-------------------------------------------------------------------------- *--------------------------------------------------------------------------*/ @@ -347,7 +269,7 @@ hypre_PrefixSumInt(HYPRE_Int nvals, } else { - + /* Compute preliminary partial sums (in parallel) within each interval */ #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE @@ -355,20 +277,20 @@ hypre_PrefixSumInt(HYPRE_Int nvals, for (j = 0; j < nvals; j += bsize) { HYPRE_Int i, n = hypre_min((j+bsize), nvals); - + sums[0] = 0; for (i = j+1; i < n; i++) { sums[i] = sums[i-1] + vals[i-1]; } } - + /* Compute final partial sums (in serial) for the first entry of every interval */ for (j = bsize; j < nvals; j += bsize) { sums[j] = sums[j-bsize] + sums[j-1] + vals[j-1]; } - + /* Compute final partial sums (in parallel) for the remaining entries */ #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE @@ -376,7 +298,7 @@ hypre_PrefixSumInt(HYPRE_Int nvals, for (j = bsize; j < nvals; j += bsize) { HYPRE_Int i, n = hypre_min((j+bsize), nvals); - + for (i = j+1; i < n; i++) { sums[i] += sums[j]; @@ -400,7 +322,6 @@ HYPRE_IJMatrixSetValues( HYPRE_IJMatrix matrix, const HYPRE_Complex *values ) 
{ hypre_IJMatrix *ijmatrix = (hypre_IJMatrix *) matrix; - HYPRE_Int *row_indexes; if (nrows == 0) { @@ -413,11 +334,13 @@ HYPRE_IJMatrixSetValues( HYPRE_IJMatrix matrix, return hypre_error_flag; } + /* if (!ncols) { hypre_error_in_arg(3); return hypre_error_flag; } + */ if (!rows) { @@ -443,54 +366,23 @@ HYPRE_IJMatrixSetValues( HYPRE_IJMatrix matrix, return hypre_error_flag; } - /* Compute row_indexes and call Values2 routine (TODO: add OpenMP)*/ - row_indexes = hypre_CTAlloc(HYPRE_Int, nrows, HYPRE_MEMORY_HOST); - hypre_PrefixSumInt(nrows, ncols, row_indexes); - HYPRE_IJMatrixSetValues2(matrix, nrows, ncols, rows, row_indexes, cols, values); - hypre_TFree(row_indexes, HYPRE_MEMORY_HOST); + HYPRE_IJMatrixSetValues2(matrix, nrows, ncols, rows, NULL, cols, values); return hypre_error_flag; } /*-------------------------------------------------------------------------- *--------------------------------------------------------------------------*/ - HYPRE_Int -HYPRE_IJMatrixSetConstantValues( HYPRE_IJMatrix matrix, HYPRE_Complex value) -{ - hypre_IJMatrix *ijmatrix = (hypre_IJMatrix *) matrix; - - if (!ijmatrix) - { - hypre_error_in_arg(1); - return hypre_error_flag; - } - - if ( hypre_IJMatrixObjectType(ijmatrix) == HYPRE_PARCSR ) - { - return( hypre_IJMatrixSetConstantValuesParCSR( ijmatrix, value)); - } - else - { - hypre_error_in_arg(1); - } - - return hypre_error_flag; -} - -/*-------------------------------------------------------------------------- - *--------------------------------------------------------------------------*/ - -HYPRE_Int -HYPRE_IJMatrixAddToValues( HYPRE_IJMatrix matrix, - HYPRE_Int nrows, - HYPRE_Int *ncols, - const HYPRE_BigInt *rows, - const HYPRE_BigInt *cols, - const HYPRE_Complex *values ) +HYPRE_IJMatrixSetValues2( HYPRE_IJMatrix matrix, + HYPRE_Int nrows, + HYPRE_Int *ncols, + const HYPRE_BigInt *rows, + const HYPRE_Int *row_indexes, + const HYPRE_BigInt *cols, + const HYPRE_Complex *values ) { hypre_IJMatrix *ijmatrix = (hypre_IJMatrix 
*) matrix; - HYPRE_Int *row_indexes; if (nrows == 0) { @@ -509,11 +401,13 @@ HYPRE_IJMatrixAddToValues( HYPRE_IJMatrix matrix, return hypre_error_flag; } + /* if (!ncols) { hypre_error_in_arg(3); return hypre_error_flag; } + */ if (!rows) { @@ -523,13 +417,13 @@ HYPRE_IJMatrixAddToValues( HYPRE_IJMatrix matrix, if (!cols) { - hypre_error_in_arg(5); + hypre_error_in_arg(6); return hypre_error_flag; } if (!values) { - hypre_error_in_arg(6); + hypre_error_in_arg(7); return hypre_error_flag; } @@ -539,11 +433,79 @@ HYPRE_IJMatrixAddToValues( HYPRE_IJMatrix matrix, return hypre_error_flag; } - /* Compute row_indexes and call Values2 routine */ - row_indexes = hypre_CTAlloc(HYPRE_Int, nrows, HYPRE_MEMORY_HOST); - hypre_PrefixSumInt(nrows, ncols, row_indexes); - HYPRE_IJMatrixAddToValues2(matrix, nrows, ncols, rows, row_indexes, cols, values); - hypre_TFree(row_indexes, HYPRE_MEMORY_HOST); +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_IJMatrixMemoryLocation(matrix) ); + + if (exec == HYPRE_EXEC_DEVICE) + { + hypre_IJMatrixSetAddValuesParCSRDevice(ijmatrix, nrows, ncols, rows, row_indexes, cols, values, "set"); + } + else +#endif + { + HYPRE_Int *row_indexes_tmp = (HYPRE_Int *) row_indexes; + HYPRE_Int *ncols_tmp = ncols; + + if (!ncols_tmp) + { + HYPRE_Int i; + ncols_tmp = hypre_TAlloc(HYPRE_Int, nrows, HYPRE_MEMORY_HOST); + for (i = 0; i < nrows; i++) + { + ncols_tmp[i] = 1; + } + } + + if (!row_indexes) + { + row_indexes_tmp = hypre_CTAlloc(HYPRE_Int, nrows, HYPRE_MEMORY_HOST); + hypre_PrefixSumInt(nrows, ncols_tmp, row_indexes_tmp); + } + + if (hypre_IJMatrixOMPFlag(ijmatrix)) + { + hypre_IJMatrixSetValuesOMPParCSR(ijmatrix, nrows, ncols_tmp, rows, row_indexes_tmp, cols, values); + } + else + { + hypre_IJMatrixSetValuesParCSR(ijmatrix, nrows, ncols_tmp, rows, row_indexes_tmp, cols, values); + } + + if (!ncols) + { + hypre_TFree(ncols_tmp, HYPRE_MEMORY_HOST); + } + + if (!row_indexes) + { + 
hypre_TFree(row_indexes_tmp, HYPRE_MEMORY_HOST); + } + } + + return hypre_error_flag; +} + +/*-------------------------------------------------------------------------- + *--------------------------------------------------------------------------*/ +HYPRE_Int +HYPRE_IJMatrixSetConstantValues( HYPRE_IJMatrix matrix, HYPRE_Complex value) +{ + hypre_IJMatrix *ijmatrix = (hypre_IJMatrix *) matrix; + + if (!ijmatrix) + { + hypre_error_in_arg(1); + return hypre_error_flag; + } + + if ( hypre_IJMatrixObjectType(ijmatrix) == HYPRE_PARCSR ) + { + return( hypre_IJMatrixSetConstantValuesParCSR( ijmatrix, value)); + } + else + { + hypre_error_in_arg(1); + } return hypre_error_flag; } @@ -552,13 +514,12 @@ HYPRE_IJMatrixAddToValues( HYPRE_IJMatrix matrix, *--------------------------------------------------------------------------*/ HYPRE_Int -HYPRE_IJMatrixSetValues2( HYPRE_IJMatrix matrix, - HYPRE_Int nrows, - HYPRE_Int *ncols, - const HYPRE_BigInt *rows, - const HYPRE_Int *row_indexes, - const HYPRE_BigInt *cols, - const HYPRE_Complex *values ) +HYPRE_IJMatrixAddToValues( HYPRE_IJMatrix matrix, + HYPRE_Int nrows, + HYPRE_Int *ncols, + const HYPRE_BigInt *rows, + const HYPRE_BigInt *cols, + const HYPRE_Complex *values ) { hypre_IJMatrix *ijmatrix = (hypre_IJMatrix *) matrix; @@ -579,11 +540,13 @@ HYPRE_IJMatrixSetValues2( HYPRE_IJMatrix matrix, return hypre_error_flag; } + /* if (!ncols) { hypre_error_in_arg(3); return hypre_error_flag; } + */ if (!rows) { @@ -591,21 +554,15 @@ HYPRE_IJMatrixSetValues2( HYPRE_IJMatrix matrix, return hypre_error_flag; } - if (!row_indexes) - { - hypre_error_in_arg(5); - return hypre_error_flag; - } - if (!cols) { - hypre_error_in_arg(6); + hypre_error_in_arg(5); return hypre_error_flag; } if (!values) { - hypre_error_in_arg(7); + hypre_error_in_arg(6); return hypre_error_flag; } @@ -615,17 +572,9 @@ HYPRE_IJMatrixSetValues2( HYPRE_IJMatrix matrix, return hypre_error_flag; } - if (hypre_IJMatrixOMPFlag(ijmatrix)) - { - 
hypre_IJMatrixSetValuesOMPParCSR(ijmatrix, nrows, ncols, rows, row_indexes, cols, values); - } - else - { - hypre_IJMatrixSetValuesParCSR(ijmatrix, nrows, ncols, rows, row_indexes, cols, values); - } + HYPRE_IJMatrixAddToValues2(matrix, nrows, ncols, rows, NULL, cols, values); return hypre_error_flag; - } /*-------------------------------------------------------------------------- @@ -659,11 +608,13 @@ HYPRE_IJMatrixAddToValues2( HYPRE_IJMatrix matrix, return hypre_error_flag; } + /* if (!ncols) { hypre_error_in_arg(3); return hypre_error_flag; } + */ if (!rows) { @@ -671,12 +622,6 @@ HYPRE_IJMatrixAddToValues2( HYPRE_IJMatrix matrix, return hypre_error_flag; } - if (!row_indexes) - { - hypre_error_in_arg(5); - return hypre_error_flag; - } - if (!cols) { hypre_error_in_arg(6); @@ -695,13 +640,53 @@ HYPRE_IJMatrixAddToValues2( HYPRE_IJMatrix matrix, return hypre_error_flag; } - if (hypre_IJMatrixOMPFlag(ijmatrix)) +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_IJMatrixMemoryLocation(matrix) ); + + if (exec == HYPRE_EXEC_DEVICE) { - hypre_IJMatrixAddToValuesOMPParCSR(ijmatrix, nrows, ncols, rows, row_indexes, cols, values); + hypre_IJMatrixSetAddValuesParCSRDevice(ijmatrix, nrows, ncols, rows, row_indexes, cols, values, "add"); } else +#endif { - hypre_IJMatrixAddToValuesParCSR(ijmatrix, nrows, ncols, rows, row_indexes, cols, values); + HYPRE_Int *row_indexes_tmp = (HYPRE_Int *) row_indexes; + HYPRE_Int *ncols_tmp = ncols; + + if (!ncols_tmp) + { + HYPRE_Int i; + ncols_tmp = hypre_TAlloc(HYPRE_Int, nrows, HYPRE_MEMORY_HOST); + for (i = 0; i < nrows; i++) + { + ncols_tmp[i] = 1; + } + } + + if (!row_indexes) + { + row_indexes_tmp = hypre_CTAlloc(HYPRE_Int, nrows, HYPRE_MEMORY_HOST); + hypre_PrefixSumInt(nrows, ncols_tmp, row_indexes_tmp); + } + + if (hypre_IJMatrixOMPFlag(ijmatrix)) + { + hypre_IJMatrixAddToValuesOMPParCSR(ijmatrix, nrows, ncols_tmp, rows, row_indexes_tmp, cols, values); + } + else 
+ { + hypre_IJMatrixAddToValuesParCSR(ijmatrix, nrows, ncols_tmp, rows, row_indexes_tmp, cols, values); + } + + if (!ncols) + { + hypre_TFree(ncols_tmp, HYPRE_MEMORY_HOST); + } + + if (!row_indexes) + { + hypre_TFree(row_indexes_tmp, HYPRE_MEMORY_HOST); + } } return hypre_error_flag; @@ -723,7 +708,18 @@ HYPRE_IJMatrixAssemble( HYPRE_IJMatrix matrix ) if ( hypre_IJMatrixObjectType(ijmatrix) == HYPRE_PARCSR ) { - return( hypre_IJMatrixAssembleParCSR( ijmatrix ) ); +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_IJMatrixMemoryLocation(matrix) ); + + if (exec == HYPRE_EXEC_DEVICE) + { + return( hypre_IJMatrixAssembleParCSRDevice( ijmatrix ) ); + } + else +#endif + { + return( hypre_IJMatrixAssembleParCSR( ijmatrix ) ); + } } else { @@ -914,17 +910,10 @@ HYPRE_IJMatrixGetLocalRange( HYPRE_IJMatrix matrix, hypre_MPI_Comm_rank(comm, &my_id); -#ifdef HYPRE_NO_GLOBAL_PARTITION *ilower = row_partitioning[0]; *iupper = row_partitioning[1]-1; *jlower = col_partitioning[0]; *jupper = col_partitioning[1]-1; -#else - *ilower = row_partitioning[my_id]; - *iupper = row_partitioning[my_id+1]-1; - *jlower = col_partitioning[my_id]; - *jupper = col_partitioning[my_id+1]-1; -#endif return hypre_error_flag; } @@ -1044,6 +1033,8 @@ HYPRE_IJMatrixSetMaxOffProcElmts( HYPRE_IJMatrix matrix, } /*-------------------------------------------------------------------------- + * HYPRE_IJMatrixRead + * create IJMatrix on host memory *--------------------------------------------------------------------------*/ HYPRE_Int @@ -1075,7 +1066,8 @@ HYPRE_IJMatrixRead( const char *filename, HYPRE_IJMatrixCreate(comm, ilower, iupper, jlower, jupper, &matrix); HYPRE_IJMatrixSetObjectType(matrix, type); - HYPRE_IJMatrixInitialize(matrix); + + HYPRE_IJMatrixInitialize_v2(matrix, HYPRE_MEMORY_HOST); /* It is important to ensure that whitespace follows the index value to help * catch mistakes in the input file. See comments in IJVectorRead(). 
*/ @@ -1107,26 +1099,13 @@ HYPRE_IJMatrixRead( const char *filename, } /*-------------------------------------------------------------------------- + * HYPRE_IJMatrixPrint *--------------------------------------------------------------------------*/ HYPRE_Int HYPRE_IJMatrixPrint( HYPRE_IJMatrix matrix, const char *filename ) { - MPI_Comm comm; - HYPRE_BigInt *row_partitioning; - HYPRE_BigInt *col_partitioning; - HYPRE_BigInt ilower, iupper, jlower, jupper; - HYPRE_BigInt i, ii; - HYPRE_Int j; - HYPRE_Int ncols; - HYPRE_BigInt *cols; - HYPRE_Complex *values; - HYPRE_Int myid; - char new_filename[255]; - FILE *file; - void *object; - if (!matrix) { hypre_error_in_arg(1); @@ -1139,81 +1118,28 @@ HYPRE_IJMatrixPrint( HYPRE_IJMatrix matrix, return hypre_error_flag; } - comm = hypre_IJMatrixComm(matrix); - hypre_MPI_Comm_rank(comm, &myid); + void *object; + HYPRE_IJMatrixGetObject(matrix, &object); + HYPRE_ParCSRMatrix par_csr = (HYPRE_ParCSRMatrix) object; - hypre_sprintf(new_filename,"%s.%05d", filename, myid); + HYPRE_MemoryLocation memory_location = hypre_IJMatrixMemoryLocation(matrix); - if ((file = fopen(new_filename, "w")) == NULL) + if ( hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_HOST ) { - hypre_error_in_arg(2); - return hypre_error_flag; + hypre_ParCSRMatrixPrintIJ(par_csr, 0, 0, filename); } - - row_partitioning = hypre_IJMatrixRowPartitioning(matrix); - col_partitioning = hypre_IJMatrixColPartitioning(matrix); -#ifdef HYPRE_NO_GLOBAL_PARTITION - ilower = row_partitioning[0]; - iupper = row_partitioning[1] - 1; - jlower = col_partitioning[0]; - jupper = col_partitioning[1] - 1; -#else - ilower = row_partitioning[myid]; - iupper = row_partitioning[myid+1] - 1; - jlower = col_partitioning[myid]; - jupper = col_partitioning[myid+1] - 1; -#endif - hypre_fprintf(file, "%b %b %b %b\n", ilower, iupper, jlower, jupper); - - HYPRE_IJMatrixGetObject(matrix, &object); - - for (i = ilower; i <= iupper; i++) + else { - if ( hypre_IJMatrixObjectType(matrix) 
== HYPRE_PARCSR ) - { -#ifdef HYPRE_NO_GLOBAL_PARTITION - ii = i - hypre_IJMatrixGlobalFirstRow(matrix); -#else - ii = i - row_partitioning[0]; -#endif - HYPRE_ParCSRMatrixGetRow((HYPRE_ParCSRMatrix) object, - ii, &ncols, &cols, &values); - for (j = 0; j < ncols; j++) - { -#ifdef HYPRE_NO_GLOBAL_PARTITION - cols[j] += hypre_IJMatrixGlobalFirstCol(matrix); -#else - cols[j] += col_partitioning[0]; -#endif - } - } - - for (j = 0; j < ncols; j++) - { - hypre_fprintf(file, "%b %b %.14e\n", i, cols[j], values[j]); - } - - if ( hypre_IJMatrixObjectType(matrix) == HYPRE_PARCSR ) - { - for (j = 0; j < ncols; j++) - { -#ifdef HYPRE_NO_GLOBAL_PARTITION - cols[j] -= hypre_IJMatrixGlobalFirstCol(matrix); -#else - cols[j] -= col_partitioning[0]; -#endif - } - HYPRE_ParCSRMatrixRestoreRow((HYPRE_ParCSRMatrix) object, - ii, &ncols, &cols, &values); - } + HYPRE_ParCSRMatrix par_csr2 = hypre_ParCSRMatrixClone_v2(par_csr, 1, HYPRE_MEMORY_HOST); + hypre_ParCSRMatrixPrintIJ(par_csr2, 0, 0, filename); + hypre_ParCSRMatrixDestroy(par_csr2); } - fclose(file); - return hypre_error_flag; } /*-------------------------------------------------------------------------- + * HYPRE_IJMatrixSetOMPFlag *--------------------------------------------------------------------------*/ HYPRE_Int diff --git a/src/IJ_mv/HYPRE_IJVector.c b/src/IJ_mv/HYPRE_IJVector.c index 82168f415..ad3dd0391 100644 --- a/src/IJ_mv/HYPRE_IJVector.c +++ b/src/IJ_mv/HYPRE_IJVector.c @@ -28,19 +28,13 @@ HYPRE_IJVectorCreate( MPI_Comm comm, hypre_IJVector *vec; HYPRE_Int num_procs, my_id; HYPRE_BigInt *partitioning; - -#ifdef HYPRE_NO_GLOBAL_PARTITION + HYPRE_BigInt row0, rowN; -#else - HYPRE_BigInt *recv_buf; - HYPRE_BigInt *info; - HYPRE_Int i, i2; -#endif vec = hypre_CTAlloc(hypre_IJVector, 1, HYPRE_MEMORY_HOST); - + if (!vec) - { + { hypre_error(HYPRE_ERROR_MEMORY); return hypre_error_flag; } @@ -60,25 +54,22 @@ HYPRE_IJVectorCreate( MPI_Comm comm, return hypre_error_flag; } - -#ifdef HYPRE_NO_GLOBAL_PARTITION - partitioning 
= hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); partitioning[0] = jlower; partitioning[1] = jupper+1; - + /* now we need the global number of rows as well as the global first row index */ /* proc 0 has the first row */ - if (my_id==0) + if (my_id==0) { row0 = jlower; } hypre_MPI_Bcast(&row0, 1, HYPRE_MPI_BIG_INT, 0, comm); - /* proc (num_procs-1) has the last row */ + /* proc (num_procs-1) has the last row */ if (my_id == (num_procs-1)) { rowN = jupper; @@ -87,46 +78,6 @@ HYPRE_IJVectorCreate( MPI_Comm comm, hypre_IJVectorGlobalFirstRow(vec) = row0; hypre_IJVectorGlobalNumRows(vec) = rowN - row0 + 1; - -#else - - info = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); - recv_buf = hypre_CTAlloc(HYPRE_BigInt, 2*num_procs, HYPRE_MEMORY_HOST); - partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs+1, HYPRE_MEMORY_HOST); - - info[0] = jlower; - info[1] = jupper; - - hypre_MPI_Allgather(info, 2, HYPRE_MPI_BIG_INT, recv_buf, 2, HYPRE_MPI_BIG_INT, comm); - - partitioning[0] = recv_buf[0]; - for (i=0; i < num_procs-1; i++) - { - i2 = i+i; - if (recv_buf[i2+1] != (recv_buf[i2+2]-1)) - { - /*hypre_printf("Inconsistent partitioning -- HYPRE_IJVectorCreate\n"); */ - hypre_error(HYPRE_ERROR_GENERIC); - hypre_TFree(info, HYPRE_MEMORY_HOST); - hypre_TFree(recv_buf, HYPRE_MEMORY_HOST); - hypre_TFree(partitioning, HYPRE_MEMORY_HOST); - hypre_TFree(vec, HYPRE_MEMORY_HOST); - return hypre_error_flag; - } - else - partitioning[i+1] = recv_buf[i2+2]; - } - i2 = (num_procs-1)*2; - partitioning[num_procs] = recv_buf[i2+1]+1; - - hypre_TFree(info, HYPRE_MEMORY_HOST); - hypre_TFree(recv_buf, HYPRE_MEMORY_HOST); - - - hypre_IJVectorGlobalFirstRow(vec) = partitioning[0]; - hypre_IJVectorGlobalNumRows(vec)= partitioning[num_procs]-partitioning[0]; - -#endif hypre_IJVectorComm(vec) = comm; hypre_IJVectorPartitioning(vec) = partitioning; @@ -137,7 +88,7 @@ HYPRE_IJVectorCreate( MPI_Comm comm, hypre_IJVectorPrintLevel(vec) = 0; *vector = (HYPRE_IJVector) vec; - + return hypre_error_flag; 
} @@ -145,7 +96,7 @@ HYPRE_IJVectorCreate( MPI_Comm comm, * HYPRE_IJVectorDestroy *--------------------------------------------------------------------------*/ -HYPRE_Int +HYPRE_Int HYPRE_IJVectorDestroy( HYPRE_IJVector vector ) { hypre_IJVector *vec = (hypre_IJVector *) vector; @@ -154,17 +105,21 @@ HYPRE_IJVectorDestroy( HYPRE_IJVector vector ) { hypre_error_in_arg(1); return hypre_error_flag; - } + } if (hypre_IJVectorPartitioning(vec)) + { hypre_TFree(hypre_IJVectorPartitioning(vec), HYPRE_MEMORY_HOST); + } if (hypre_IJVectorAssumedPart(vec)) - hypre_AssumedPartitionDestroy((hypre_IJAssumedPart*)hypre_IJVectorAssumedPart(vec)); + { + hypre_AssumedPartitionDestroy((hypre_IJAssumedPart*)hypre_IJVectorAssumedPart(vec)); + } if ( hypre_IJVectorObjectType(vec) == HYPRE_PARCSR ) { - hypre_IJVectorDestroyPar(vec) ; + hypre_IJVectorDestroyPar(vec); if (hypre_IJVectorTranslator(vec)) { hypre_AuxParVectorDestroy((hypre_AuxParVector *) @@ -186,7 +141,7 @@ HYPRE_IJVectorDestroy( HYPRE_IJVector vector ) * HYPRE_IJVectorInitialize *--------------------------------------------------------------------------*/ -HYPRE_Int +HYPRE_Int HYPRE_IJVectorInitialize( HYPRE_IJVector vector ) { hypre_IJVector *vec = (hypre_IJVector *) vector; @@ -195,12 +150,14 @@ HYPRE_IJVectorInitialize( HYPRE_IJVector vector ) { hypre_error_in_arg(1); return hypre_error_flag; - } + } if ( hypre_IJVectorObjectType(vec) == HYPRE_PARCSR ) { if (!hypre_IJVectorObject(vec)) - hypre_IJVectorCreatePar(vec, hypre_IJVectorPartitioning(vec)); + { + hypre_IJVectorCreatePar(vec, hypre_IJVectorPartitioning(vec)); + } hypre_IJVectorInitializePar(vec); } @@ -212,6 +169,34 @@ HYPRE_IJVectorInitialize( HYPRE_IJVector vector ) return hypre_error_flag; } +HYPRE_Int +HYPRE_IJVectorInitialize_v2( HYPRE_IJVector vector, HYPRE_MemoryLocation memory_location ) +{ + hypre_IJVector *vec = (hypre_IJVector *) vector; + + if (!vec) + { + hypre_error_in_arg(1); + return hypre_error_flag; + } + + if ( hypre_IJVectorObjectType(vec) == 
HYPRE_PARCSR ) + { + if (!hypre_IJVectorObject(vec)) + { + hypre_IJVectorCreatePar(vec, hypre_IJVectorPartitioning(vec)); + } + + hypre_IJVectorInitializePar_v2(vec, memory_location); + } + else + { + hypre_error_in_arg(1); + } + + return hypre_error_flag; +} + /*-------------------------------------------------------------------------- * HYPRE_IJVectorSetPrintLevel *--------------------------------------------------------------------------*/ @@ -237,7 +222,7 @@ HYPRE_IJVectorSetPrintLevel( HYPRE_IJVector vector, * HYPRE_IJVectorSetValues *--------------------------------------------------------------------------*/ -HYPRE_Int +HYPRE_Int HYPRE_IJVectorSetValues( HYPRE_IJVector vector, HYPRE_Int nvalues, const HYPRE_BigInt *indices, @@ -251,23 +236,34 @@ HYPRE_IJVectorSetValues( HYPRE_IJVector vector, { hypre_error_in_arg(1); return hypre_error_flag; - } + } if (nvalues < 0) { hypre_error_in_arg(2); return hypre_error_flag; - } + } if (!values) { hypre_error_in_arg(4); return hypre_error_flag; - } + } if ( hypre_IJVectorObjectType(vec) == HYPRE_PARCSR ) { - return( hypre_IJVectorSetValuesPar(vec, nvalues, indices, values) ); +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_IJVectorMemoryLocation(vector) ); + + if (exec == HYPRE_EXEC_DEVICE) + { + return ( hypre_IJVectorSetAddValuesParDevice(vec, nvalues, indices, values, "set") ); + } + else +#endif + { + return( hypre_IJVectorSetValuesPar(vec, nvalues, indices, values) ); + } } else { @@ -281,7 +277,7 @@ HYPRE_IJVectorSetValues( HYPRE_IJVector vector, * HYPRE_IJVectorAddToValues *--------------------------------------------------------------------------*/ -HYPRE_Int +HYPRE_Int HYPRE_IJVectorAddToValues( HYPRE_IJVector vector, HYPRE_Int nvalues, const HYPRE_BigInt *indices, @@ -295,23 +291,34 @@ HYPRE_IJVectorAddToValues( HYPRE_IJVector vector, { hypre_error_in_arg(1); return hypre_error_flag; - } + } if (nvalues < 0) { hypre_error_in_arg(2); return 
hypre_error_flag; - } + } if (!values) { hypre_error_in_arg(4); return hypre_error_flag; - } + } if ( hypre_IJVectorObjectType(vec) == HYPRE_PARCSR ) { - return( hypre_IJVectorAddToValuesPar(vec, nvalues, indices, values) ); +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_IJVectorMemoryLocation(vector) ); + + if (exec == HYPRE_EXEC_DEVICE) + { + return ( hypre_IJVectorSetAddValuesParDevice(vec, nvalues, indices, values, "add") ); + } + else +#endif + { + return ( hypre_IJVectorAddToValuesPar(vec, nvalues, indices, values) ); + } } else { @@ -325,8 +332,8 @@ HYPRE_IJVectorAddToValues( HYPRE_IJVector vector, * HYPRE_IJVectorAssemble *--------------------------------------------------------------------------*/ -HYPRE_Int -HYPRE_IJVectorAssemble( HYPRE_IJVector vector ) +HYPRE_Int +HYPRE_IJVectorAssemble( HYPRE_IJVector vector ) { hypre_IJVector *vec = (hypre_IJVector *) vector; @@ -334,13 +341,24 @@ HYPRE_IJVectorAssemble( HYPRE_IJVector vector ) { hypre_error_in_arg(1); return hypre_error_flag; - } + } if ( hypre_IJVectorObjectType(vec) == HYPRE_PARCSR ) { - return( hypre_IJVectorAssemblePar(vec) ); +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_ExecutionPolicy exec = hypre_GetExecPolicy1( hypre_IJVectorMemoryLocation(vector) ); + + if (exec == HYPRE_EXEC_DEVICE) + { + return( hypre_IJVectorAssembleParDevice(vec) ); + } + else +#endif + { + return( hypre_IJVectorAssemblePar(vec) ); + } } - else + else { hypre_error_in_arg(1); } @@ -352,7 +370,7 @@ HYPRE_IJVectorAssemble( HYPRE_IJVector vector ) * HYPRE_IJVectorGetValues *--------------------------------------------------------------------------*/ -HYPRE_Int +HYPRE_Int HYPRE_IJVectorGetValues( HYPRE_IJVector vector, HYPRE_Int nvalues, const HYPRE_BigInt *indices, @@ -366,19 +384,19 @@ HYPRE_IJVectorGetValues( HYPRE_IJVector vector, { hypre_error_in_arg(1); return hypre_error_flag; - } + } if (nvalues < 0) { hypre_error_in_arg(2); 
return hypre_error_flag; - } + } if (!values) { hypre_error_in_arg(4); return hypre_error_flag; - } + } if ( hypre_IJVectorObjectType(vec) == HYPRE_PARCSR ) { @@ -396,9 +414,9 @@ HYPRE_IJVectorGetValues( HYPRE_IJVector vector, * HYPRE_IJVectorSetMaxOffProcElmts *--------------------------------------------------------------------------*/ -HYPRE_Int -HYPRE_IJVectorSetMaxOffProcElmts( HYPRE_IJVector vector, - HYPRE_Int max_off_proc_elmts ) +HYPRE_Int +HYPRE_IJVectorSetMaxOffProcElmts( HYPRE_IJVector vector, + HYPRE_Int max_off_proc_elmts ) { hypre_IJVector *vec = (hypre_IJVector *) vector; @@ -406,7 +424,7 @@ HYPRE_IJVectorSetMaxOffProcElmts( HYPRE_IJVector vector, { hypre_error_in_arg(1); return hypre_error_flag; - } + } if ( hypre_IJVectorObjectType(vec) == HYPRE_PARCSR ) { @@ -424,7 +442,7 @@ HYPRE_IJVectorSetMaxOffProcElmts( HYPRE_IJVector vector, * HYPRE_IJVectorSetObjectType *--------------------------------------------------------------------------*/ -HYPRE_Int +HYPRE_Int HYPRE_IJVectorSetObjectType( HYPRE_IJVector vector, HYPRE_Int type ) { @@ -434,7 +452,7 @@ HYPRE_IJVectorSetObjectType( HYPRE_IJVector vector, { hypre_error_in_arg(1); return hypre_error_flag; - } + } hypre_IJVectorObjectType(vec) = type; @@ -455,7 +473,7 @@ HYPRE_IJVectorGetObjectType( HYPRE_IJVector vector, { hypre_error_in_arg(1); return hypre_error_flag; - } + } *type = hypre_IJVectorObjectType(vec); @@ -480,19 +498,14 @@ HYPRE_IJVectorGetLocalRange( HYPRE_IJVector vector, { hypre_error_in_arg(1); return hypre_error_flag; - } + } comm = hypre_IJVectorComm(vec); partitioning = hypre_IJVectorPartitioning(vec); hypre_MPI_Comm_rank(comm, &my_id); -#ifdef HYPRE_NO_GLOBAL_PARTITION *jlower = partitioning[0]; *jupper = partitioning[1]-1; -#else - *jlower = partitioning[my_id]; - *jupper = partitioning[my_id+1]-1; -#endif return hypre_error_flag; } @@ -510,7 +523,7 @@ HYPRE_IJVectorGetObject( HYPRE_IJVector vector, { hypre_error_in_arg(1); return hypre_error_flag; - } + } *object = 
hypre_IJVectorObject(vec); @@ -519,6 +532,7 @@ HYPRE_IJVectorGetObject( HYPRE_IJVector vector, /*-------------------------------------------------------------------------- * HYPRE_IJVectorRead + * create IJVector on host memory *--------------------------------------------------------------------------*/ HYPRE_Int @@ -535,7 +549,7 @@ HYPRE_IJVectorRead( const char *filename, FILE *file; hypre_MPI_Comm_rank(comm, &myid); - + hypre_sprintf(new_filename,"%s.%05d", filename, myid); if ((file = fopen(new_filename, "r")) == NULL) @@ -548,7 +562,8 @@ HYPRE_IJVectorRead( const char *filename, HYPRE_IJVectorCreate(comm, jlower, jupper, &vector); HYPRE_IJVectorSetObjectType(vector, type); - HYPRE_IJVectorInitialize(vector); + + HYPRE_IJVectorInitialize_v2(vector, HYPRE_MEMORY_HOST); /* It is important to ensure that whitespace follows the index value to help * catch mistakes in the input file. This is done with %*[ \t]. Using a @@ -562,9 +577,13 @@ HYPRE_IJVectorRead( const char *filename, return hypre_error_flag; } if (j < jlower || j > jupper) - HYPRE_IJVectorAddToValues(vector, 1, &j, &value); + { + HYPRE_IJVectorAddToValues(vector, 1, &j, &value); + } else - HYPRE_IJVectorSetValues(vector, 1, &j, &value); + { + HYPRE_IJVectorSetValues(vector, 1, &j, &value); + } } HYPRE_IJVectorAssemble(vector); @@ -587,8 +606,8 @@ HYPRE_IJVectorPrint( HYPRE_IJVector vector, MPI_Comm comm; HYPRE_BigInt *partitioning; HYPRE_BigInt jlower, jupper, j; - HYPRE_Complex value; - HYPRE_Int myid; + HYPRE_Complex *h_values = NULL, *d_values = NULL, *values = NULL; + HYPRE_Int myid, n_local; char new_filename[255]; FILE *file; @@ -596,11 +615,11 @@ HYPRE_IJVectorPrint( HYPRE_IJVector vector, { hypre_error_in_arg(1); return hypre_error_flag; - } + } comm = hypre_IJVectorComm(vector); hypre_MPI_Comm_rank(comm, &myid); - + hypre_sprintf(new_filename,"%s.%05d", filename, myid); if ((file = fopen(new_filename, "w")) == NULL) @@ -610,23 +629,39 @@ HYPRE_IJVectorPrint( HYPRE_IJVector vector, } 
partitioning = hypre_IJVectorPartitioning(vector); -#ifdef HYPRE_NO_GLOBAL_PARTITION jlower = partitioning[0]; jupper = partitioning[1] - 1; -#else - jlower = partitioning[myid]; - jupper = partitioning[myid+1] - 1; -#endif + n_local = jupper - jlower + 1; + hypre_fprintf(file, "%b %b\n", jlower, jupper); - for (j = jlower; j <= jupper; j++) + HYPRE_MemoryLocation memory_location = hypre_IJVectorMemoryLocation(vector); + + d_values = hypre_TAlloc(HYPRE_Complex, n_local, memory_location); + + HYPRE_IJVectorGetValues(vector, n_local, NULL, d_values); + + if ( hypre_GetActualMemLocation(memory_location) == hypre_MEMORY_HOST ) { - HYPRE_IJVectorGetValues(vector, 1, &j, &value); + values = d_values; + } + else + { + h_values = hypre_TAlloc(HYPRE_Complex, n_local, HYPRE_MEMORY_HOST); + hypre_TMemcpy(h_values, d_values, HYPRE_Complex, n_local, HYPRE_MEMORY_HOST, memory_location); + values = h_values; + } - hypre_fprintf(file, "%b %.14e\n", j, value); + for (j = jlower; j <= jupper; j++) + { + hypre_fprintf(file, "%b %.14e\n", j, values[j-jlower]); } + hypre_TFree(d_values, memory_location); + hypre_TFree(h_values, HYPRE_MEMORY_HOST); + fclose(file); return hypre_error_flag; } + diff --git a/src/IJ_mv/HYPRE_IJ_mv.h b/src/IJ_mv/HYPRE_IJ_mv.h index 269613f4d..19da94121 100644 --- a/src/IJ_mv/HYPRE_IJ_mv.h +++ b/src/IJ_mv/HYPRE_IJ_mv.h @@ -19,23 +19,25 @@ extern "C" { *--------------------------------------------------------------------------*/ /** - * @name IJ System Interface + * @defgroup IJSystemInterface IJ System Interface * * This interface represents a linear-algebraic conceptual view of a * linear system. The 'I' and 'J' in the name are meant to be * mnemonic for the traditional matrix notation A(I,J). 
* * @memo A linear-algebraic conceptual interface + * + * @{ **/ -/*@{*/ /*-------------------------------------------------------------------------- *--------------------------------------------------------------------------*/ /** * @name IJ Matrices + * + * @{ **/ -/*@{*/ struct hypre_IJMatrix_struct; /** @@ -45,20 +47,20 @@ typedef struct hypre_IJMatrix_struct *HYPRE_IJMatrix; /** * Create a matrix object. Each process owns some unique consecutive - * range of rows, indicated by the global row indices {\tt ilower} and - * {\tt iupper}. The row data is required to be such that the value - * of {\tt ilower} on any process $p$ be exactly one more than the - * value of {\tt iupper} on process $p-1$. Note that the first row of + * range of rows, indicated by the global row indices \e ilower and + * \e iupper. The row data is required to be such that the value + * of \e ilower on any process \f$p\f$ be exactly one more than the + * value of \e iupper on process \f$p-1\f$. Note that the first row of * the global matrix may start with any integer value. In particular, * one may use zero- or one-based indexing. * - * For square matrices, {\tt jlower} and {\tt jupper} typically should - * match {\tt ilower} and {\tt iupper}, respectively. For rectangular - * matrices, {\tt jlower} and {\tt jupper} should define a + * For square matrices, \e jlower and \e jupper typically should + * match \e ilower and \e iupper, respectively. For rectangular + * matrices, \e jlower and \e jupper should define a * partitioning of the columns. This partitioning must be used for - * any vector $v$ that will be used in matrix-vector products with the - * rectangular matrix. The matrix data structure may use {\tt jlower} - * and {\tt jupper} to store the diagonal blocks (rectangular in + * any vector \f$v\f$ that will be used in matrix-vector products with the + * rectangular matrix. 
The matrix data structure may use \e jlower + * and \e jupper to store the diagonal blocks (rectangular in * general) of the matrix separately from the rest of the matrix. * * Collective. @@ -89,13 +91,21 @@ HYPRE_Int HYPRE_IJMatrixDestroy(HYPRE_IJMatrix matrix); HYPRE_Int HYPRE_IJMatrixInitialize(HYPRE_IJMatrix matrix); /** - * Sets values for {\tt nrows} rows or partial rows of the matrix. - * The arrays {\tt ncols} - * and {\tt rows} are of dimension {\tt nrows} and contain the number + * Prepare a matrix object for setting coefficient values. This + * routine will also re-initialize an already assembled matrix, + * allowing users to modify coefficient values. This routine + * also specifies the memory location, i.e. host or device. + **/ +HYPRE_Int HYPRE_IJMatrixInitialize_v2(HYPRE_IJMatrix matrix, HYPRE_MemoryLocation memory_location); + +/** + * Sets values for \e nrows rows or partial rows of the matrix. + * The arrays \e ncols + * and \e rows are of dimension \e nrows and contain the number * of columns in each row and the row indices, respectively. The - * array {\tt cols} contains the column indices for each of the {\tt - * rows}, and is ordered by rows. The data in the {\tt values} array - * corresponds directly to the column entries in {\tt cols}. Erases + * array \e cols contains the column indices for each of the \e + * rows, and is ordered by rows. The data in the \e values array + * corresponds directly to the column entries in \e cols. Erases * any previous values at the specified locations and replaces them * with new ones, or, if there was no value there before, inserts a * new one if set locally. Note that it is not possible to set values @@ -105,8 +115,8 @@ HYPRE_Int HYPRE_IJMatrixInitialize(HYPRE_IJMatrix matrix); * a zero value. The actual value needs to be set on proc j. * * Note that a threaded version (threaded over the number of rows) - * will be called if - * HYPRE_IJMatrixSetOMPFlag is set to a value != 0. 
+ * will be called if + * HYPRE_IJMatrixSetOMPFlag is set to a value != 0. * This requires that rows[i] != rows[j] for i!= j * and is only efficient if a large number of rows is set in one call * to HYPRE_IJMatrixSetValues. @@ -123,21 +133,21 @@ HYPRE_Int HYPRE_IJMatrixSetValues(HYPRE_IJMatrix matrix, /** * Sets all matrix coefficients of an already assembled matrix to - * {\tt value} + * \e value **/ HYPRE_Int HYPRE_IJMatrixSetConstantValues(HYPRE_IJMatrix matrix, HYPRE_Complex value); /** - * Adds to values for {\tt nrows} rows or partial rows of the matrix. - * Usage details are analogous to \Ref{HYPRE_IJMatrixSetValues}. - * Adds to any previous values at the specified locations, or, if - * there was no value there before, inserts a new one. + * Adds to values for \e nrows rows or partial rows of the matrix. + * Usage details are analogous to \ref HYPRE_IJMatrixSetValues. + * Adds to any previous values at the specified locations, or, if + * there was no value there before, inserts a new one. * AddToValues can be used to add to values on other processors. * * Note that a threaded version (threaded over the number of rows) - * will be called if - * HYPRE_IJMatrixSetOMPFlag is set to a value != 0. + * will be called if + * HYPRE_IJMatrixSetOMPFlag is set to a value != 0. * This requires that rows[i] != rows[j] for i!= j * and is only efficient if a large number of rows is added in one call * to HYPRE_IJMatrixAddToValues. @@ -153,10 +163,10 @@ HYPRE_Int HYPRE_IJMatrixAddToValues(HYPRE_IJMatrix matrix, const HYPRE_Complex *values); /** - * Sets values for {\tt nrows} rows or partial rows of the matrix. + * Sets values for \e nrows rows or partial rows of the matrix. * - * Same as IJMatrixSetValues, but with an additional {\tt row_indexes} array - * that provides indexes into the {\tt cols} and {\tt values} arrays. Because + * Same as IJMatrixSetValues, but with an additional \e row_indexes array + * that provides indexes into the \e cols and \e values arrays. 
Because * of this, there can be gaps between the row data in these latter two arrays. * **/ @@ -169,10 +179,10 @@ HYPRE_Int HYPRE_IJMatrixSetValues2(HYPRE_IJMatrix matrix, const HYPRE_Complex *values); /** - * Adds to values for {\tt nrows} rows or partial rows of the matrix. + * Adds to values for \e nrows rows or partial rows of the matrix. * - * Same as IJMatrixAddToValues, but with an additional {\tt row_indexes} array - * that provides indexes into the {\tt cols} and {\tt values} arrays. Because + * Same as IJMatrixAddToValues, but with an additional \e row_indexes array + * that provides indexes into the \e cols and \e values arrays. Because * of this, there can be gaps between the row data in these latter two arrays. * **/ @@ -190,8 +200,8 @@ HYPRE_Int HYPRE_IJMatrixAddToValues2(HYPRE_IJMatrix matrix, HYPRE_Int HYPRE_IJMatrixAssemble(HYPRE_IJMatrix matrix); /** - * Gets number of nonzeros elements for {\tt nrows} rows specified in {\tt rows} - * and returns them in {\tt ncols}, which needs to be allocated by the + * Gets number of nonzeros elements for \e nrows rows specified in \e rows + * and returns them in \e ncols, which needs to be allocated by the * user. **/ HYPRE_Int HYPRE_IJMatrixGetRowCounts(HYPRE_IJMatrix matrix, @@ -200,9 +210,9 @@ HYPRE_Int HYPRE_IJMatrixGetRowCounts(HYPRE_IJMatrix matrix, HYPRE_Int *ncols); /** - * Gets values for {\tt nrows} rows or partial rows of the matrix. + * Gets values for \e nrows rows or partial rows of the matrix. * Usage details are mostly - * analogous to \Ref{HYPRE_IJMatrixSetValues}. + * analogous to \ref HYPRE_IJMatrixSetValues. * Note that if nrows is negative, the routine will return * the column_indices and matrix coefficients of the * (-nrows) rows contained in rows. @@ -216,7 +226,7 @@ HYPRE_Int HYPRE_IJMatrixGetValues(HYPRE_IJMatrix matrix, /** * Set the storage type of the matrix object to be constructed. - * Currently, {\tt type} can only be {\tt HYPRE\_PARCSR}. 
+ * Currently, \e type can only be \c HYPRE_PARCSR. * * Not collective, but must be the same on all processes. * @@ -251,7 +261,7 @@ HYPRE_Int HYPRE_IJMatrixGetObject(HYPRE_IJMatrix matrix, /** * (Optional) Set the max number of nonzeros to expect in each row. - * The array {\tt sizes} contains estimated sizes for each row on this + * The array \e sizes contains estimated sizes for each row on this * process. This call can significantly improve the efficiency of * matrix construction, and should always be utilized if possible. * @@ -265,7 +275,7 @@ HYPRE_Int HYPRE_IJMatrixSetRowSizes(HYPRE_IJMatrix matrix, * the diagonal and off-diagonal blocks. The diagonal block is the * submatrix whose column numbers correspond to rows owned by this * process, and the off-diagonal block is everything else. The arrays - * {\tt diag\_sizes} and {\tt offdiag\_sizes} contain estimated sizes + * \e diag_sizes and \e offdiag_sizes contain estimated sizes * for each row of the diagonal and off-diagonal blocks, respectively. * This routine can significantly improve the efficiency of matrix * construction, and should always be utilized if possible. @@ -296,17 +306,17 @@ HYPRE_Int HYPRE_IJMatrixSetPrintLevel(HYPRE_IJMatrix matrix, HYPRE_Int print_level); /** - * (Optional) if set, will use a threaded version of + * (Optional) if set, will use a threaded version of * HYPRE_IJMatrixSetValues and HYPRE_IJMatrixAddToValues. * This is only useful if a large number of rows is set or added to - * at once. + * at once. * - * NOTE that the values in the rows array of HYPRE_IJMatrixSetValues + * NOTE that the values in the rows array of HYPRE_IJMatrixSetValues * or HYPRE_IJMatrixAddToValues must be different from each other !!! 
- * - * This option is VERY inefficient if only a small number of rows - * is set or added at once and/or - * if reallocation of storage is required and/or + * + * This option is VERY inefficient if only a small number of rows + * is set or added at once and/or + * if reallocation of storage is required and/or * if values are added to off processor values. * **/ @@ -327,15 +337,16 @@ HYPRE_Int HYPRE_IJMatrixRead(const char *filename, HYPRE_Int HYPRE_IJMatrixPrint(HYPRE_IJMatrix matrix, const char *filename); -/*@}*/ +/**@}*/ /*-------------------------------------------------------------------------- *--------------------------------------------------------------------------*/ /** * @name IJ Vectors + * + * @{ **/ -/*@{*/ struct hypre_IJVector_struct; /** @@ -345,10 +356,10 @@ typedef struct hypre_IJVector_struct *HYPRE_IJVector; /** * Create a vector object. Each process owns some unique consecutive - * range of vector unknowns, indicated by the global indices {\tt - * jlower} and {\tt jupper}. The data is required to be such that the - * value of {\tt jlower} on any process $p$ be exactly one more than - * the value of {\tt jupper} on process $p-1$. Note that the first + * range of vector unknowns, indicated by the global indices \e + * jlower and \e jupper. The data is required to be such that the + * value of \e jlower on any process \f$p\f$ be exactly one more than + * the value of \e jupper on process \f$p-1\f$. Note that the first * index of the global vector may start with any integer value. In * particular, one may use zero- or one-based indexing. * @@ -377,6 +388,14 @@ HYPRE_Int HYPRE_IJVectorDestroy(HYPRE_IJVector vector); **/ HYPRE_Int HYPRE_IJVectorInitialize(HYPRE_IJVector vector); +/** + * Prepare a vector object for setting coefficient values. This + * routine will also re-initialize an already assembled vector, + * allowing users to modify coefficient values. This routine + * also specifies the memory location, i.e. host or device. 
+ **/ +HYPRE_Int HYPRE_IJVectorInitialize_v2( HYPRE_IJVector vector, HYPRE_MemoryLocation memory_location ); + /** * (Optional) Sets the maximum number of elements that are expected to be set * (or added) on other processors from this processor @@ -389,8 +408,8 @@ HYPRE_Int HYPRE_IJVectorSetMaxOffProcElmts(HYPRE_IJVector vector, HYPRE_Int max_off_proc_elmts); /** - * Sets values in vector. The arrays {\tt values} and {\tt indices} - * are of dimension {\tt nvalues} and contain the vector values to be + * Sets values in vector. The arrays \e values and \e indices + * are of dimension \e nvalues and contain the vector values to be * set and the corresponding global vector indices, respectively. * Erases any previous values at the specified locations and replaces * them with new ones. Note that it is not possible to set values @@ -408,9 +427,9 @@ HYPRE_Int HYPRE_IJVectorSetValues(HYPRE_IJVector vector, /** * Adds to values in vector. Usage details are analogous to - * \Ref{HYPRE_IJVectorSetValues}. - * Adds to any previous values at the specified locations, or, if - * there was no value there before, inserts a new one. + * \ref HYPRE_IJVectorSetValues. + * Adds to any previous values at the specified locations, or, if + * there was no value there before, inserts a new one. * AddToValues can be used to add to values on other processors. * * Not collective. @@ -427,7 +446,7 @@ HYPRE_Int HYPRE_IJVectorAssemble(HYPRE_IJVector vector); /** * Gets values in vector. Usage details are analogous to - * \Ref{HYPRE_IJVectorSetValues}. + * \ref HYPRE_IJVectorSetValues. * * Not collective. **/ @@ -438,7 +457,7 @@ HYPRE_Int HYPRE_IJVectorGetValues(HYPRE_IJVector vector, /** * Set the storage type of the vector object to be constructed. - * Currently, {\tt type} can only be {\tt HYPRE\_PARCSR}. + * Currently, \e type can only be \c HYPRE_PARCSR. * * Not collective, but must be the same on all processes. 
* @@ -490,8 +509,8 @@ HYPRE_Int HYPRE_IJVectorRead(const char *filename, HYPRE_Int HYPRE_IJVectorPrint(HYPRE_IJVector vector, const char *filename); -/*@}*/ -/*@}*/ +/**@}*/ +/**@}*/ #ifdef __cplusplus } diff --git a/src/IJ_mv/IJMatrix_parcsr.c b/src/IJ_mv/IJMatrix_parcsr.c index e0eb7e25f..f24d660f9 100644 --- a/src/IJ_mv/IJMatrix_parcsr.c +++ b/src/IJ_mv/IJMatrix_parcsr.c @@ -36,18 +36,17 @@ hypre_IJMatrixCreateParCSR(hypre_IJMatrix *matrix) hypre_MPI_Comm_size(comm,&num_procs); -#ifdef HYPRE_NO_GLOBAL_PARTITION row_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); if (hypre_IJMatrixGlobalFirstRow(matrix)) { - for (i=0; i < 2; i++) + for (i = 0; i < 2; i++) { - row_starts[i] = row_partitioning[i]- hypre_IJMatrixGlobalFirstRow(matrix); + row_starts[i] = row_partitioning[i] - hypre_IJMatrixGlobalFirstRow(matrix); } } else { - for (i=0; i < 2; i++) + for (i = 0; i < 2; i++) { row_starts[i] = row_partitioning[i]; } @@ -58,14 +57,14 @@ hypre_IJMatrixCreateParCSR(hypre_IJMatrix *matrix) col_starts = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); if (hypre_IJMatrixGlobalFirstCol(matrix)) { - for (i=0; i < 2; i++) + for (i = 0; i < 2; i++) { col_starts[i] = col_partitioning[i]-hypre_IJMatrixGlobalFirstCol(matrix); } } else { - for (i=0; i < 2; i++) + for (i = 0; i < 2; i++) { col_starts[i] = col_partitioning[i]; } @@ -80,51 +79,6 @@ hypre_IJMatrixCreateParCSR(hypre_IJMatrix *matrix) hypre_IJMatrixGlobalNumCols(matrix), row_starts, col_starts, 0, 0, 0); -#else - row_starts = hypre_CTAlloc(HYPRE_BigInt, num_procs+1, HYPRE_MEMORY_HOST); - if (row_partitioning[0]) - { - for (i=0; i < num_procs+1; i++) - { - row_starts[i] = row_partitioning[i]-row_partitioning[0]; - } - } - else - { - for (i=0; i < num_procs+1; i++) - { - row_starts[i] = row_partitioning[i]; - } - } - - if (row_partitioning != col_partitioning) - { - col_starts = hypre_CTAlloc(HYPRE_BigInt, num_procs+1, HYPRE_MEMORY_HOST); - if (col_partitioning[0]) - { - for (i=0; i < num_procs+1; i++) - { - 
col_starts[i] = col_partitioning[i]-col_partitioning[0]; - } - } - else - { - for (i=0; i < num_procs+1; i++) - { - col_starts[i] = col_partitioning[i]; - } - } - } - else - { - col_starts = row_starts; - } - - par_matrix = hypre_ParCSRMatrixCreate(comm,row_starts[num_procs], - col_starts[num_procs], - row_starts, col_starts, 0, 0, 0); -#endif - hypre_IJMatrixObject(matrix) = par_matrix; return hypre_error_flag; @@ -140,27 +94,16 @@ HYPRE_Int hypre_IJMatrixSetRowSizesParCSR(hypre_IJMatrix *matrix, const HYPRE_Int *sizes) { - HYPRE_Int local_num_rows, local_num_cols; - HYPRE_Int i, my_id; - HYPRE_Int *row_space; + HYPRE_Int local_num_rows, local_num_cols, i, *row_space = NULL; HYPRE_BigInt *row_partitioning = hypre_IJMatrixRowPartitioning(matrix); HYPRE_BigInt *col_partitioning = hypre_IJMatrixColPartitioning(matrix); - hypre_AuxParCSRMatrix *aux_matrix; - MPI_Comm comm = hypre_IJMatrixComm(matrix); - - hypre_MPI_Comm_rank(comm,&my_id); -#ifdef HYPRE_NO_GLOBAL_PARTITION local_num_rows = (HYPRE_Int)(row_partitioning[1]-row_partitioning[0]); local_num_cols = (HYPRE_Int)(col_partitioning[1]-col_partitioning[0]); -#else - local_num_rows = (HYPRE_Int)(row_partitioning[my_id+1]-row_partitioning[my_id]); - local_num_cols = (HYPRE_Int)(col_partitioning[my_id+1]-col_partitioning[my_id]); -#endif - aux_matrix = (hypre_AuxParCSRMatrix *) hypre_IJMatrixTranslator(matrix); - row_space = NULL; + hypre_AuxParCSRMatrix *aux_matrix = (hypre_AuxParCSRMatrix *) hypre_IJMatrixTranslator(matrix); + if (aux_matrix) { - row_space = hypre_AuxParCSRMatrixRowSpace(aux_matrix); + row_space = hypre_AuxParCSRMatrixRowSpace(aux_matrix); } if (!row_space) { @@ -172,12 +115,19 @@ hypre_IJMatrixSetRowSizesParCSR(hypre_IJMatrix *matrix, } if (!aux_matrix) { - hypre_AuxParCSRMatrixCreate(&aux_matrix, local_num_rows, - local_num_cols, row_space); + hypre_AuxParCSRMatrixCreate(&aux_matrix, local_num_rows, local_num_cols, row_space); hypre_IJMatrixTranslator(matrix) = aux_matrix; } 
hypre_AuxParCSRMatrixRowSpace(aux_matrix) = row_space; +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + hypre_AuxParCSRMatrixUsrOnProcElmts(aux_matrix) = 0; + for (i = 0; i < local_num_rows; i++) + { + hypre_AuxParCSRMatrixUsrOnProcElmts(aux_matrix) += sizes[i]; + } +#endif + return hypre_error_flag; } @@ -193,55 +143,70 @@ hypre_IJMatrixSetRowSizesParCSR(hypre_IJMatrix *matrix, HYPRE_Int hypre_IJMatrixSetDiagOffdSizesParCSR(hypre_IJMatrix *matrix, const HYPRE_Int *diag_sizes, - const HYPRE_Int *offdiag_sizes) + const HYPRE_Int *offd_sizes) { - HYPRE_Int local_num_rows; - HYPRE_Int i; - hypre_ParCSRMatrix *par_matrix = (hypre_ParCSRMatrix *)hypre_IJMatrixObject(matrix); + HYPRE_Int local_num_rows, local_num_cols; + HYPRE_BigInt *row_partitioning = hypre_IJMatrixRowPartitioning(matrix); + HYPRE_BigInt *col_partitioning = hypre_IJMatrixColPartitioning(matrix); + local_num_rows = (HYPRE_Int)(row_partitioning[1]-row_partitioning[0]); + local_num_cols = (HYPRE_Int)(col_partitioning[1]-col_partitioning[0]); hypre_AuxParCSRMatrix *aux_matrix = (hypre_AuxParCSRMatrix *)hypre_IJMatrixTranslator(matrix); - hypre_CSRMatrix *diag; - hypre_CSRMatrix *offd; - HYPRE_Int *diag_i; - HYPRE_Int *offd_i; - if (!par_matrix) + if (!aux_matrix) { - hypre_IJMatrixCreateParCSR(matrix); - par_matrix = (hypre_ParCSRMatrix *)hypre_IJMatrixObject(matrix); + hypre_AuxParCSRMatrixCreate(&aux_matrix, local_num_rows, local_num_cols, NULL); + hypre_IJMatrixTranslator(matrix) = aux_matrix; } - diag = hypre_ParCSRMatrixDiag(par_matrix); - diag_i = hypre_CSRMatrixI(diag); - local_num_rows = hypre_CSRMatrixNumRows(diag); - if (!diag_i) + if ( hypre_AuxParCSRMatrixDiagSizes(aux_matrix) == NULL) { - diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1, hypre_CSRMatrixMemoryLocation(diag)); + hypre_AuxParCSRMatrixDiagSizes(aux_matrix) = hypre_TAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); } - for (i = 0; i < local_num_rows; i++) - { - diag_i[i+1] = diag_i[i] + diag_sizes[i]; - } - 
hypre_CSRMatrixI(diag) = diag_i; - hypre_CSRMatrixNumNonzeros(diag) = diag_i[local_num_rows]; - offd = hypre_ParCSRMatrixOffd(par_matrix); - offd_i = hypre_CSRMatrixI(offd); - if (!offd_i) - { - offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1, hypre_CSRMatrixMemoryLocation(offd)); - } - for (i = 0; i < local_num_rows; i++) + + if ( hypre_AuxParCSRMatrixOffdSizes(aux_matrix) == NULL) { - offd_i[i+1] = offd_i[i] + offdiag_sizes[i]; + hypre_AuxParCSRMatrixOffdSizes(aux_matrix) = hypre_TAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); } - hypre_CSRMatrixI(offd) = offd_i; - hypre_CSRMatrixNumNonzeros(offd) = offd_i[local_num_rows]; + + hypre_TMemcpy(hypre_AuxParCSRMatrixDiagSizes(aux_matrix), diag_sizes, HYPRE_Int, local_num_rows, + HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + + hypre_TMemcpy(hypre_AuxParCSRMatrixOffdSizes(aux_matrix), offd_sizes, HYPRE_Int, local_num_rows, + HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + + hypre_AuxParCSRMatrixNeedAux(aux_matrix) = 0; + + return hypre_error_flag; +} + +/****************************************************************************** + * + * hypre_IJMatrixSetMaxOnProcElmtsParCSR + * + *****************************************************************************/ + +HYPRE_Int +hypre_IJMatrixSetMaxOnProcElmtsParCSR(hypre_IJMatrix *matrix, + HYPRE_Int max_on_proc_elmts) +{ +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + hypre_AuxParCSRMatrix *aux_matrix; + HYPRE_Int local_num_rows, local_num_cols, my_id; + HYPRE_BigInt *row_partitioning = hypre_IJMatrixRowPartitioning(matrix); + HYPRE_BigInt *col_partitioning = hypre_IJMatrixColPartitioning(matrix); + MPI_Comm comm = hypre_IJMatrixComm(matrix); + + hypre_MPI_Comm_rank(comm,&my_id); + aux_matrix = (hypre_AuxParCSRMatrix *) hypre_IJMatrixTranslator(matrix); if (!aux_matrix) { - hypre_AuxParCSRMatrixCreate(&aux_matrix, local_num_rows, - hypre_CSRMatrixNumCols(diag), NULL); + local_num_rows = (HYPRE_Int)(row_partitioning[1]-row_partitioning[0]); + local_num_cols = 
(HYPRE_Int)(col_partitioning[1]-col_partitioning[0]); + hypre_AuxParCSRMatrixCreate(&aux_matrix, local_num_rows, local_num_cols, NULL); hypre_IJMatrixTranslator(matrix) = aux_matrix; } - hypre_AuxParCSRMatrixNeedAux(aux_matrix) = 0; + hypre_AuxParCSRMatrixUsrOnProcElmts(aux_matrix) = max_on_proc_elmts; +#endif return hypre_error_flag; } @@ -266,19 +231,18 @@ hypre_IJMatrixSetMaxOffProcElmtsParCSR(hypre_IJMatrix *matrix, aux_matrix = (hypre_AuxParCSRMatrix *) hypre_IJMatrixTranslator(matrix); if (!aux_matrix) { -#ifdef HYPRE_NO_GLOBAL_PARTITION local_num_rows = (HYPRE_Int)(row_partitioning[1]-row_partitioning[0]); local_num_cols = (HYPRE_Int)(col_partitioning[1]-col_partitioning[0]); -#else - local_num_rows = (HYPRE_Int)(row_partitioning[my_id+1]-row_partitioning[my_id]); - local_num_cols = (HYPRE_Int)(col_partitioning[my_id+1]-col_partitioning[my_id]); -#endif hypre_AuxParCSRMatrixCreate(&aux_matrix, local_num_rows, local_num_cols, NULL); hypre_IJMatrixTranslator(matrix) = aux_matrix; } hypre_AuxParCSRMatrixMaxOffProcElmts(aux_matrix) = max_off_proc_elmts; +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + hypre_AuxParCSRMatrixUsrOffProcElmts(aux_matrix) = max_off_proc_elmts; +#endif + return hypre_error_flag; } @@ -289,13 +253,20 @@ hypre_IJMatrixSetMaxOffProcElmtsParCSR(hypre_IJMatrix *matrix, * initializes AuxParCSRMatrix and ParCSRMatrix as necessary * *****************************************************************************/ - HYPRE_Int hypre_IJMatrixInitializeParCSR(hypre_IJMatrix *matrix) { - hypre_ParCSRMatrix *par_matrix = (hypre_ParCSRMatrix *) hypre_IJMatrixObject(matrix); + return hypre_IJMatrixInitializeParCSR_v2(matrix, hypre_HandleMemoryLocation(hypre_handle())); +} + +HYPRE_Int +hypre_IJMatrixInitializeParCSR_v2(hypre_IJMatrix *matrix, HYPRE_MemoryLocation memory_location) +{ + hypre_ParCSRMatrix *par_matrix = (hypre_ParCSRMatrix *) hypre_IJMatrixObject(matrix); hypre_AuxParCSRMatrix *aux_matrix = (hypre_AuxParCSRMatrix *) 
hypre_IJMatrixTranslator(matrix); - HYPRE_Int local_num_rows; + + HYPRE_MemoryLocation memory_location_aux = + hypre_GetExecPolicy1(memory_location) == HYPRE_EXEC_HOST ? HYPRE_MEMORY_HOST : HYPRE_MEMORY_DEVICE; if (hypre_IJMatrixAssembleFlag(matrix) == 0) { @@ -304,49 +275,69 @@ hypre_IJMatrixInitializeParCSR(hypre_IJMatrix *matrix) hypre_IJMatrixCreateParCSR(matrix); par_matrix = (hypre_ParCSRMatrix *) hypre_IJMatrixObject(matrix); } - local_num_rows = - hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(par_matrix)); + + HYPRE_Int local_num_rows = hypre_ParCSRMatrixNumRows(par_matrix); + HYPRE_Int i; + hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(par_matrix); + hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(par_matrix); + if (!aux_matrix) { - hypre_AuxParCSRMatrixCreate( - &aux_matrix, local_num_rows, - hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(par_matrix)), NULL); + hypre_AuxParCSRMatrixCreate(&aux_matrix, local_num_rows, hypre_ParCSRMatrixNumCols(par_matrix), NULL); hypre_IJMatrixTranslator(matrix) = aux_matrix; } - hypre_ParCSRMatrixInitialize(par_matrix); - hypre_AuxParCSRMatrixInitialize(aux_matrix); + hypre_ParCSRMatrixInitialize_v2(par_matrix, memory_location); + hypre_AuxParCSRMatrixInitialize_v2(aux_matrix, memory_location_aux); + + if (memory_location_aux == HYPRE_MEMORY_HOST) + { + if (hypre_AuxParCSRMatrixDiagSizes(aux_matrix)) + { + for (i = 0; i < local_num_rows; i++) + { + hypre_CSRMatrixI(diag)[i+1] = hypre_CSRMatrixI(diag)[i] + hypre_AuxParCSRMatrixDiagSizes(aux_matrix)[i]; + } + hypre_CSRMatrixNumNonzeros(diag) = hypre_CSRMatrixI(diag)[local_num_rows]; + hypre_CSRMatrixInitialize(diag); + } + + if (hypre_AuxParCSRMatrixOffdSizes(aux_matrix)) + { + for (i = 0; i < local_num_rows; i++) + { + hypre_CSRMatrixI(offd)[i+1] = hypre_CSRMatrixI(offd)[i] + hypre_AuxParCSRMatrixOffdSizes(aux_matrix)[i]; + } + hypre_CSRMatrixNumNonzeros(offd) = hypre_CSRMatrixI(offd)[local_num_rows]; + hypre_CSRMatrixInitialize(offd); + } + } + if 
(!hypre_AuxParCSRMatrixNeedAux(aux_matrix)) { - HYPRE_Int i, *indx_diag, *indx_offd, *diag_i, *offd_i; - diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix)); - offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix)); - indx_diag = hypre_AuxParCSRMatrixIndxDiag(aux_matrix); - indx_offd = hypre_AuxParCSRMatrixIndxOffd(aux_matrix); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif - for (i=0; i < local_num_rows; i++) + for (i = 0; i < local_num_rows; i++) { - indx_diag[i] = diag_i[i]; - indx_offd[i] = offd_i[i]; + hypre_AuxParCSRMatrixIndxDiag(aux_matrix)[i] = hypre_CSRMatrixI(diag)[i]; + hypre_AuxParCSRMatrixIndxOffd(aux_matrix)[i] = hypre_CSRMatrixI(offd)[i]; } } } - else /* AB 4/06 - the assemble routine destroys the aux matrix - so we need - to recreate if initialize is called again*/ + else if ( memory_location_aux == HYPRE_MEMORY_HOST ) { + /* AB 4/06 - the assemble routine destroys the aux matrix - so we need + to recreate if initialize is called again + */ if (!aux_matrix) { - local_num_rows = - hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(par_matrix)); - hypre_AuxParCSRMatrixCreate( - &aux_matrix, local_num_rows, - hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(par_matrix)), NULL); + hypre_AuxParCSRMatrixCreate(&aux_matrix, hypre_ParCSRMatrixNumRows(par_matrix), + hypre_ParCSRMatrixNumCols(par_matrix), NULL); + hypre_AuxParCSRMatrixMemoryLocation(aux_matrix) = HYPRE_MEMORY_HOST; hypre_AuxParCSRMatrixNeedAux(aux_matrix) = 0; hypre_IJMatrixTranslator(matrix) = aux_matrix; } - } return hypre_error_flag; @@ -382,11 +373,7 @@ HYPRE_Int hypre_IJMatrixGetRowCountsParCSR( hypre_IJMatrix *matrix, hypre_MPI_Comm_rank(comm,&my_id); -#ifdef HYPRE_NO_GLOBAL_PARTITION pstart = 0; -#else - pstart = my_id; -#endif #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i, row_index) HYPRE_SMP_SCHEDULE @@ -450,10 +437,6 @@ hypre_IJMatrixGetValuesParCSR( hypre_IJMatrix *matrix, HYPRE_BigInt *row_partitioning = 
hypre_IJMatrixRowPartitioning(matrix); -#ifndef HYPRE_NO_GLOBAL_PARTITION - HYPRE_BigInt *col_partitioning = hypre_IJMatrixColPartitioning(matrix); -#endif - HYPRE_Int i, j, n, ii, indx, pstart; HYPRE_Int num_procs, my_id; HYPRE_BigInt col_0, col_n, row, col_indx, first; @@ -474,17 +457,10 @@ hypre_IJMatrixGetValuesParCSR( hypre_IJMatrix *matrix, } } -#ifdef HYPRE_NO_GLOBAL_PARTITION col_0 = col_starts[0]; col_n = col_starts[1]-1; first = hypre_IJMatrixGlobalFirstCol(matrix); pstart = 0; -#else - col_0 = col_starts[my_id]; - col_n = col_starts[my_id+1]-1; - first = col_partitioning[0]; - pstart = my_id; -#endif diag = hypre_ParCSRMatrixDiag(par_matrix); diag_i = hypre_CSRMatrixI(diag); @@ -689,17 +665,10 @@ hypre_IJMatrixSetValuesParCSR( hypre_IJMatrix *matrix, row_partitioning = hypre_IJMatrixRowPartitioning(matrix); col_partitioning = hypre_IJMatrixColPartitioning(matrix); -#ifdef HYPRE_NO_GLOBAL_PARTITION col_0 = col_partitioning[0]; col_n = col_partitioning[1]-1; first = hypre_IJMatrixGlobalFirstCol(matrix); pstart = 0; -#else - col_0 = col_partitioning[my_id]; - col_n = col_partitioning[my_id+1]-1; - first = col_partitioning[0]; - pstart = my_id; -#endif if (nrows < 0) { hypre_error_in_arg(2); @@ -717,7 +686,7 @@ hypre_IJMatrixSetValuesParCSR( hypre_IJMatrix *matrix, for (ii=0; ii < nrows; ii++) { row = rows[ii]; - n = ncols[ii]; + n = ncols ? ncols[ii] : 1; if (n == 0) /* empty row */ { continue; @@ -855,7 +824,7 @@ hypre_IJMatrixSetValuesParCSR( hypre_IJMatrix *matrix, for (ii=0; ii < nrows; ii++) { row = rows[ii]; - n = ncols[ii]; + n = ncols ? 
ncols[ii] : 1; if (n == 0) /* empty row */ { continue; @@ -1060,40 +1029,50 @@ hypre_IJMatrixSetValuesParCSR( hypre_IJMatrix *matrix, * *****************************************************************************/ -HYPRE_Int -hypre_IJMatrixSetConstantValuesParCSR( hypre_IJMatrix *matrix, - HYPRE_Complex value ) +void +hypre_IJMatrixSetConstantValuesParCSRHost( hypre_IJMatrix *matrix, + HYPRE_Complex value ) { - hypre_ParCSRMatrix *par_matrix; - par_matrix = (hypre_ParCSRMatrix *) hypre_IJMatrixObject( matrix ); + hypre_ParCSRMatrix *par_matrix = (hypre_ParCSRMatrix *) hypre_IJMatrixObject( matrix ); + hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(par_matrix); + hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(par_matrix); + HYPRE_Complex *diag_data = hypre_CSRMatrixData(diag); + HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd); + HYPRE_Int nnz_diag = hypre_CSRMatrixNumNonzeros(diag); + HYPRE_Int nnz_offd = hypre_CSRMatrixNumNonzeros(offd); + HYPRE_Int ii; - if (hypre_IJMatrixAssembleFlag(matrix)) /* matrix already assembled*/ +#ifdef HYPRE_USING_OPENMP +#pragma omp parallel for private(ii) HYPRE_SMP_SCHEDULE +#endif + for (ii = 0; ii < nnz_diag; ii++) { - hypre_CSRMatrix *diag, *offd; - HYPRE_Int *diag_i, *offd_i; - HYPRE_Complex *diag_data, *offd_data; - HYPRE_Int num_rows; - HYPRE_Int ii; - diag = hypre_ParCSRMatrixDiag(par_matrix); - offd = hypre_ParCSRMatrixOffd(par_matrix); - diag_i = hypre_CSRMatrixI(diag); - offd_i = hypre_CSRMatrixI(offd); - diag_data = hypre_CSRMatrixData(diag); - offd_data = hypre_CSRMatrixData(offd); - num_rows = hypre_CSRMatrixNumRows(diag); + diag_data[ii] = value; + } #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(ii) HYPRE_SMP_SCHEDULE #endif - for (ii=0; ii < diag_i[num_rows]; ii++) + for (ii = 0; ii < nnz_offd; ii++) + { + offd_data[ii] = value; + } +} + +HYPRE_Int +hypre_IJMatrixSetConstantValuesParCSR( hypre_IJMatrix *matrix, + HYPRE_Complex value ) +{ + if (hypre_IJMatrixAssembleFlag(matrix)) /* matrix already 
assembled*/ + { +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + if (hypre_GetExecPolicy1(hypre_IJMatrixMemoryLocation(matrix)) == HYPRE_EXEC_DEVICE) { - diag_data[ii] = value; + hypre_IJMatrixSetConstantValuesParCSRDevice(matrix, value); } -#ifdef HYPRE_USING_OPENMP -#pragma omp parallel for private(ii) HYPRE_SMP_SCHEDULE + else #endif - for (ii=0; ii < offd_i[num_rows]; ii++) { - offd_data[ii] = value; + hypre_IJMatrixSetConstantValuesParCSRHost(matrix, value); } } else @@ -1169,17 +1148,10 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix, par_matrix = (hypre_ParCSRMatrix *) hypre_IJMatrixObject( matrix ); row_partitioning = hypre_IJMatrixRowPartitioning(matrix); col_partitioning = hypre_IJMatrixColPartitioning(matrix); -#ifdef HYPRE_NO_GLOBAL_PARTITION col_0 = col_partitioning[0]; col_n = col_partitioning[1]-1; first = hypre_IJMatrixGlobalFirstCol(matrix); pstart = 0; -#else - col_0 = col_partitioning[my_id]; - col_n = col_partitioning[my_id+1]-1; - first = col_partitioning[0]; - pstart = my_id; -#endif if (hypre_IJMatrixAssembleFlag(matrix)) { HYPRE_Int num_cols_offd; @@ -1192,7 +1164,7 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix, for (ii=0; ii < nrows; ii++) { row = rows[ii]; - n = ncols[ii]; + n = ncols ? 
ncols[ii] : 1; if (n == 0) /* empty row */ { continue; @@ -1326,7 +1298,7 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix, hypre_IJMatrixTranslator(matrix) = aux_matrix; } current_num_elmts - = hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix); + = hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix); max_off_proc_elmts = hypre_AuxParCSRMatrixMaxOffProcElmts(aux_matrix); off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); @@ -1374,7 +1346,7 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix, off_proc_data[current_num_elmts++] = values[indx++]; } hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix) = off_proc_i_indx; - hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix) + hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix) = current_num_elmts; } } @@ -1390,7 +1362,7 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix, for (ii=0; ii < nrows; ii++) { row = rows[ii]; - n = ncols[ii]; + n = ncols ? ncols[ii] : 1; if (n == 0) /* empty row */ { continue; @@ -1584,7 +1556,7 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix, else { current_num_elmts - = hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix); + = hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix); max_off_proc_elmts = hypre_AuxParCSRMatrixMaxOffProcElmts(aux_matrix); off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); @@ -1628,7 +1600,7 @@ hypre_IJMatrixAddToValuesParCSR( hypre_IJMatrix *matrix, off_proc_data[current_num_elmts++] = values[indx++]; } hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix) = off_proc_i_indx; - hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix) + hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix) = current_num_elmts; } } @@ -1664,294 +1636,15 @@ hypre_IJMatrixDestroyParCSR(hypre_IJMatrix *matrix) * *****************************************************************************/ -#ifndef HYPRE_NO_GLOBAL_PARTITION - -HYPRE_Int -hypre_IJMatrixAssembleOffProcValsParCSR( hypre_IJMatrix *matrix, - HYPRE_Int off_proc_i_indx, - HYPRE_Int 
max_off_proc_elmts, - HYPRE_Int current_num_elmts, - HYPRE_BigInt *off_proc_i, - HYPRE_BigInt *off_proc_j, - HYPRE_Complex *off_proc_data ) -{ - MPI_Comm comm = hypre_IJMatrixComm(matrix); - hypre_MPI_Request *requests = NULL; - hypre_MPI_Status *status = NULL; - HYPRE_Int i, ii, j, j2, jj, n, row_index = 0; - HYPRE_BigInt row; - HYPRE_Int iii, iid, indx, ip; - HYPRE_Int proc_id, num_procs, my_id; - HYPRE_Int num_sends, num_sends3; - HYPRE_Int num_recvs; - HYPRE_Int num_requests; - HYPRE_Int vec_start, vec_len; - HYPRE_Int *send_procs; - HYPRE_Int *chunks; - HYPRE_BigInt *send_i; - HYPRE_Int *send_map_starts; - HYPRE_Int *dbl_send_map_starts; - HYPRE_Int *recv_procs; - HYPRE_Int *recv_chunks; - HYPRE_BigInt *recv_i; - HYPRE_Int *recv_vec_starts; - HYPRE_Int *dbl_recv_vec_starts; - HYPRE_Int *info; - HYPRE_Int *int_buffer; - HYPRE_Int *proc_id_mem; - HYPRE_BigInt *partitioning; - HYPRE_Int *displs; - HYPRE_Int *recv_buf; - HYPRE_Complex *send_data; - HYPRE_Complex *recv_data; - - hypre_MPI_Comm_size(comm,&num_procs); - hypre_MPI_Comm_rank(comm, &my_id); - partitioning = hypre_IJMatrixRowPartitioning(matrix); - - info = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST); - chunks = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST); - proc_id_mem = hypre_CTAlloc(HYPRE_Int, off_proc_i_indx/2, HYPRE_MEMORY_HOST); - j=0; - for (i=0; i < off_proc_i_indx; i++) - { - row = off_proc_i[i++]; - //if (row < 0) row = -row-1; - n = (HYPRE_Int)off_proc_i[i]; - proc_id = hypre_FindProc(partitioning,row,num_procs); - proc_id_mem[j++] = proc_id; - info[proc_id] += n; - chunks[proc_id]++; - } - - /* determine send_procs and amount of data to be sent */ - num_sends = 0; - for (i=0; i < num_procs; i++) - { - if (info[i]) - { - num_sends++; - } - } - send_procs = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); - send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1, HYPRE_MEMORY_HOST); - dbl_send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1, HYPRE_MEMORY_HOST); - 
num_sends3 = 3*num_sends; - int_buffer = hypre_CTAlloc(HYPRE_Int, 3*num_sends, HYPRE_MEMORY_HOST); - j = 0; - j2 = 0; - send_map_starts[0] = 0; - dbl_send_map_starts[0] = 0; - for (i=0; i < num_procs; i++) - { - if (info[i]) - { - send_procs[j++] = i; - send_map_starts[j] = send_map_starts[j-1]+2*chunks[i]+info[i]; - dbl_send_map_starts[j] = dbl_send_map_starts[j-1]+info[i]; - int_buffer[j2++] = i; - int_buffer[j2++] = chunks[i]; - int_buffer[j2++] = info[i]; - } - } - - hypre_TFree(chunks, HYPRE_MEMORY_HOST); - - hypre_MPI_Allgather(&num_sends3,1,HYPRE_MPI_INT,info,1,HYPRE_MPI_INT,comm); - - displs = hypre_CTAlloc(HYPRE_Int, num_procs+1, HYPRE_MEMORY_HOST); - displs[0] = 0; - for (i=1; i < num_procs+1; i++) - { - displs[i] = displs[i-1]+info[i-1]; - } - recv_buf = hypre_CTAlloc(HYPRE_Int, displs[num_procs], HYPRE_MEMORY_HOST); - - hypre_MPI_Allgatherv(int_buffer,num_sends3,HYPRE_MPI_INT,recv_buf,info,displs, - HYPRE_MPI_INT,comm); - - hypre_TFree(int_buffer, HYPRE_MEMORY_HOST); - hypre_TFree(info, HYPRE_MEMORY_HOST); - - /* determine recv procs and amount of data to be received */ - num_recvs = 0; - for (j=0; j < displs[num_procs]; j+=3) - { - if (recv_buf[j] == my_id) - { - num_recvs++; - } - } - - recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); - recv_chunks = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); - recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1, HYPRE_MEMORY_HOST); - dbl_recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1, HYPRE_MEMORY_HOST); - - j2 = 0; - recv_vec_starts[0] = 0; - dbl_recv_vec_starts[0] = 0; - for (i=0; i < num_procs; i++) - { - for (j=displs[i]; j < displs[i+1]; j+=3) - { - if (recv_buf[j] == my_id) - { - recv_procs[j2] = i; - recv_chunks[j2++] = recv_buf[j+1]; - recv_vec_starts[j2] = recv_vec_starts[j2-1]+2*recv_buf[j+1] - +recv_buf[j+2]; - dbl_recv_vec_starts[j2] = dbl_recv_vec_starts[j2-1]+recv_buf[j+2]; - } - if (j2 == num_recvs) - { - break; - } - } - } - hypre_TFree(recv_buf, 
HYPRE_MEMORY_HOST); - hypre_TFree(displs, HYPRE_MEMORY_HOST); - - /* set up data to be sent to send procs */ - /* send_i contains for each send proc : row no., no. of elmts and column - indices, send_data contains corresponding values */ - - send_i = hypre_CTAlloc(HYPRE_BigInt, send_map_starts[num_sends], HYPRE_MEMORY_HOST); - send_data = hypre_CTAlloc(HYPRE_Complex, dbl_send_map_starts[num_sends], HYPRE_MEMORY_HOST); - recv_i = hypre_CTAlloc(HYPRE_BigInt, recv_vec_starts[num_recvs], HYPRE_MEMORY_HOST); - recv_data = hypre_CTAlloc(HYPRE_Complex, dbl_recv_vec_starts[num_recvs], HYPRE_MEMORY_HOST); - - j=0; - jj=0; - for (i=0; i < off_proc_i_indx; i++) - { - row = off_proc_i[i++]; - n = (HYPRE_Int)off_proc_i[i]; - proc_id = proc_id_mem[i/2]; - indx = hypre_BinarySearch(send_procs,proc_id,num_sends); - iii = send_map_starts[indx]; - iid = dbl_send_map_starts[indx]; - send_i[iii++] = row; - send_i[iii++] = (HYPRE_BigInt) n; - for (ii = 0; ii < n; ii++) - { - send_i[iii++] = off_proc_j[jj]; - send_data[iid++] = off_proc_data[jj++]; - } - send_map_starts[indx] = iii; - dbl_send_map_starts[indx] = iid; - } - - hypre_TFree(proc_id_mem, HYPRE_MEMORY_HOST); - - for (i=num_sends; i > 0; i--) - { - send_map_starts[i] = send_map_starts[i-1]; - dbl_send_map_starts[i] = dbl_send_map_starts[i-1]; - } - send_map_starts[0] = 0; - dbl_send_map_starts[0] = 0; - - num_requests = num_recvs+num_sends; - - if (num_requests) - { - requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); - status = hypre_CTAlloc(hypre_MPI_Status, num_requests, HYPRE_MEMORY_HOST); - } - - j=0; - for (i=0; i < num_recvs; i++) - { - vec_start = recv_vec_starts[i]; - vec_len = recv_vec_starts[i+1] - vec_start; - ip = recv_procs[i]; - hypre_MPI_Irecv(&recv_i[vec_start], vec_len, HYPRE_MPI_BIG_INT, ip, 0, comm, - &requests[j++]); - } - - for (i=0; i < num_sends; i++) - { - vec_start = send_map_starts[i]; - vec_len = send_map_starts[i+1] - vec_start; - ip = send_procs[i]; - 
hypre_MPI_Isend(&send_i[vec_start], vec_len, HYPRE_MPI_BIG_INT, ip, 0, comm, - &requests[j++]); - } - - if (num_requests) - { - hypre_MPI_Waitall(num_requests, requests, status); - } - - j=0; - for (i=0; i < num_recvs; i++) - { - vec_start = dbl_recv_vec_starts[i]; - vec_len = dbl_recv_vec_starts[i+1] - vec_start; - ip = recv_procs[i]; - hypre_MPI_Irecv(&recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); - } - - for (i=0; i < num_sends; i++) - { - vec_start = dbl_send_map_starts[i]; - vec_len = dbl_send_map_starts[i+1] - vec_start; - ip = send_procs[i]; - hypre_MPI_Isend(&send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); - } - - if (num_requests) - { - hypre_MPI_Waitall(num_requests, requests, status); - hypre_TFree(requests, HYPRE_MEMORY_HOST); - hypre_TFree(status, HYPRE_MEMORY_HOST); - } - - hypre_TFree(send_i, HYPRE_MEMORY_HOST); - hypre_TFree(send_data, HYPRE_MEMORY_HOST); - hypre_TFree(send_procs, HYPRE_MEMORY_HOST); - hypre_TFree(send_map_starts, HYPRE_MEMORY_HOST); - hypre_TFree(dbl_send_map_starts, HYPRE_MEMORY_HOST); - hypre_TFree(recv_procs, HYPRE_MEMORY_HOST); - hypre_TFree(recv_vec_starts, HYPRE_MEMORY_HOST); - hypre_TFree(dbl_recv_vec_starts, HYPRE_MEMORY_HOST); - - j = 0; - j2 = 0; - for (i=0; i < num_recvs; i++) - { - for (ii=0; ii < recv_chunks[i]; ii++) - { - row = recv_i[j]; - HYPRE_Int rcvi = (HYPRE_Int) recv_i[j+1]; - hypre_IJMatrixAddToValuesParCSR(matrix,1,&rcvi,&row,&row_index, - &recv_i[j+2],&recv_data[j2]); - j2 += recv_i[j+1]; - j += recv_i[j+1]+2; - } - } - hypre_TFree(recv_chunks, HYPRE_MEMORY_HOST); - hypre_TFree(recv_i, HYPRE_MEMORY_HOST); - hypre_TFree(recv_data, HYPRE_MEMORY_HOST); - - return hypre_error_flag; -} - -#else - -/* assumed partition version */ - HYPRE_Int -hypre_IJMatrixAssembleOffProcValsParCSR( hypre_IJMatrix *matrix, - HYPRE_Int off_proc_i_indx, - HYPRE_Int max_off_proc_elmts, - HYPRE_Int current_num_elmts, - HYPRE_BigInt *off_proc_i, - HYPRE_BigInt 
*off_proc_j, - HYPRE_Complex *off_proc_data ) +hypre_IJMatrixAssembleOffProcValsParCSR( hypre_IJMatrix *matrix, + HYPRE_Int off_proc_i_indx, + HYPRE_Int max_off_proc_elmts, + HYPRE_Int current_num_elmts, + HYPRE_MemoryLocation memory_location, + HYPRE_BigInt *off_proc_i, + HYPRE_BigInt *off_proc_j, + HYPRE_Complex *off_proc_data ) { MPI_Comm comm = hypre_IJMatrixComm(matrix); @@ -2014,6 +1707,43 @@ hypre_IJMatrixAssembleOffProcValsParCSR( hypre_IJMatrix *matrix, global_first_col = hypre_IJMatrixGlobalFirstCol(matrix); global_first_row = hypre_IJMatrixGlobalFirstRow(matrix); + if (memory_location == HYPRE_MEMORY_DEVICE) + { + HYPRE_BigInt *tmp = hypre_TAlloc(HYPRE_BigInt, current_num_elmts, HYPRE_MEMORY_HOST); + HYPRE_BigInt *off_proc_i_h = hypre_TAlloc(HYPRE_BigInt, 2*current_num_elmts, HYPRE_MEMORY_HOST); + HYPRE_BigInt *off_proc_j_h = hypre_TAlloc(HYPRE_BigInt, current_num_elmts, HYPRE_MEMORY_HOST); + HYPRE_Complex *off_proc_data_h = hypre_TAlloc(HYPRE_Complex, current_num_elmts, HYPRE_MEMORY_HOST); + + hypre_TMemcpy(tmp, off_proc_i, HYPRE_BigInt, current_num_elmts, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); + hypre_TMemcpy(off_proc_j_h, off_proc_j, HYPRE_BigInt, current_num_elmts, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); + hypre_TMemcpy(off_proc_data_h, off_proc_data, HYPRE_Complex, current_num_elmts, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); + + for (i = 0; i < current_num_elmts; i++) + { + off_proc_i_h[2*i] = tmp[i]; + off_proc_i_h[2*i+1] = 1; + } + + off_proc_i_indx = current_num_elmts * 2; + + off_proc_i = off_proc_i_h; + off_proc_j = off_proc_j_h; + off_proc_data = off_proc_data_h; + + hypre_TFree(tmp, HYPRE_MEMORY_HOST); + } + + /* call hypre_IJMatrixAddToValuesParCSR directly inside this function + * with one chunk of data */ + HYPRE_Int off_proc_nelm_recv_cur = 0; + HYPRE_Int off_proc_nelm_recv_max = 0; + HYPRE_BigInt *off_proc_i_recv = NULL; + HYPRE_BigInt *off_proc_j_recv = NULL; + HYPRE_Complex *off_proc_data_recv = NULL; + HYPRE_BigInt 
*off_proc_i_recv_d = NULL; + HYPRE_BigInt *off_proc_j_recv_d = NULL; + HYPRE_Complex *off_proc_data_recv_d = NULL; + num_rows = off_proc_i_indx/2; /* verify that we have created the assumed partition */ @@ -2421,8 +2151,7 @@ hypre_IJMatrixAssembleOffProcValsParCSR( hypre_IJMatrix *matrix, if (big_int_size == obj_size_bytes) { col_ptr = (HYPRE_BigInt *) recv_data_ptr; - recv_data_ptr = - (void *) ((char *)recv_data_ptr + num_elements*obj_size_bytes); + recv_data_ptr = (void *) ((char *)recv_data_ptr + num_elements*obj_size_bytes); } else /* copy data */ { @@ -2442,8 +2171,7 @@ hypre_IJMatrixAssembleOffProcValsParCSR( hypre_IJMatrix *matrix, if (complex_size == obj_size_bytes) { col_data_ptr = (HYPRE_Complex *) recv_data_ptr; - recv_data_ptr = - (void *) ((char *)recv_data_ptr + num_elements*obj_size_bytes); + recv_data_ptr = (void *) ((char *)recv_data_ptr + num_elements*obj_size_bytes); } else /* copy data */ { @@ -2461,11 +2189,58 @@ hypre_IJMatrixAssembleOffProcValsParCSR( hypre_IJMatrix *matrix, } - hypre_IJMatrixAddToValuesParCSR(matrix,1,&num_elements,&row,&row_index, - col_ptr,col_data_ptr); + if (memory_location == HYPRE_MEMORY_HOST) + { + hypre_IJMatrixAddToValuesParCSR(matrix, 1, &num_elements, &row, &row_index, col_ptr, col_data_ptr); + } + else + { + HYPRE_Int nelm_new = off_proc_nelm_recv_cur + num_elements; + + if (nelm_new > off_proc_nelm_recv_max) + { + off_proc_nelm_recv_max = nelm_new * 2; + off_proc_i_recv = hypre_TReAlloc(off_proc_i_recv, HYPRE_BigInt, off_proc_nelm_recv_max, HYPRE_MEMORY_HOST); + off_proc_j_recv = hypre_TReAlloc(off_proc_j_recv, HYPRE_BigInt, off_proc_nelm_recv_max, HYPRE_MEMORY_HOST); + off_proc_data_recv = hypre_TReAlloc(off_proc_data_recv, HYPRE_Complex, off_proc_nelm_recv_max, HYPRE_MEMORY_HOST); + } + + HYPRE_Int i; + for (i = 0; i < num_elements; i++) + { + off_proc_i_recv[off_proc_nelm_recv_cur + i] = row; + } + hypre_TMemcpy(off_proc_j_recv + off_proc_nelm_recv_cur, col_ptr, HYPRE_BigInt, num_elements, + 
HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + hypre_TMemcpy(off_proc_data_recv + off_proc_nelm_recv_cur, col_data_ptr, HYPRE_Complex, num_elements, + HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + + off_proc_nelm_recv_cur = nelm_new; + } + indx += (num_elements*2); } } + + if (memory_location == HYPRE_MEMORY_DEVICE) + { + off_proc_i_recv_d = hypre_TAlloc(HYPRE_BigInt, off_proc_nelm_recv_cur, HYPRE_MEMORY_DEVICE); + off_proc_j_recv_d = hypre_TAlloc(HYPRE_BigInt, off_proc_nelm_recv_cur, HYPRE_MEMORY_DEVICE); + off_proc_data_recv_d = hypre_TAlloc(HYPRE_Complex, off_proc_nelm_recv_cur, HYPRE_MEMORY_DEVICE); + + hypre_TMemcpy(off_proc_i_recv_d, off_proc_i_recv, HYPRE_BigInt, off_proc_nelm_recv_cur, + HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST); + hypre_TMemcpy(off_proc_j_recv_d, off_proc_j_recv, HYPRE_BigInt, off_proc_nelm_recv_cur, + HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST); + hypre_TMemcpy(off_proc_data_recv_d, off_proc_data_recv, HYPRE_Complex, off_proc_nelm_recv_cur, + HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST); + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + hypre_IJMatrixSetAddValuesParCSRDevice(matrix, off_proc_nelm_recv_cur, NULL, off_proc_i_recv_d, NULL, off_proc_j_recv_d, + off_proc_data_recv_d, "add"); +#endif + } + hypre_TFree(send_proc_obj.v_elements, HYPRE_MEMORY_HOST); hypre_TFree(send_proc_obj.vec_starts, HYPRE_MEMORY_HOST); hypre_TFree(send_proc_obj.id, HYPRE_MEMORY_HOST); @@ -2475,16 +2250,29 @@ hypre_IJMatrixAssembleOffProcValsParCSR( hypre_IJMatrix *matrix, { hypre_TFree(big_int_data, HYPRE_MEMORY_HOST); } + if (complex_data) { hypre_TFree(complex_data, HYPRE_MEMORY_HOST); } - return hypre_error_flag; -} + if (memory_location == HYPRE_MEMORY_DEVICE) + { + hypre_TFree(off_proc_i, HYPRE_MEMORY_HOST); + hypre_TFree(off_proc_j, HYPRE_MEMORY_HOST); + hypre_TFree(off_proc_data, HYPRE_MEMORY_HOST); + } -#endif + hypre_TFree(off_proc_i_recv, HYPRE_MEMORY_HOST); + hypre_TFree(off_proc_j_recv, HYPRE_MEMORY_HOST); + hypre_TFree(off_proc_data_recv, 
HYPRE_MEMORY_HOST); + + hypre_TFree(off_proc_i_recv_d, HYPRE_MEMORY_DEVICE); + hypre_TFree(off_proc_j_recv_d, HYPRE_MEMORY_DEVICE); + hypre_TFree(off_proc_data_recv_d, HYPRE_MEMORY_DEVICE); + return hypre_error_flag; +} /*-------------------------------------------------------------------- * hypre_FillResponseIJOffProcVals @@ -2620,22 +2408,20 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) HYPRE_Int num_cols_offd; HYPRE_Int *diag_pos; HYPRE_BigInt *col_map_offd; + HYPRE_Int *rownnz; HYPRE_Int *row_length; HYPRE_BigInt **aux_j; HYPRE_Complex **aux_data; HYPRE_Int my_id, num_procs; HYPRE_Int num_rows; + HYPRE_Int num_rownnz; HYPRE_Int i_diag, i_offd; HYPRE_BigInt col_0, col_n; HYPRE_Int nnz_offd; HYPRE_BigInt *big_offd_j; HYPRE_BigInt *tmp_j; HYPRE_Complex temp; -#ifdef HYPRE_NO_GLOBAL_PARTITION HYPRE_BigInt base = hypre_IJMatrixGlobalFirstCol(matrix); -#else - HYPRE_BigInt base = col_partitioning[0]; -#endif HYPRE_Int off_proc_i_indx; HYPRE_Int max_off_proc_elmts; HYPRE_Int current_num_elmts; @@ -2652,6 +2438,8 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) HYPRE_Int max_num_threads; HYPRE_Int aux_flag, aux_flag_global; + HYPRE_ANNOTATE_FUNC_BEGIN; + max_num_threads = hypre_NumThreads(); /* first find out if anyone has an aux_matrix, and create one if you don't @@ -2678,7 +2466,7 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) /*cancel_indx = hypre_AuxParCSRMatrixCancelIndx(aux_matrix); if (cancel_indx) { - current_num_elmts=hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix); + current_num_elmts=hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix); off_proc_i=hypre_AuxParCSRMatrixOffProcI(aux_matrix); off_proc_j=hypre_AuxParCSRMatrixOffProcJ(aux_matrix); off_proc_data=hypre_AuxParCSRMatrixOffProcData(aux_matrix); @@ -2716,7 +2504,7 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) } } hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix) = new_off_proc_i_indx; - hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix) = current_num_elmts; + 
hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix) = current_num_elmts; }*/ off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); hypre_MPI_Allreduce(&off_proc_i_indx, &offd_proc_elmts, 1, HYPRE_MPI_INT, @@ -2724,12 +2512,13 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) if (offd_proc_elmts) { max_off_proc_elmts=hypre_AuxParCSRMatrixMaxOffProcElmts(aux_matrix); - current_num_elmts=hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix); + current_num_elmts=hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix); off_proc_i=hypre_AuxParCSRMatrixOffProcI(aux_matrix); off_proc_j=hypre_AuxParCSRMatrixOffProcJ(aux_matrix); off_proc_data=hypre_AuxParCSRMatrixOffProcData(aux_matrix); hypre_IJMatrixAssembleOffProcValsParCSR( matrix,off_proc_i_indx, max_off_proc_elmts, current_num_elmts, + HYPRE_MEMORY_HOST, off_proc_i, off_proc_j, off_proc_data); } } @@ -2738,40 +2527,42 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) { hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); -#ifdef HYPRE_NO_GLOBAL_PARTITION num_rows = (HYPRE_Int)(row_partitioning[1] - row_partitioning[0]); col_0 = col_partitioning[0]; col_n = col_partitioning[1]-1; -#else - num_rows = (HYPRE_Int)(row_partitioning[my_id+1] - row_partitioning[my_id]); - col_0 = col_partitioning[my_id]; - col_n = col_partitioning[my_id+1]-1; -#endif /* move data into ParCSRMatrix if not there already */ if (hypre_AuxParCSRMatrixNeedAux(aux_matrix)) { - HYPRE_Int *diag_array, *offd_array; - diag_array = hypre_CTAlloc(HYPRE_Int, max_num_threads, HYPRE_MEMORY_HOST); - offd_array = hypre_CTAlloc(HYPRE_Int, max_num_threads, HYPRE_MEMORY_HOST); + HYPRE_Int *diag_array; + HYPRE_Int *offd_array; + + /* Update nonzero rows of aux_matrix */ + hypre_AuxParCSRMatrixSetRownnz(aux_matrix); + aux_j = hypre_AuxParCSRMatrixAuxJ(aux_matrix); aux_data = hypre_AuxParCSRMatrixAuxData(aux_matrix); row_length = hypre_AuxParCSRMatrixRowLength(aux_matrix); - diag_pos = hypre_CTAlloc(HYPRE_Int, num_rows, 
HYPRE_MEMORY_HOST); - i_diag = 0; - i_offd = 0; + num_rownnz = hypre_AuxParCSRMatrixLocalNumRownnz(aux_matrix); + rownnz = hypre_AuxParCSRMatrixRownnz(aux_matrix); + + diag_array = hypre_CTAlloc(HYPRE_Int, max_num_threads, HYPRE_MEMORY_HOST); + offd_array = hypre_CTAlloc(HYPRE_Int, max_num_threads, HYPRE_MEMORY_HOST); + diag_pos = hypre_TAlloc(HYPRE_Int, num_rownnz, HYPRE_MEMORY_HOST); + + i_diag = i_offd = 0; #ifdef HYPRE_USING_OPENMP #pragma omp parallel private(i, j, i_diag, i_offd) #endif { HYPRE_BigInt *local_j; HYPRE_Complex *local_data; - HYPRE_Int rest, size, ns, ne; + HYPRE_Int ii, rest, size, ns, ne; HYPRE_Int num_threads, my_thread_num; num_threads = hypre_NumActiveThreads(); my_thread_num = hypre_GetThreadNum(); - size = num_rows/num_threads; - rest = num_rows - size*num_threads; + size = num_rownnz/num_threads; + rest = num_rownnz - size*num_threads; if (my_thread_num < rest) { @@ -2784,14 +2575,14 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) ne = (my_thread_num+1)*size + rest; } - i_diag = 0; - i_offd = 0; - for (i=ns; i < ne; i++) + i_diag = i_offd = 0; + for (i = ns; i < ne; i++) { - local_j = aux_j[i]; - local_data = aux_data[i]; + ii = rownnz ? rownnz[i] : i; + local_j = aux_j[ii]; + local_data = aux_data[ii]; diag_pos[i] = -1; - for (j=0; j < row_length[i]; j++) + for (j = 0; j < row_length[ii]; j++) { if (local_j[j] < col_0 || local_j[j] > col_n) { @@ -2800,7 +2591,7 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) else { i_diag++; - if ((HYPRE_Int)(local_j[j]-col_0) == i) + if ((HYPRE_Int)(local_j[j] - col_0) == i) { diag_pos[i] = j; } @@ -2851,18 +2642,20 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) i_diag = 0; i_offd = 0; } - for (i=ns; i < ne; i++) + + for (i = ns; i < ne; i++) { - diag_i[i] = i_diag; - offd_i[i] = i_offd; - local_j = aux_j[i]; - local_data = aux_data[i]; + ii = rownnz ? 
rownnz[i] : i; + diag_i[ii] = i_diag; + offd_i[ii] = i_offd; + local_j = aux_j[ii]; + local_data = aux_data[ii]; if (diag_pos[i] > -1) { diag_j[i_diag] = (HYPRE_Int)(local_j[diag_pos[i]] - col_0); diag_data[i_diag++] = local_data[diag_pos[i]]; } - for (j=0; j < row_length[i]; j++) + for (j = 0; j < row_length[ii]; j++) { if (local_j[j] < col_0 || local_j[j] > col_n) { @@ -2876,6 +2669,39 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) } } } + + /* Correct diag_i and offd_i */ + if (rownnz != NULL) + { +#ifdef HYPRE_USING_OPENMP +#pragma omp barrier +#endif + for (i = ns; i < (ne-1); i++) + { + for (ii = rownnz[i] + 1; ii < rownnz[i+1]; ii++) + { + diag_i[ii] = diag_i[rownnz[i+1]]; + offd_i[ii] = offd_i[rownnz[i+1]]; + } + } + + if (my_thread_num < (num_threads - 1)) + { + for (ii = rownnz[ne-1] + 1; ii < rownnz[ne]; ii++) + { + diag_i[ii] = diag_i[rownnz[ne]]; + offd_i[ii] = offd_i[rownnz[ne]]; + } + } + else + { + for (ii = rownnz[ne-1] + 1; ii < num_rows; ii++) + { + diag_i[ii] = diag_i[num_rows]; + offd_i[ii] = offd_i[num_rows]; + } + } + } } /* end parallel region */ hypre_TFree(diag_array, HYPRE_MEMORY_HOST); @@ -2905,7 +2731,7 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) for (i = 0; i < num_rows; i++) { j0 = diag_i[i]; - for (j=j0; j < diag_i[i+1]; j++) + for (j = j0; j < diag_i[i+1]; j++) { if (diag_j[j] == i) { @@ -2935,14 +2761,14 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) nnz_offd = offd_i[num_rows]; if (nnz_offd) { - tmp_j = hypre_CTAlloc(HYPRE_BigInt, nnz_offd, HYPRE_MEMORY_HOST); - for (i=0; i < nnz_offd; i++) + tmp_j = hypre_CTAlloc(HYPRE_BigInt, nnz_offd, HYPRE_MEMORY_HOST); + for (i = 0; i < nnz_offd; i++) { tmp_j[i] = big_offd_j[i]; } hypre_BigQsort0(tmp_j,0,nnz_offd-1); num_cols_offd = 1; - for (i=0; i < nnz_offd-1; i++) + for (i = 0; i < nnz_offd-1; i++) { if (tmp_j[i+1] > tmp_j[i]) { @@ -2950,21 +2776,21 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) } } col_map_offd = hypre_CTAlloc(HYPRE_BigInt, 
num_cols_offd, HYPRE_MEMORY_HOST); - for (i=0; i < num_cols_offd; i++) + for (i = 0; i < num_cols_offd; i++) { col_map_offd[i] = tmp_j[i]; } #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) #endif - for (i=0; i < nnz_offd; i++) + for (i = 0; i < nnz_offd; i++) { - offd_j[i]=hypre_BigBinarySearch(col_map_offd,big_offd_j[i],num_cols_offd); + offd_j[i] = hypre_BigBinarySearch(col_map_offd,big_offd_j[i],num_cols_offd); } if (base) { - for (i=0; i < num_cols_offd; i++) + for (i = 0; i < num_cols_offd; i++) { col_map_offd[i] -= base; } @@ -2981,6 +2807,8 @@ hypre_IJMatrixAssembleParCSR(hypre_IJMatrix *matrix) hypre_AuxParCSRMatrixDestroy(aux_matrix); hypre_IJMatrixTranslator(matrix) = NULL; + HYPRE_ANNOTATE_FUNC_END; + return hypre_error_flag; } @@ -3057,17 +2885,10 @@ hypre_IJMatrixSetValuesOMPParCSR( hypre_IJMatrix *matrix, //offproc_cnt = hypre_CTAlloc(HYPRE_Int, max_num_threads, HYPRE_MEMORY_HOST); -#ifdef HYPRE_NO_GLOBAL_PARTITION col_0 = col_partitioning[0]; col_n = col_partitioning[1]-1; first = hypre_IJMatrixGlobalFirstCol(matrix); pstart = 0; -#else - col_0 = col_partitioning[my_id]; - col_n = col_partitioning[my_id+1]-1; - first = col_partitioning[0]; - pstart = my_id; -#endif if (nrows < 0) { hypre_error_in_arg(2); @@ -3100,7 +2921,7 @@ hypre_IJMatrixSetValuesOMPParCSR( hypre_IJMatrix *matrix, /*if (aux_matrix) { current_num_elmts - = hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix); + = hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix); off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); off_proc_i = hypre_AuxParCSRMatrixOffProcI(aux_matrix); off_proc_j = hypre_AuxParCSRMatrixOffProcJ(aux_matrix); @@ -3142,7 +2963,7 @@ hypre_IJMatrixSetValuesOMPParCSR( hypre_IJMatrix *matrix, for (ii=ns; ii < ne; ii++) { row = rows[ii]; - n = ncols[ii]; + n = ncols ? 
ncols[ii] : 1; if (n == 0) /* empty row */ { continue; @@ -3329,7 +3150,7 @@ hypre_IJMatrixSetValuesOMPParCSR( hypre_IJMatrix *matrix, /*if (aux_matrix) { current_num_elmts - = hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix); + = hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix); off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); off_proc_i = hypre_AuxParCSRMatrixOffProcI(aux_matrix); off_proc_j = hypre_AuxParCSRMatrixOffProcJ(aux_matrix); @@ -3401,7 +3222,7 @@ hypre_IJMatrixSetValuesOMPParCSR( hypre_IJMatrix *matrix, for (ii=ns; ii < ne; ii++) { row = rows[ii]; - n = ncols[ii]; + n = ncols ? ncols[ii] : 1; if (n == 0) /* empty row */ { continue; @@ -3703,17 +3524,10 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix, for (i1=0; i1 < max_num_threads; i1++) offproc_cnt[i1] = NULL; -#ifdef HYPRE_NO_GLOBAL_PARTITION col_0 = col_partitioning[0]; col_n = col_partitioning[1]-1; first = hypre_IJMatrixGlobalFirstCol(matrix); pstart = 0; -#else - col_0 = col_partitioning[my_id]; - col_n = col_partitioning[my_id+1]-1; - first = col_partitioning[0]; - pstart = my_id; -#endif if (hypre_IJMatrixAssembleFlag(matrix)) /* matrix already assembled */ { HYPRE_Int num_cols_offd; @@ -3736,7 +3550,7 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix, if (aux_matrix) { current_num_elmts - = hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix); + = hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix); off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); off_proc_i = hypre_AuxParCSRMatrixOffProcI(aux_matrix); off_proc_j = hypre_AuxParCSRMatrixOffProcJ(aux_matrix); @@ -3776,7 +3590,7 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix, for (ii=ns; ii < ne; ii++) { row = rows[ii]; - n = ncols[ii]; + n = ncols ? 
ncols[ii] : 1; if (n == 0) /* empty row */ { continue; @@ -3948,7 +3762,7 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix, if (aux_matrix) { current_num_elmts - = hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix); + = hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix); off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); off_proc_i = hypre_AuxParCSRMatrixOffProcI(aux_matrix); off_proc_j = hypre_AuxParCSRMatrixOffProcJ(aux_matrix); @@ -4019,7 +3833,7 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix, for (ii=ns; ii < ne; ii++) { row = rows[ii]; - n = ncols[ii]; + n = ncols ? ncols[ii] : 1; if (n == 0) /* empty row */ { continue; @@ -4247,14 +4061,14 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix, { ii = my_offproc_cnt[i2]; row = rows[ii]; - n = ncols[ii]; + n = ncols ? ncols[ii] : 1; if (n == 0) /* empty row */ { continue; } indx = my_offproc_cnt[i2+1]; current_num_elmts - = hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix); + = hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix); max_off_proc_elmts = hypre_AuxParCSRMatrixMaxOffProcElmts(aux_matrix); off_proc_i_indx = hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix); @@ -4298,7 +4112,7 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix, off_proc_data[current_num_elmts++] = values[indx++]; } hypre_AuxParCSRMatrixOffProcIIndx(aux_matrix) = off_proc_i_indx; - hypre_AuxParCSRMatrixCurrentNumElmts(aux_matrix) = current_num_elmts; + hypre_AuxParCSRMatrixCurrentOffProcElmts(aux_matrix) = current_num_elmts; } hypre_TFree(offproc_cnt[i1], HYPRE_MEMORY_HOST); } @@ -4307,4 +4121,3 @@ hypre_IJMatrixAddToValuesOMPParCSR( hypre_IJMatrix *matrix, return hypre_error_flag; } - diff --git a/src/IJ_mv/IJMatrix_parcsr_device.c b/src/IJ_mv/IJMatrix_parcsr_device.c new file mode 100644 index 000000000..7253244a8 --- /dev/null +++ b/src/IJ_mv/IJMatrix_parcsr_device.c @@ -0,0 +1,744 @@ +/****************************************************************************** + * Copyright 
1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/****************************************************************************** + * + * IJMatrix_ParCSR interface + * + *****************************************************************************/ + +#include "_hypre_IJ_mv.h" +#include "_hypre_utilities.hpp" + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + +__global__ void +hypreCUDAKernel_IJMatrixValues_dev1(HYPRE_Int n, HYPRE_Int *rowind, HYPRE_Int *row_ptr, HYPRE_Int *row_len, HYPRE_Int *mark) +{ + HYPRE_Int global_thread_id = hypre_cuda_get_grid_thread_id<1,1>(); + + if (global_thread_id < n) + { + HYPRE_Int row = rowind[global_thread_id]; + if (global_thread_id < read_only_load(&row_ptr[row]) + read_only_load(&row_len[row])) + { + mark[global_thread_id] = 0; + } + else + { + mark[global_thread_id] = -1; + } + } +} + +/* E.g. nrows = 3 + * ncols = 2 3 4 + * rows = 10 20 30 + * rows_indexes = 0 4 9 + * (0 1 2 3 | 4 5 6 7 8 | 9 10 11 12 13) + * cols = x x ! ! | * * * ! ! | + + + + ! + * values = . . ! ! | . . . ! ! | . . . . ! 
+ */ + +HYPRE_Int +hypre_IJMatrixSetAddValuesParCSRDevice( hypre_IJMatrix *matrix, + HYPRE_Int nrows, + HYPRE_Int *ncols, /* if NULL, == all ones */ + const HYPRE_BigInt *rows, + const HYPRE_Int *row_indexes, /* if NULL, == ex_scan of ncols, i.e, no gap */ + const HYPRE_BigInt *cols, + const HYPRE_Complex *values, + const char *action ) +{ + HYPRE_BigInt *row_partitioning = hypre_IJMatrixRowPartitioning(matrix); + HYPRE_BigInt *col_partitioning = hypre_IJMatrixColPartitioning(matrix); + HYPRE_BigInt row_start = row_partitioning[0]; + HYPRE_BigInt row_end = row_partitioning[1]; + HYPRE_BigInt col_start = col_partitioning[0]; + HYPRE_BigInt col_end = col_partitioning[1]; + HYPRE_Int num_local_rows = row_end - row_start; + HYPRE_Int num_local_cols = col_end - col_start; + const char SorA = action[0] == 's' ? 1 : 0; + + hypre_AuxParCSRMatrix *aux_matrix = (hypre_AuxParCSRMatrix *) hypre_IJMatrixTranslator(matrix); + + HYPRE_Int nelms; + HYPRE_Int *row_ptr = NULL; + + /* expand rows into full expansion of rows based on ncols + * if ncols == NULL, ncols is all ones, so rows are indeed full expansion */ + if (ncols) + { + row_ptr = hypre_TAlloc(HYPRE_Int, nrows + 1, HYPRE_MEMORY_DEVICE); + hypre_TMemcpy(row_ptr, ncols, HYPRE_Int, nrows, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + /* RL: have to init the last entry; cuda-memcheck --tool initcheck complains otherwise + * but why? 
exclusive scan does not need it */ + /* hypre_Memset(row_ptr + nrows, 0, sizeof(HYPRE_Int), HYPRE_MEMORY_DEVICE); */ + hypreDevice_IntegerExclusiveScan(nrows + 1, row_ptr); + hypre_TMemcpy(&nelms, row_ptr+nrows, HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); + } + else + { + nelms = nrows; + } + + if (nelms <= 0) + { + return hypre_error_flag; + } + + if (!aux_matrix) + { + hypre_AuxParCSRMatrixCreate(&aux_matrix, num_local_rows, num_local_cols, NULL); + hypre_AuxParCSRMatrixInitialize_v2(aux_matrix, HYPRE_MEMORY_DEVICE); + hypre_IJMatrixTranslator(matrix) = aux_matrix; + } + + HYPRE_Int stack_elmts_max = hypre_AuxParCSRMatrixMaxStackElmts(aux_matrix); + HYPRE_Int stack_elmts_current = hypre_AuxParCSRMatrixCurrentStackElmts(aux_matrix); + HYPRE_Int stack_elmts_required = stack_elmts_current + nelms; + HYPRE_BigInt *stack_i = hypre_AuxParCSRMatrixStackI(aux_matrix); + HYPRE_BigInt *stack_j = hypre_AuxParCSRMatrixStackJ(aux_matrix); + HYPRE_Complex *stack_data = hypre_AuxParCSRMatrixStackData(aux_matrix); + char *stack_sora = hypre_AuxParCSRMatrixStackSorA(aux_matrix); + + if ( stack_elmts_max < stack_elmts_required ) + { + HYPRE_Int stack_elmts_max_new = hypre_max(hypre_AuxParCSRMatrixUsrOnProcElmts (aux_matrix), 0) + + hypre_max(hypre_AuxParCSRMatrixUsrOffProcElmts(aux_matrix), 0); + if ( hypre_AuxParCSRMatrixUsrOnProcElmts (aux_matrix) < 0 || + hypre_AuxParCSRMatrixUsrOffProcElmts(aux_matrix) < 0 ) + { + stack_elmts_max_new = hypre_max(num_local_rows * hypre_AuxParCSRMatrixInitAllocFactor(aux_matrix), stack_elmts_max_new); + stack_elmts_max_new = hypre_max(stack_elmts_max * hypre_AuxParCSRMatrixGrowFactor(aux_matrix), stack_elmts_max_new); + } + stack_elmts_max_new = hypre_max(stack_elmts_required, stack_elmts_max_new); + + hypre_AuxParCSRMatrixStackI(aux_matrix) = stack_i = hypre_TReAlloc_v2(stack_i, HYPRE_BigInt, stack_elmts_max, HYPRE_BigInt, stack_elmts_max_new, HYPRE_MEMORY_DEVICE); + hypre_AuxParCSRMatrixStackJ(aux_matrix) = stack_j = 
hypre_TReAlloc_v2(stack_j, HYPRE_BigInt, stack_elmts_max, HYPRE_BigInt, stack_elmts_max_new, HYPRE_MEMORY_DEVICE); + hypre_AuxParCSRMatrixStackData(aux_matrix) = stack_data = hypre_TReAlloc_v2(stack_data, HYPRE_Complex, stack_elmts_max, HYPRE_Complex, stack_elmts_max_new, HYPRE_MEMORY_DEVICE); + hypre_AuxParCSRMatrixStackSorA(aux_matrix) = stack_sora = hypre_TReAlloc_v2(stack_sora, char, stack_elmts_max, char, stack_elmts_max_new, HYPRE_MEMORY_DEVICE); + hypre_AuxParCSRMatrixMaxStackElmts(aux_matrix) = stack_elmts_max_new; + } + + HYPRE_THRUST_CALL(fill_n, stack_sora + stack_elmts_current, nelms, SorA); + + if (ncols) + { + hypreDevice_CsrRowPtrsToIndicesWithRowNum(nrows, nelms, row_ptr, (HYPRE_BigInt *) rows, stack_i + stack_elmts_current); + } + else + { + hypre_TMemcpy(stack_i + stack_elmts_current, rows, HYPRE_BigInt, nelms, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + } + + if (row_indexes) + { + HYPRE_Int len, len1; + hypre_TMemcpy(&len1, &row_indexes[nrows-1], HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); + if (ncols) + { + hypre_TMemcpy(&len, &ncols[nrows-1], HYPRE_Int, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); + } + else + { + len = 1; + } + /* this is the *effective* length of cols and values */ + len += len1; + HYPRE_Int *indicator = hypre_CTAlloc(HYPRE_Int, len, HYPRE_MEMORY_DEVICE); + hypreDevice_CsrRowPtrsToIndices_v2(nrows-1, len1, (HYPRE_Int *) row_indexes, indicator); + /* mark unwanted elements as -1 */ + dim3 bDim = hypre_GetDefaultCUDABlockDimension(); + dim3 gDim = hypre_GetDefaultCUDAGridDimension(len1, "thread", bDim); + HYPRE_CUDA_LAUNCH( hypreCUDAKernel_IJMatrixValues_dev1, gDim, bDim, len1, indicator, (HYPRE_Int *) row_indexes, ncols, indicator ); + + auto new_end = HYPRE_THRUST_CALL( + copy_if, + thrust::make_zip_iterator(thrust::make_tuple(cols, values)), + thrust::make_zip_iterator(thrust::make_tuple(cols + len, values + len)), + indicator, + thrust::make_zip_iterator(thrust::make_tuple(stack_j + stack_elmts_current, + 
stack_data + stack_elmts_current)), + is_nonnegative() ); + + HYPRE_Int nnz_tmp = thrust::get<0>(new_end.get_iterator_tuple()) - (stack_j + stack_elmts_current); + + hypre_assert(nnz_tmp == nelms); + + hypre_TFree(indicator, HYPRE_MEMORY_DEVICE); + } + else + { + hypre_TMemcpy(stack_j + stack_elmts_current, cols, HYPRE_BigInt, nelms, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + hypre_TMemcpy(stack_data + stack_elmts_current, values, HYPRE_Complex, nelms, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + } + + hypre_AuxParCSRMatrixCurrentStackElmts(aux_matrix) += nelms; + + hypre_TFree(row_ptr, HYPRE_MEMORY_DEVICE); + + return hypre_error_flag; +} + +template +struct hypre_IJMatrixAssembleFunctor : public thrust::binary_function< thrust::tuple, thrust::tuple, thrust::tuple > +{ + typedef thrust::tuple Tuple; + + __device__ Tuple operator()(const Tuple& x, const Tuple& y ) + { + return thrust::make_tuple( hypre_max(thrust::get<0>(x), thrust::get<0>(y)), thrust::get<1>(x) + thrust::get<1>(y) ); + } +}; + +/* helper routine used in hypre_IJMatrixAssembleParCSRDevice: + * 1. sort (X0, A0) with key (I0, J0) + * [put the diagonal first; see the comments in hypre_cuda_utils.c] + * 2. for each segment in (I0, J0), zero out in A0 all before the last `set' + * 3. 
reduce A0 [with sum] and reduce X0 [with max] + * N0: input size; N1: size after reduction (<= N0) + * Note: (I1, J1, X1, A1) are not resized to N1 but have size N0 + */ +HYPRE_Int +hypre_IJMatrixAssembleSortAndReduce1(HYPRE_Int N0, HYPRE_BigInt *I0, HYPRE_BigInt *J0, char *X0, HYPRE_Complex *A0, + HYPRE_Int *N1, HYPRE_BigInt **I1, HYPRE_BigInt **J1, char **X1, HYPRE_Complex **A1 ) +{ + hypreDevice_StableSortTupleByTupleKey(N0, I0, J0, X0, A0, 2); + + HYPRE_BigInt *I = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE); + HYPRE_BigInt *J = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE); + char *X = hypre_TAlloc(char, N0, HYPRE_MEMORY_DEVICE); + HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE); + + /* + dim3 bDim = hypre_GetDefaultCUDABlockDimension(); + dim3 gDim = hypre_GetDefaultCUDAGridDimension(N0, "thread", bDim); + HYPRE_CUDA_LAUNCH( hypreCUDAKernel_IJMatrixAssembleSortAndReduce1, gDim, bDim, N0, I0, J0, X0, A0 ); + */ + + /* output X: 0: keep, 1: zero-out */ + HYPRE_THRUST_CALL( + exclusive_scan_by_key, + make_reverse_iterator(thrust::make_zip_iterator(thrust::make_tuple(I0+N0, J0+N0))), + make_reverse_iterator(thrust::make_zip_iterator(thrust::make_tuple(I0, J0))), + make_reverse_iterator(thrust::device_pointer_cast(X0)+N0), + make_reverse_iterator(thrust::device_pointer_cast(X) +N0), + char(0), + thrust::equal_to< thrust::tuple >(), + thrust::maximum() ); + + HYPRE_THRUST_CALL(replace_if, A0, A0 + N0, X, thrust::identity(), 0.0); + + auto new_end = HYPRE_THRUST_CALL( + reduce_by_key, + thrust::make_zip_iterator(thrust::make_tuple(I0, J0 )), /* keys_first */ + thrust::make_zip_iterator(thrust::make_tuple(I0 + N0, J0 + N0)), /* keys_last */ + thrust::make_zip_iterator(thrust::make_tuple(X0, A0 )), /* values_first */ + thrust::make_zip_iterator(thrust::make_tuple(I, J )), /* keys_output */ + thrust::make_zip_iterator(thrust::make_tuple(X, A )), /* values_output */ + thrust::equal_to< thrust::tuple >(), /* binary_pred */ + 
hypre_IJMatrixAssembleFunctor() /* binary_op */); + + *N1 = thrust::get<0>(new_end.first.get_iterator_tuple()) - I; + *I1 = I; + *J1 = J; + *X1 = X; + *A1 = A; + + return hypre_error_flag; +} + +template +struct hypre_IJMatrixAssembleFunctor2 : public thrust::binary_function< thrust::tuple, thrust::tuple, thrust::tuple > +{ + typedef thrust::tuple Tuple; + + __device__ Tuple operator()(const Tuple& x, const Tuple& y) + { + const char tx = thrust::get<0>(x); + const char ty = thrust::get<0>(y); + const HYPRE_Complex vx = thrust::get<1>(x); + const HYPRE_Complex vy = thrust::get<1>(y); + const HYPRE_Complex vz = tx == 0 && ty == 0 ? vx + vy : tx ? vx : vy; + return thrust::make_tuple(0, vz); + } +}; + +HYPRE_Int +hypre_IJMatrixAssembleSortAndReduce2(HYPRE_Int N0, HYPRE_Int *I0, HYPRE_Int *J0, char *X0, HYPRE_Complex *A0, + HYPRE_Int *N1, HYPRE_Int **I1, HYPRE_Int **J1, HYPRE_Complex **A1, + HYPRE_Int opt ) +{ + hypreDevice_StableSortTupleByTupleKey(N0, I0, J0, X0, A0, opt); + + HYPRE_Int *I = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE); + HYPRE_Int *J = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE); + char *X = hypre_TAlloc(char, N0, HYPRE_MEMORY_DEVICE); + HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE); + + auto new_end = HYPRE_THRUST_CALL( + reduce_by_key, + thrust::make_zip_iterator(thrust::make_tuple(I0, J0 )), /* keys_first */ + thrust::make_zip_iterator(thrust::make_tuple(I0 + N0, J0 + N0)), /* keys_last */ + thrust::make_zip_iterator(thrust::make_tuple(X0, A0 )), /* values_first */ + thrust::make_zip_iterator(thrust::make_tuple(I, J )), /* keys_output */ + thrust::make_zip_iterator(thrust::make_tuple(X, A )), /* values_output */ + thrust::equal_to< thrust::tuple >(), /* binary_pred */ + hypre_IJMatrixAssembleFunctor2() /* binary_op */); + + *N1 = thrust::get<0>(new_end.first.get_iterator_tuple()) - I; + *I1 = I; + *J1 = J; + *A1 = A; + + hypre_TFree(X, HYPRE_MEMORY_DEVICE); + + return hypre_error_flag; +} + +HYPRE_Int 
+hypre_IJMatrixAssembleSortAndReduce3(HYPRE_Int N0, HYPRE_BigInt *I0, HYPRE_BigInt *J0, char *X0, HYPRE_Complex *A0, + HYPRE_Int *N1, HYPRE_BigInt **I1, HYPRE_BigInt **J1, HYPRE_Complex **A1) +{ + hypreDevice_StableSortTupleByTupleKey(N0, I0, J0, X0, A0, 0); + + HYPRE_Int *I = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE); + HYPRE_Int *J = hypre_TAlloc(HYPRE_Int, N0, HYPRE_MEMORY_DEVICE); + HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE); + + /* output in X0: 0: keep, 1: zero-out */ + HYPRE_THRUST_CALL( + inclusive_scan_by_key, + make_reverse_iterator(thrust::make_zip_iterator(thrust::make_tuple(I0+N0, J0+N0))), + make_reverse_iterator(thrust::make_zip_iterator(thrust::make_tuple(I0, J0))), + make_reverse_iterator(thrust::device_pointer_cast(X0)+N0), + make_reverse_iterator(thrust::device_pointer_cast(X0)+N0), + thrust::equal_to< thrust::tuple >(), + thrust::maximum() ); + + HYPRE_THRUST_CALL(replace_if, A0, A0 + N0, X0, thrust::identity(), 0.0); + + auto new_end = HYPRE_THRUST_CALL( + reduce_by_key, + thrust::make_zip_iterator(thrust::make_tuple(I0, J0 )), /* keys_first */ + thrust::make_zip_iterator(thrust::make_tuple(I0 + N0, J0 + N0)), /* keys_last */ + A0, /* values_first */ + thrust::make_zip_iterator(thrust::make_tuple(I, J )), /* keys_output */ + A, /* values_output */ + thrust::equal_to< thrust::tuple >() /* binary_pred */); + + + *N1 = new_end.second - A; + *I1 = I; + *J1 = J; + *A1 = A; + + return hypre_error_flag; +} + +#if 0 +HYPRE_Int +hypre_IJMatrixAssembleSortAndRemove(HYPRE_Int N0, HYPRE_BigInt *I0, HYPRE_BigInt *J0, char *X0, HYPRE_Complex *A0) +{ + hypreDevice_StableSortTupleByTupleKey(N0, I0, J0, X0, A0, 0); + + /* output in X0: 0: keep, 1: remove */ + HYPRE_THRUST_CALL( + inclusive_scan_by_key, + make_reverse_iterator(thrust::make_zip_iterator(thrust::make_tuple(I0+N0, J0+N0))), + make_reverse_iterator(thrust::make_zip_iterator(thrust::make_tuple(I0, J0))), + make_reverse_iterator(thrust::device_pointer_cast(X0)+N0), 
+ make_reverse_iterator(thrust::device_pointer_cast(X0)+N0), + thrust::equal_to< thrust::tuple >(), + thrust::maximum() ); + + auto new_end = HYPRE_THRUST_CALL( + remove_if, + thrust::make_zip_iterator(thrust::make_tuple(I0, J0, A0)), + thrust::make_zip_iterator(thrust::make_tuple(I0+N0, J0+N0, A0+N0)), + X0, + thrust::identity()); + + HYPRE_Int N1 = thrust::get<0>(new_end.get_iterator_tuple()) - I0; + + hypre_assert(N1 >= 0 && N1 <= N0); + + return N1; +} +#endif + +HYPRE_Int +hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix) +{ + MPI_Comm comm = hypre_IJMatrixComm(matrix); + HYPRE_BigInt *row_partitioning = hypre_IJMatrixRowPartitioning(matrix); + HYPRE_BigInt *col_partitioning = hypre_IJMatrixColPartitioning(matrix); + HYPRE_BigInt row_start = row_partitioning[0]; + HYPRE_BigInt row_end = row_partitioning[1]; + HYPRE_BigInt col_start = col_partitioning[0]; + HYPRE_BigInt col_end = col_partitioning[1]; + HYPRE_Int nrows = row_end - row_start; + HYPRE_Int ncols = col_end - col_start; + + hypre_ParCSRMatrix *par_matrix = (hypre_ParCSRMatrix*) hypre_IJMatrixObject(matrix); + hypre_AuxParCSRMatrix *aux_matrix = (hypre_AuxParCSRMatrix*) hypre_IJMatrixTranslator(matrix); + + if (!aux_matrix) + { + return hypre_error_flag; + } + + if (!par_matrix) + { + return hypre_error_flag; + } + + HYPRE_Int nelms = hypre_AuxParCSRMatrixCurrentStackElmts(aux_matrix); + HYPRE_BigInt *stack_i = hypre_AuxParCSRMatrixStackI(aux_matrix); + HYPRE_BigInt *stack_j = hypre_AuxParCSRMatrixStackJ(aux_matrix); + HYPRE_Complex *stack_data = hypre_AuxParCSRMatrixStackData(aux_matrix); + char *stack_sora = hypre_AuxParCSRMatrixStackSorA(aux_matrix); + + in_range pred(row_start, row_end-1); + HYPRE_Int nelms_on = HYPRE_THRUST_CALL(count_if, stack_i, stack_i+nelms, pred); + HYPRE_Int nelms_off = nelms - nelms_on; + HYPRE_Int nelms_off_max; + hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm); + + /* communicate for aux off-proc and add to remote aux 
on-proc */ + if (nelms_off_max) + { + HYPRE_Int new_nnz = 0; + HYPRE_BigInt *new_i = NULL; + HYPRE_BigInt *new_j = NULL; + HYPRE_Complex *new_data = NULL; + + if (nelms_off) + { + /* copy off-proc entries out of stack and remove from stack */ + HYPRE_BigInt *off_proc_i = hypre_TAlloc(HYPRE_BigInt, nelms_off, HYPRE_MEMORY_DEVICE); + HYPRE_BigInt *off_proc_j = hypre_TAlloc(HYPRE_BigInt, nelms_off, HYPRE_MEMORY_DEVICE); + HYPRE_Complex *off_proc_data = hypre_TAlloc(HYPRE_Complex, nelms_off, HYPRE_MEMORY_DEVICE); + char *off_proc_sora = hypre_TAlloc(char, nelms_off, HYPRE_MEMORY_DEVICE); + char *is_on_proc = hypre_TAlloc(char, nelms, HYPRE_MEMORY_DEVICE); + + HYPRE_THRUST_CALL(transform, stack_i, stack_i + nelms, is_on_proc, pred); + + auto new_end1 = HYPRE_THRUST_CALL( + copy_if, + thrust::make_zip_iterator(thrust::make_tuple(stack_i, stack_j, stack_data, stack_sora )), /* first */ + thrust::make_zip_iterator(thrust::make_tuple(stack_i + nelms, stack_j + nelms, stack_data + nelms, stack_sora + nelms)), /* last */ + is_on_proc, /* stencil */ + thrust::make_zip_iterator(thrust::make_tuple(off_proc_i, off_proc_j, off_proc_data, off_proc_sora)), /* result */ + thrust::not1(thrust::identity()) ); + + hypre_assert(thrust::get<0>(new_end1.get_iterator_tuple()) - off_proc_i == nelms_off); + + /* remove off-proc entries from stack */ + auto new_end2 = HYPRE_THRUST_CALL( + remove_if, + thrust::make_zip_iterator(thrust::make_tuple(stack_i, stack_j, stack_data, stack_sora )), /* first */ + thrust::make_zip_iterator(thrust::make_tuple(stack_i + nelms, stack_j + nelms, stack_data + nelms, stack_sora + nelms)), /* last */ + is_on_proc, /* stencil */ + thrust::not1(thrust::identity()) ); + + hypre_assert(thrust::get<0>(new_end2.get_iterator_tuple()) - stack_i == nelms_on); + + hypre_AuxParCSRMatrixCurrentStackElmts(aux_matrix) = nelms_on; + + hypre_TFree(is_on_proc, HYPRE_MEMORY_DEVICE); + + /* sort and reduce */ + hypre_IJMatrixAssembleSortAndReduce3(nelms_off, off_proc_i, 
off_proc_j, off_proc_sora, off_proc_data, &new_nnz, &new_i, &new_j, &new_data); + // new_nnz = hypre_IJMatrixAssembleSortAndRemove(nelms_off, off_proc_i, off_proc_j, off_proc_sora, off_proc_data); + + hypre_TFree(off_proc_i, HYPRE_MEMORY_DEVICE); + hypre_TFree(off_proc_j, HYPRE_MEMORY_DEVICE); + hypre_TFree(off_proc_data, HYPRE_MEMORY_DEVICE); + hypre_TFree(off_proc_sora, HYPRE_MEMORY_DEVICE); + } + + /* send new_i/j/data to remote processes and the receivers call addtovalues */ + hypre_IJMatrixAssembleOffProcValsParCSR(matrix, -1, -1, new_nnz, HYPRE_MEMORY_DEVICE, new_i, new_j, new_data); + + hypre_TFree(new_i, HYPRE_MEMORY_DEVICE); + hypre_TFree(new_j, HYPRE_MEMORY_DEVICE); + hypre_TFree(new_data, HYPRE_MEMORY_DEVICE); + } + + /* Note: the stack might have been changed in hypre_IJMatrixAssembleOffProcValsParCSR, + * so must get the size and the pointers again */ + nelms = hypre_AuxParCSRMatrixCurrentStackElmts(aux_matrix); + stack_i = hypre_AuxParCSRMatrixStackI(aux_matrix); + stack_j = hypre_AuxParCSRMatrixStackJ(aux_matrix); + stack_data = hypre_AuxParCSRMatrixStackData(aux_matrix); + stack_sora = hypre_AuxParCSRMatrixStackSorA(aux_matrix); + +#ifdef HYPRE_DEBUG + /* the stack should only have on-proc elements now */ + HYPRE_Int tmp = HYPRE_THRUST_CALL(count_if, stack_i, stack_i+nelms, pred); + hypre_assert(nelms == tmp); +#endif + + if (nelms) + { + HYPRE_Int new_nnz; + HYPRE_BigInt *new_i; + HYPRE_BigInt *new_j; + HYPRE_Complex *new_data; + char *new_sora; + + /* sort and reduce */ + hypre_IJMatrixAssembleSortAndReduce1(nelms, stack_i, stack_j, stack_sora, stack_data, + &new_nnz, &new_i, &new_j, &new_sora, &new_data); + + /* adjust row indices from global to local */ + HYPRE_Int *new_i_local = hypre_TAlloc(HYPRE_Int, new_nnz, HYPRE_MEMORY_DEVICE); + HYPRE_THRUST_CALL( transform, + new_i, + new_i + new_nnz, + new_i_local, + _1 - row_start ); + + hypre_TFree(new_i, HYPRE_MEMORY_DEVICE); + + HYPRE_Int num_cols_offd_new; + HYPRE_BigInt *col_map_offd_new; + 
HYPRE_Int *col_map_offd_map; + HYPRE_Int diag_nnz_new; + HYPRE_Int *diag_i_new = NULL; + HYPRE_Int *diag_j_new = NULL; + HYPRE_Complex *diag_a_new = NULL; + char *diag_sora_new = NULL; + HYPRE_Int offd_nnz_new; + HYPRE_Int *offd_i_new = NULL; + HYPRE_Int *offd_j_new = NULL; + HYPRE_Complex *offd_a_new = NULL; + char *offd_sora_new = NULL; + + HYPRE_Int diag_nnz_existed = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixDiag(par_matrix)); + HYPRE_Int offd_nnz_existed = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(par_matrix)); + + hypre_CSRMatrixSplitDevice_core( 0, + nrows, + new_nnz, + NULL, + new_j, + NULL, + NULL, + col_start, + col_end - 1, + hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(par_matrix)), + NULL, + NULL, + NULL, + NULL, + &diag_nnz_new, + NULL, + NULL, + NULL, + NULL, + &offd_nnz_new, + NULL, + NULL, + NULL, + NULL ); + + if (diag_nnz_new) + { + diag_i_new = hypre_TAlloc(HYPRE_Int, diag_nnz_existed + diag_nnz_new, HYPRE_MEMORY_DEVICE); + diag_j_new = hypre_TAlloc(HYPRE_Int, diag_nnz_existed + diag_nnz_new, HYPRE_MEMORY_DEVICE); + diag_a_new = hypre_TAlloc(HYPRE_Complex, diag_nnz_existed + diag_nnz_new, HYPRE_MEMORY_DEVICE); + if (diag_nnz_existed) + { + diag_sora_new = hypre_TAlloc(char, diag_nnz_existed + diag_nnz_new, HYPRE_MEMORY_DEVICE); + } + } + + if (offd_nnz_new) + { + offd_i_new = hypre_TAlloc(HYPRE_Int, offd_nnz_existed + offd_nnz_new, HYPRE_MEMORY_DEVICE); + offd_j_new = hypre_TAlloc(HYPRE_Int, offd_nnz_existed + offd_nnz_new, HYPRE_MEMORY_DEVICE); + offd_a_new = hypre_TAlloc(HYPRE_Complex, offd_nnz_existed + offd_nnz_new, HYPRE_MEMORY_DEVICE); + if (offd_nnz_existed) + { + offd_sora_new = hypre_TAlloc(char, offd_nnz_existed + offd_nnz_new, HYPRE_MEMORY_DEVICE); + } + } + + /* split IJ into diag and offd */ + hypre_CSRMatrixSplitDevice_core( 1, + nrows, + new_nnz, + new_i_local, + new_j, + new_data, + diag_nnz_existed || offd_nnz_existed ? 
new_sora : NULL, + col_start, + col_end - 1, + hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(par_matrix)), + hypre_ParCSRMatrixDeviceColMapOffd(par_matrix), + &col_map_offd_map, + &num_cols_offd_new, + &col_map_offd_new, + &diag_nnz_new, + diag_i_new + diag_nnz_existed, + diag_j_new + diag_nnz_existed, + diag_a_new + diag_nnz_existed, + diag_nnz_existed ? diag_sora_new + diag_nnz_existed : NULL, + &offd_nnz_new, + offd_i_new + offd_nnz_existed, + offd_j_new + offd_nnz_existed, + offd_a_new + offd_nnz_existed, + offd_nnz_existed ? offd_sora_new + offd_nnz_existed : NULL ); + + hypre_TFree(new_i_local, HYPRE_MEMORY_DEVICE); + hypre_TFree(new_j, HYPRE_MEMORY_DEVICE); + hypre_TFree(new_data, HYPRE_MEMORY_DEVICE); + hypre_TFree(new_sora, HYPRE_MEMORY_DEVICE); + + HYPRE_Int nnz_new; + HYPRE_Int *tmp_i; + HYPRE_Int *tmp_j; + HYPRE_Complex *tmp_a; + + /* expand the existing diag/offd and compress with the new one */ + if (diag_nnz_new > 0) + { + if (diag_nnz_existed) + { + /* the existing parcsr should come first and the entries are "add" */ + hypreDevice_CsrRowPtrsToIndices_v2(nrows, diag_nnz_existed, + hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix)), diag_i_new); + + hypre_TMemcpy(diag_j_new, hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix)), HYPRE_Int, + diag_nnz_existed, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + + hypre_TMemcpy(diag_a_new, hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix)), HYPRE_Complex, + diag_nnz_existed, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + + HYPRE_THRUST_CALL(fill_n, diag_sora_new, diag_nnz_existed, 0); + + hypre_IJMatrixAssembleSortAndReduce2(diag_nnz_existed + diag_nnz_new, diag_i_new, diag_j_new, diag_sora_new, diag_a_new, + &nnz_new, &tmp_i, &tmp_j, &tmp_a, 2); + + hypre_TFree(diag_i_new, HYPRE_MEMORY_DEVICE); + hypre_TFree(diag_j_new, HYPRE_MEMORY_DEVICE); + hypre_TFree(diag_sora_new, HYPRE_MEMORY_DEVICE); + hypre_TFree(diag_a_new, HYPRE_MEMORY_DEVICE); + + tmp_j = hypre_TReAlloc_v2(tmp_j, HYPRE_Int, diag_nnz_existed + 
diag_nnz_new, HYPRE_Int, nnz_new, HYPRE_MEMORY_DEVICE); + tmp_a = hypre_TReAlloc_v2(tmp_a, HYPRE_Complex, diag_nnz_existed + diag_nnz_new, HYPRE_Complex, nnz_new, HYPRE_MEMORY_DEVICE); + + diag_nnz_new = nnz_new; + diag_i_new = tmp_i; + diag_j_new = tmp_j; + diag_a_new = tmp_a; + } + + hypre_CSRMatrix *diag = hypre_CSRMatrixCreate(nrows, ncols, diag_nnz_new); + hypre_CSRMatrixI(diag) = hypreDevice_CsrRowIndicesToPtrs(nrows, diag_nnz_new, diag_i_new); + hypre_CSRMatrixJ(diag) = diag_j_new; + hypre_CSRMatrixData(diag) = diag_a_new; + hypre_CSRMatrixMemoryLocation(diag) = HYPRE_MEMORY_DEVICE; + + hypre_TFree(diag_i_new, HYPRE_MEMORY_DEVICE); + + hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(par_matrix)); + hypre_ParCSRMatrixDiag(par_matrix) = diag; + } + + if (offd_nnz_new > 0) + { + if (offd_nnz_existed) + { + /* the existing parcsr should come first and the entries are "add" */ + hypreDevice_CsrRowPtrsToIndices_v2(nrows, offd_nnz_existed, + hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix)), offd_i_new); + + /* adjust with the new col_map_offd_map */ + HYPRE_THRUST_CALL( gather, + hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix)), + hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix)) + offd_nnz_existed, + col_map_offd_map, + offd_j_new ); + + hypre_TMemcpy(offd_a_new, hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix)), HYPRE_Complex, + offd_nnz_existed, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + + HYPRE_THRUST_CALL(fill_n, offd_sora_new, offd_nnz_existed, 0); + + hypre_IJMatrixAssembleSortAndReduce2(offd_nnz_existed + offd_nnz_new, offd_i_new, offd_j_new, offd_sora_new, offd_a_new, + &nnz_new, &tmp_i, &tmp_j, &tmp_a, 0); + + hypre_TFree(offd_i_new, HYPRE_MEMORY_DEVICE); + hypre_TFree(offd_j_new, HYPRE_MEMORY_DEVICE); + hypre_TFree(offd_sora_new, HYPRE_MEMORY_DEVICE); + hypre_TFree(offd_a_new, HYPRE_MEMORY_DEVICE); + + tmp_j = hypre_TReAlloc_v2(tmp_j, HYPRE_Int, offd_nnz_existed + offd_nnz_new, HYPRE_Int, nnz_new, HYPRE_MEMORY_DEVICE); + tmp_a = 
hypre_TReAlloc_v2(tmp_a, HYPRE_Complex, offd_nnz_existed + offd_nnz_new, HYPRE_Complex, nnz_new, HYPRE_MEMORY_DEVICE); + + offd_nnz_new = nnz_new; + offd_i_new = tmp_i; + offd_j_new = tmp_j; + offd_a_new = tmp_a; + } + + hypre_CSRMatrix *offd = hypre_CSRMatrixCreate(nrows, num_cols_offd_new, offd_nnz_new); + hypre_CSRMatrixI(offd) = hypreDevice_CsrRowIndicesToPtrs(nrows, offd_nnz_new, offd_i_new); + hypre_CSRMatrixJ(offd) = offd_j_new; + hypre_CSRMatrixData(offd) = offd_a_new; + hypre_CSRMatrixMemoryLocation(offd) = HYPRE_MEMORY_DEVICE; + + hypre_TFree(offd_i_new, HYPRE_MEMORY_DEVICE); + + hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(par_matrix)); + hypre_ParCSRMatrixOffd(par_matrix) = offd; + + hypre_TFree(hypre_ParCSRMatrixDeviceColMapOffd(par_matrix), HYPRE_MEMORY_DEVICE); + hypre_ParCSRMatrixDeviceColMapOffd(par_matrix) = col_map_offd_new; + + hypre_TFree(hypre_ParCSRMatrixColMapOffd(par_matrix), HYPRE_MEMORY_HOST); + hypre_ParCSRMatrixColMapOffd(par_matrix) = hypre_TAlloc(HYPRE_BigInt, num_cols_offd_new, HYPRE_MEMORY_HOST); + hypre_TMemcpy(hypre_ParCSRMatrixColMapOffd(par_matrix), col_map_offd_new, HYPRE_BigInt, num_cols_offd_new, + HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); + + col_map_offd_new = NULL; + } + + hypre_TFree(col_map_offd_map, HYPRE_MEMORY_DEVICE); + hypre_TFree(col_map_offd_new, HYPRE_MEMORY_DEVICE); + } /* if (nelms) */ + + hypre_IJMatrixAssembleFlag(matrix) = 1; + hypre_AuxParCSRMatrixDestroy(aux_matrix); + hypre_IJMatrixTranslator(matrix) = NULL; + + return hypre_error_flag; +} + +HYPRE_Int +hypre_IJMatrixSetConstantValuesParCSRDevice( hypre_IJMatrix *matrix, + HYPRE_Complex value ) +{ + hypre_ParCSRMatrix *par_matrix = (hypre_ParCSRMatrix *) hypre_IJMatrixObject( matrix ); + hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(par_matrix); + hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(par_matrix); + HYPRE_Complex *diag_data = hypre_CSRMatrixData(diag); + HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd); + HYPRE_Int nnz_diag = 
hypre_CSRMatrixNumNonzeros(diag); + HYPRE_Int nnz_offd = hypre_CSRMatrixNumNonzeros(offd); + + HYPRE_THRUST_CALL( fill_n, diag_data, nnz_diag, value ); + HYPRE_THRUST_CALL( fill_n, offd_data, nnz_offd, value ); + + return hypre_error_flag; +} + +#endif diff --git a/src/IJ_mv/IJVector.c b/src/IJ_mv/IJVector.c index b6292cd44..988b4892e 100644 --- a/src/IJ_mv/IJVector.c +++ b/src/IJ_mv/IJVector.c @@ -19,7 +19,7 @@ * hypre_IJVectorDistribute *--------------------------------------------------------------------------*/ -HYPRE_Int +HYPRE_Int hypre_IJVectorDistribute( HYPRE_IJVector vector, const HYPRE_Int *vec_starts ) { hypre_IJVector *vec = (hypre_IJVector *) vector; @@ -28,7 +28,7 @@ hypre_IJVectorDistribute( HYPRE_IJVector vector, const HYPRE_Int *vec_starts ) { hypre_printf("Vector variable is NULL -- hypre_IJVectorDistribute\n"); exit(1); - } + } if ( hypre_IJVectorObjectType(vec) == HYPRE_PARCSR ) @@ -47,7 +47,7 @@ hypre_IJVectorDistribute( HYPRE_IJVector vector, const HYPRE_Int *vec_starts ) * hypre_IJVectorZeroValues *--------------------------------------------------------------------------*/ -HYPRE_Int +HYPRE_Int hypre_IJVectorZeroValues( HYPRE_IJVector vector ) { hypre_IJVector *vec = (hypre_IJVector *) vector; @@ -56,7 +56,7 @@ hypre_IJVectorZeroValues( HYPRE_IJVector vector ) { hypre_printf("Vector variable is NULL -- hypre_IJVectorZeroValues\n"); exit(1); - } + } /* if ( hypre_IJVectorObjectType(vec) == HYPRE_PETSC ) @@ -69,9 +69,9 @@ hypre_IJVectorZeroValues( HYPRE_IJVector vector ) else */ if ( hypre_IJVectorObjectType(vec) == HYPRE_PARCSR ) - + { return( hypre_IJVectorZeroValuesPar(vec) ); - + } else { hypre_printf("Unrecognized object type -- hypre_IJVectorZeroValues\n"); diff --git a/src/IJ_mv/IJVector_parcsr.c b/src/IJ_mv/IJVector_parcsr.c index 56d13e232..f2e635d82 100644 --- a/src/IJ_mv/IJVector_parcsr.c +++ b/src/IJ_mv/IJVector_parcsr.c @@ -10,7 +10,7 @@ * IJVector_Par interface * 
*****************************************************************************/ - + #include "_hypre_IJ_mv.h" #include "../HYPRE.h" @@ -33,30 +33,19 @@ hypre_IJVectorCreatePar(hypre_IJVector *vector, HYPRE_BigInt global_n, *partitioning, jmin; hypre_MPI_Comm_size(comm, &num_procs); -#ifdef HYPRE_NO_GLOBAL_PARTITION jmin = hypre_IJVectorGlobalFirstRow(vector); global_n = hypre_IJVectorGlobalNumRows(vector); - partitioning = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); - - /* Shift to zero-based partitioning for ParVector object */ - for (j = 0; j < 2; j++) - partitioning[j] = IJpartitioning[j] - jmin; - -#else - jmin = IJpartitioning[0]; - global_n = IJpartitioning[num_procs] - jmin; - - partitioning = hypre_CTAlloc(HYPRE_BigInt, num_procs+1, HYPRE_MEMORY_HOST); + partitioning = hypre_CTAlloc(HYPRE_BigInt, 2, HYPRE_MEMORY_HOST); /* Shift to zero-based partitioning for ParVector object */ - for (j = 0; j < num_procs+1; j++) + for (j = 0; j < 2; j++) + { partitioning[j] = IJpartitioning[j] - jmin; - -#endif + } hypre_IJVectorObject(vector) = - hypre_ParVectorCreate(comm, global_n, (HYPRE_BigInt *) partitioning); + hypre_ParVectorCreate(comm, global_n, (HYPRE_BigInt *) partitioning); return hypre_error_flag; } @@ -65,14 +54,14 @@ hypre_IJVectorCreatePar(hypre_IJVector *vector, * * hypre_IJVectorDestroyPar * - * frees ParVector local storage of an IJVectorPar + * frees ParVector local storage of an IJVectorPar * *****************************************************************************/ HYPRE_Int hypre_IJVectorDestroyPar(hypre_IJVector *vector) { - return hypre_ParVectorDestroy((hypre_ParVector*)hypre_IJVectorObject(vector)); + return hypre_ParVectorDestroy((hypre_ParVector*)hypre_IJVectorObject(vector)); } /****************************************************************************** @@ -82,46 +71,49 @@ hypre_IJVectorDestroyPar(hypre_IJVector *vector) * initializes ParVector of IJVectorPar * 
*****************************************************************************/ - HYPRE_Int hypre_IJVectorInitializePar(hypre_IJVector *vector) +{ + return hypre_IJVectorInitializePar_v2(vector, hypre_IJVectorMemoryLocation(vector)); +} + +HYPRE_Int +hypre_IJVectorInitializePar_v2(hypre_IJVector *vector, HYPRE_MemoryLocation memory_location) { hypre_ParVector *par_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); hypre_AuxParVector *aux_vector = (hypre_AuxParVector*) hypre_IJVectorTranslator(vector); HYPRE_BigInt *partitioning = hypre_ParVectorPartitioning(par_vector); hypre_Vector *local_vector = hypre_ParVectorLocalVector(par_vector); - HYPRE_Int my_id; HYPRE_Int print_level = hypre_IJVectorPrintLevel(vector); - + + HYPRE_Int my_id; MPI_Comm comm = hypre_IJVectorComm(vector); + hypre_MPI_Comm_rank(comm, &my_id); + + HYPRE_MemoryLocation memory_location_aux = + hypre_GetExecPolicy1(memory_location) == HYPRE_EXEC_HOST ? HYPRE_MEMORY_HOST : HYPRE_MEMORY_DEVICE; - hypre_MPI_Comm_rank(comm,&my_id); - if (!partitioning) { if (print_level) { hypre_printf("No ParVector partitioning for initialization -- "); - hypre_printf("hypre_IJVectorInitializePar\n"); + hypre_printf("hypre_IJVectorInitializePar\n"); } hypre_error_in_arg(1); return hypre_error_flag; } -#ifdef HYPRE_NO_GLOBAL_PARTITION hypre_VectorSize(local_vector) = (HYPRE_Int)(partitioning[1] - partitioning[0]); -#else - hypre_VectorSize(local_vector) = (HYPRE_Int)(partitioning[my_id+1] - partitioning[my_id]); -#endif - hypre_ParVectorInitialize(par_vector); + hypre_ParVectorInitialize_v2(par_vector, memory_location); if (!aux_vector) - { + { hypre_AuxParVectorCreate(&aux_vector); hypre_IJVectorTranslator(vector) = aux_vector; } - hypre_AuxParVectorInitialize(aux_vector); + hypre_AuxParVectorInitialize_v2(aux_vector, memory_location_aux); return hypre_error_flag; } @@ -145,6 +137,11 @@ hypre_IJVectorSetMaxOffProcElmtsPar(hypre_IJVector *vector, hypre_IJVectorTranslator(vector) = aux_vector; } 
hypre_AuxParVectorMaxOffProcElmts(aux_vector) = max_off_proc_elmts; + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + hypre_AuxParVectorUsrOffProcElmts(aux_vector) = max_off_proc_elmts; +#endif + return hypre_error_flag; } @@ -160,12 +157,12 @@ hypre_IJVectorSetMaxOffProcElmtsPar(hypre_IJVector *vector, HYPRE_Int hypre_IJVectorDistributePar(hypre_IJVector *vector, - const HYPRE_Int *vec_starts) + const HYPRE_Int *vec_starts) { hypre_ParVector *old_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); hypre_ParVector *par_vector; HYPRE_Int print_level = hypre_IJVectorPrintLevel(vector); - + if (!old_vector) { if (print_level) @@ -179,7 +176,7 @@ hypre_IJVectorDistributePar(hypre_IJVector *vector, } par_vector = hypre_VectorToParVector(hypre_ParVectorComm(old_vector), - hypre_ParVectorLocalVector(old_vector), + hypre_ParVectorLocalVector(old_vector), (HYPRE_BigInt *)vec_starts); if (!par_vector) { @@ -211,9 +208,7 @@ HYPRE_Int hypre_IJVectorZeroValuesPar(hypre_IJVector *vector) { HYPRE_Int my_id; - HYPRE_Int i; HYPRE_BigInt vec_start, vec_stop; - HYPRE_Complex *data; hypre_ParVector *par_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); MPI_Comm comm = hypre_IJVectorComm(vector); @@ -223,7 +218,7 @@ hypre_IJVectorZeroValuesPar(hypre_IJVector *vector) hypre_MPI_Comm_rank(comm, &my_id); - /* If par_vector == NULL or partitioning == NULL or local_vector == NULL + /* If par_vector == NULL or partitioning == NULL or local_vector == NULL let user know of catastrophe and exit */ if (!par_vector) @@ -262,15 +257,10 @@ hypre_IJVectorZeroValuesPar(hypre_IJVector *vector) return hypre_error_flag; } -#ifdef HYPRE_NO_GLOBAL_PARTITION vec_start = partitioning[0]; vec_stop = partitioning[1]; -#else - vec_start = partitioning[my_id]; - vec_stop = partitioning[my_id+1]; -#endif - if (vec_start > vec_stop) + if (vec_start > vec_stop) { if (print_level) { @@ -282,13 +272,10 @@ hypre_IJVectorZeroValuesPar(hypre_IJVector *vector) return hypre_error_flag; } - data 
= hypre_VectorData( local_vector ); -#ifdef HYPRE_USING_OPENMP -#pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE -#endif - for (i = 0; i < (HYPRE_Int)(vec_stop - vec_start); i++) - data[i] = 0.; - + hypre_assert(hypre_VectorSize(local_vector) == (HYPRE_Int)(vec_stop - vec_start)); + + hypre_SeqVectorSetConstantValues(local_vector, 0.0); + return hypre_error_flag; } @@ -322,7 +309,7 @@ hypre_IJVectorSetValuesPar(hypre_IJVector *vector, hypre_MPI_Comm_rank(comm, &my_id); - /* If par_vector == NULL or partitioning == NULL or local_vector == NULL + /* If par_vector == NULL or partitioning == NULL or local_vector == NULL let user know of catastrophe and exit */ if (!par_vector) @@ -360,15 +347,10 @@ hypre_IJVectorSetValuesPar(hypre_IJVector *vector, return hypre_error_flag; } -#ifdef HYPRE_NO_GLOBAL_PARTITION vec_start = IJpartitioning[0]; vec_stop = IJpartitioning[1]-1; -#else - vec_start = IJpartitioning[my_id]; - vec_stop = IJpartitioning[my_id+1]-1; -#endif - if (vec_start > vec_stop) + if (vec_start > vec_stop) { if (print_level) { @@ -391,15 +373,15 @@ hypre_IJVectorSetValuesPar(hypre_IJVector *vector, { for (j = 0; j < num_values; j++) { - i = indices[j]; - if (i >= vec_start && i <= vec_stop) + i = indices[j]; + if (i >= vec_start && i <= vec_stop) { k = (HYPRE_Int)( i- vec_start); data[k] = values[j]; } - } + } } - else + else { if (num_values > (HYPRE_Int)(vec_stop - vec_start) + 1) { @@ -408,15 +390,15 @@ hypre_IJVectorSetValuesPar(hypre_IJVector *vector, hypre_printf("Warning! 
Indices beyond local range not identified!\n "); hypre_printf("Off processor values have been ignored!\n"); } - num_values = (HYPRE_Int)(vec_stop - vec_start) +1; + num_values = (HYPRE_Int)(vec_stop - vec_start) +1; } #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE #endif for (j = 0; j < num_values; j++) data[j] = values[j]; - } - + } + return hypre_error_flag; } @@ -450,7 +432,7 @@ hypre_IJVectorAddToValuesPar(hypre_IJVector *vector, hypre_MPI_Comm_rank(comm, &my_id); - /* If par_vector == NULL or partitioning == NULL or local_vector == NULL + /* If par_vector == NULL or partitioning == NULL or local_vector == NULL let user know of catastrophe and exit */ if (!par_vector) @@ -488,15 +470,10 @@ hypre_IJVectorAddToValuesPar(hypre_IJVector *vector, return hypre_error_flag; } -#ifdef HYPRE_NO_GLOBAL_PARTITION vec_start = IJpartitioning[0]; vec_stop = IJpartitioning[1]-1; -#else - vec_start = IJpartitioning[my_id]; - vec_stop = IJpartitioning[my_id+1]-1; -#endif - if (vec_start > vec_stop) + if (vec_start > vec_stop) { if (print_level) { @@ -513,7 +490,7 @@ hypre_IJVectorAddToValuesPar(hypre_IJVector *vector, if (indices) { HYPRE_Int current_num_elmts - = hypre_AuxParVectorCurrentNumElmts(aux_vector); + = hypre_AuxParVectorCurrentOffProcElmts(aux_vector); HYPRE_Int max_off_proc_elmts = hypre_AuxParVectorMaxOffProcElmts(aux_vector); HYPRE_BigInt *off_proc_i = hypre_AuxParVectorOffProcI(aux_vector); @@ -522,12 +499,12 @@ hypre_IJVectorAddToValuesPar(hypre_IJVector *vector, for (j = 0; j < num_values; j++) { - i = indices[j]; - if (i < vec_start || i > vec_stop) + i = indices[j]; + if (i < vec_start || i > vec_stop) { /* if elements outside processor boundaries, store in off processor stash */ - if (!max_off_proc_elmts) + if (!max_off_proc_elmts) { max_off_proc_elmts = 100; hypre_AuxParVectorMaxOffProcElmts(aux_vector) = @@ -543,7 +520,7 @@ hypre_IJVectorAddToValuesPar(hypre_IJVector *vector, { max_off_proc_elmts += 10; off_proc_i = 
hypre_TReAlloc(off_proc_i, HYPRE_BigInt, max_off_proc_elmts, HYPRE_MEMORY_HOST); - off_proc_data = hypre_TReAlloc(off_proc_data, HYPRE_Complex, + off_proc_data = hypre_TReAlloc(off_proc_data, HYPRE_Complex, max_off_proc_elmts, HYPRE_MEMORY_HOST); hypre_AuxParVectorMaxOffProcElmts(aux_vector) = max_off_proc_elmts; @@ -552,16 +529,16 @@ hypre_IJVectorAddToValuesPar(hypre_IJVector *vector, } off_proc_i[current_num_elmts] = i; off_proc_data[current_num_elmts++] = values[j]; - hypre_AuxParVectorCurrentNumElmts(aux_vector)=current_num_elmts; + hypre_AuxParVectorCurrentOffProcElmts(aux_vector)=current_num_elmts; } else /* local values are added to the vector */ { k = (HYPRE_Int)(i - vec_start); data[k] += values[j]; } - } + } } - else + else { if (num_values > (HYPRE_Int)(vec_stop - vec_start) + 1) { @@ -570,15 +547,15 @@ hypre_IJVectorAddToValuesPar(hypre_IJVector *vector, hypre_printf("Warning! Indices beyond local range not identified!\n "); hypre_printf("Off processor values have been ignored!\n"); } - num_values = (HYPRE_Int)(vec_stop - vec_start) +1; + num_values = (HYPRE_Int)(vec_stop - vec_start) +1; } #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE #endif for (j = 0; j < num_values; j++) data[j] += values[j]; - } - + } + return hypre_error_flag; } @@ -609,10 +586,10 @@ hypre_IJVectorAssemblePar(hypre_IJVector *vector) hypre_printf("**** Vector storage is either unallocated or orphaned ****\n"); } hypre_error_in_arg(1); - } + } partitioning = hypre_ParVectorPartitioning(par_vector); if (!IJpartitioning) - { + { if (print_level) { hypre_printf("IJpartitioning == NULL -- "); @@ -622,7 +599,7 @@ hypre_IJVectorAssemblePar(hypre_IJVector *vector) hypre_error_in_arg(1); } if (!partitioning) - { + { if (print_level) { hypre_printf("partitioning == NULL -- "); @@ -638,7 +615,7 @@ hypre_IJVectorAssemblePar(hypre_IJVector *vector) HYPRE_Int max_off_proc_elmts; HYPRE_BigInt *off_proc_i; HYPRE_Complex *off_proc_data; - current_num_elmts = 
hypre_AuxParVectorCurrentNumElmts(aux_vector); + current_num_elmts = hypre_AuxParVectorCurrentOffProcElmts(aux_vector); hypre_MPI_Allreduce(&current_num_elmts,&off_proc_elmts,1,HYPRE_MPI_INT, hypre_MPI_SUM,comm); if (off_proc_elmts) @@ -646,18 +623,19 @@ hypre_IJVectorAssemblePar(hypre_IJVector *vector) max_off_proc_elmts=hypre_AuxParVectorMaxOffProcElmts(aux_vector); off_proc_i=hypre_AuxParVectorOffProcI(aux_vector); off_proc_data=hypre_AuxParVectorOffProcData(aux_vector); - hypre_IJVectorAssembleOffProcValsPar(vector, max_off_proc_elmts, - current_num_elmts, off_proc_i, off_proc_data); - hypre_TFree(hypre_AuxParVectorOffProcI(aux_vector), HYPRE_MEMORY_HOST); - hypre_TFree(hypre_AuxParVectorOffProcData(aux_vector), HYPRE_MEMORY_HOST); - hypre_AuxParVectorMaxOffProcElmts(aux_vector) = 0; - hypre_AuxParVectorCurrentNumElmts(aux_vector) = 0; + hypre_IJVectorAssembleOffProcValsPar(vector, max_off_proc_elmts, + current_num_elmts, HYPRE_MEMORY_HOST, + off_proc_i, off_proc_data); + hypre_TFree(hypre_AuxParVectorOffProcI(aux_vector), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParVectorOffProcData(aux_vector), HYPRE_MEMORY_HOST); + hypre_AuxParVectorMaxOffProcElmts(aux_vector) = 0; + hypre_AuxParVectorCurrentOffProcElmts(aux_vector) = 0; } } return hypre_error_flag; } - + /****************************************************************************** * * hypre_IJVectorGetValuesPar @@ -667,29 +645,28 @@ hypre_IJVectorAssemblePar(hypre_IJVector *vector) *****************************************************************************/ HYPRE_Int -hypre_IJVectorGetValuesPar(hypre_IJVector *vector, - HYPRE_Int num_values, - const HYPRE_BigInt *indices, - HYPRE_Complex *values) +hypre_IJVectorGetValuesPar(hypre_IJVector *vector, + HYPRE_Int num_values, + const HYPRE_BigInt *indices, + HYPRE_Complex *values) { - HYPRE_Int my_id; - HYPRE_Int j, k; - HYPRE_BigInt i, vec_start, vec_stop; - HYPRE_Complex *data; - HYPRE_Int ierr = 0; - - HYPRE_BigInt *IJpartitioning =
hypre_IJVectorPartitioning(vector); - hypre_ParVector *par_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); - MPI_Comm comm = hypre_IJVectorComm(vector); - hypre_Vector *local_vector; - HYPRE_Int print_level = hypre_IJVectorPrintLevel(vector); + HYPRE_Int my_id; + MPI_Comm comm = hypre_IJVectorComm(vector); + HYPRE_BigInt *IJpartitioning = hypre_IJVectorPartitioning(vector); + HYPRE_BigInt vec_start; + HYPRE_BigInt vec_stop; + hypre_ParVector *par_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); + HYPRE_Int print_level = hypre_IJVectorPrintLevel(vector); /* If no components are to be retrieved, perform no checking and return */ - if (num_values < 1) return 0; + if (num_values < 1) + { + return 0; + } hypre_MPI_Comm_rank(comm, &my_id); - /* If par_vector == NULL or partitioning == NULL or local_vector == NULL + /* If par_vector == NULL or partitioning == NULL or local_vector == NULL let user know of catastrophe and exit */ if (!par_vector) @@ -703,7 +680,7 @@ hypre_IJVectorGetValuesPar(hypre_IJVector *vector, hypre_error_in_arg(1); return hypre_error_flag; } - local_vector = hypre_ParVectorLocalVector(par_vector); + if (!IJpartitioning) { if (print_level) @@ -715,6 +692,8 @@ hypre_IJVectorGetValuesPar(hypre_IJVector *vector, hypre_error_in_arg(1); return hypre_error_flag; } + + hypre_Vector *local_vector = hypre_ParVectorLocalVector(par_vector); if (!local_vector) { if (print_level) @@ -727,15 +706,10 @@ hypre_IJVectorGetValuesPar(hypre_IJVector *vector, return hypre_error_flag; } -#ifdef HYPRE_NO_GLOBAL_PARTITION vec_start = IJpartitioning[0]; vec_stop = IJpartitioning[1]; -#else - vec_start = IJpartitioning[my_id]; - vec_stop = IJpartitioning[my_id+1]; -#endif - if (vec_start > vec_stop) + if (vec_start > vec_stop) { if (print_level) { @@ -747,58 +721,10 @@ hypre_IJVectorGetValuesPar(hypre_IJVector *vector, return hypre_error_flag; } - /* Determine whether indices points to local indices only, and if not, let - user know of catastrophe and exit. 
If indices == NULL, assume that - num_values components are to be retrieved from block starting at - vec_start */ - - if (indices) - { - for (i = 0; i < num_values; i++) - { - ierr += (indices[i] < vec_start); - ierr += (indices[i] >= vec_stop); - } - } - - if (ierr) - { - if (print_level) - { - hypre_printf("indices beyond local range -- "); - hypre_printf("hypre_IJVectorGetValuesPar\n"); - hypre_printf("**** Indices specified are unusable ****\n"); - } - hypre_error_in_arg(3); - return hypre_error_flag; - } - - data = hypre_VectorData(local_vector); + hypre_assert(vec_start == hypre_ParVectorFirstIndex(par_vector)); + hypre_assert(vec_stop == hypre_ParVectorLastIndex(par_vector) + 1); - if (indices) - { -#ifdef HYPRE_USING_OPENMP -#pragma omp parallel for private(i,j) HYPRE_SMP_SCHEDULE -#endif - for (j = 0; j < num_values; j++) - { - k = (HYPRE_Int)(indices[j] - vec_start); - values[j] = data[k]; - } - } - else - { - if (num_values > (HYPRE_Int)(vec_stop-vec_start)) - { - hypre_error_in_arg(2); - return hypre_error_flag; - } -#ifdef HYPRE_USING_OPENMP -#pragma omp parallel for private(j) HYPRE_SMP_SCHEDULE -#endif - for (j = 0; j < num_values; j++) - values[j] = data[j]; - } + hypre_ParVectorGetValues(par_vector, num_values, (HYPRE_BigInt *) indices, values); return hypre_error_flag; } @@ -811,240 +737,13 @@ hypre_IJVectorGetValuesPar(hypre_IJVector *vector, * partition is being used. 
*****************************************************************************/ -#ifndef HYPRE_NO_GLOBAL_PARTITION - -HYPRE_Int -hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, - HYPRE_Int max_off_proc_elmts, - HYPRE_Int current_num_elmts, - HYPRE_BigInt *off_proc_i, - HYPRE_Complex *off_proc_data) -{ - MPI_Comm comm = hypre_IJVectorComm(vector); - hypre_ParVector *par_vector = ( hypre_ParVector *) hypre_IJVectorObject(vector); - hypre_MPI_Request *requests = NULL; - hypre_MPI_Status *status = NULL; - HYPRE_Int i, j, j2; - HYPRE_Int iii, indx, ip; - HYPRE_BigInt row, first_index; - HYPRE_Int proc_id, num_procs, my_id; - HYPRE_Int num_sends, num_sends2; - HYPRE_Int num_recvs; - HYPRE_Int num_requests; - HYPRE_Int vec_start, vec_len; - HYPRE_Int *send_procs; - HYPRE_BigInt *send_i; - HYPRE_Int *send_map_starts; - HYPRE_Int *recv_procs; - HYPRE_BigInt *recv_i; - HYPRE_Int *recv_vec_starts; - HYPRE_Int *info; - HYPRE_Int *int_buffer; - HYPRE_Int *proc_id_mem; - HYPRE_BigInt *partitioning; - HYPRE_Int *displs; - HYPRE_Int *recv_buf; - HYPRE_Complex *send_data; - HYPRE_Complex *recv_data; - HYPRE_Complex *data = hypre_VectorData(hypre_ParVectorLocalVector(par_vector)); - - hypre_MPI_Comm_size(comm,&num_procs); - hypre_MPI_Comm_rank(comm, &my_id); - partitioning = hypre_IJVectorPartitioning(vector); - - first_index = partitioning[my_id]; - - info = hypre_CTAlloc(HYPRE_Int, num_procs, HYPRE_MEMORY_HOST); - proc_id_mem = hypre_CTAlloc(HYPRE_Int, current_num_elmts, HYPRE_MEMORY_HOST); - for (i=0; i < current_num_elmts; i++) - { - row = off_proc_i[i]; - proc_id = hypre_FindProc(partitioning,row,num_procs); - proc_id_mem[i] = proc_id; - info[proc_id]++; - } - - /* determine send_procs and amount of data to be sent */ - num_sends = 0; - for (i=0; i < num_procs; i++) - { - if (info[i]) - { - num_sends++; - } - } - num_sends2 = 2*num_sends; - send_procs = hypre_CTAlloc(HYPRE_Int, num_sends, HYPRE_MEMORY_HOST); - send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1, 
HYPRE_MEMORY_HOST); - int_buffer = hypre_CTAlloc(HYPRE_Int, num_sends2, HYPRE_MEMORY_HOST); - j = 0; - j2 = 0; - send_map_starts[0] = 0; - for (i=0; i < num_procs; i++) - { - if (info[i]) - { - send_procs[j++] = i; - send_map_starts[j] = send_map_starts[j-1]+info[i]; - int_buffer[j2++] = i; - int_buffer[j2++] = info[i]; - } - } - - hypre_MPI_Allgather(&num_sends2,1,HYPRE_MPI_INT,info,1,HYPRE_MPI_INT,comm); - - displs = hypre_CTAlloc(HYPRE_Int, num_procs+1, HYPRE_MEMORY_HOST); - displs[0] = 0; - for (i=1; i < num_procs+1; i++) - displs[i] = displs[i-1]+info[i-1]; - recv_buf = hypre_CTAlloc(HYPRE_Int, displs[num_procs], HYPRE_MEMORY_HOST); - - hypre_MPI_Allgatherv(int_buffer,num_sends2,HYPRE_MPI_INT,recv_buf,info,displs, - HYPRE_MPI_INT,comm); - - hypre_TFree(int_buffer, HYPRE_MEMORY_HOST); - hypre_TFree(info, HYPRE_MEMORY_HOST); - - /* determine recv procs and amount of data to be received */ - num_recvs = 0; - for (j=0; j < displs[num_procs]; j+=2) - { - if (recv_buf[j] == my_id) - num_recvs++; - } - - recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs, HYPRE_MEMORY_HOST); - recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1, HYPRE_MEMORY_HOST); - - j2 = 0; - recv_vec_starts[0] = 0; - for (i=0; i < num_procs; i++) - { - for (j=displs[i]; j < displs[i+1]; j+=2) - { - if (recv_buf[j] == my_id) - { - recv_procs[j2++] = i; - recv_vec_starts[j2] = recv_vec_starts[j2-1]+recv_buf[j+1]; - } - if (j2 == num_recvs) break; - } - } - hypre_TFree(recv_buf, HYPRE_MEMORY_HOST); - hypre_TFree(displs, HYPRE_MEMORY_HOST); - - /* set up data to be sent to send procs */ - /* send_i contains for each send proc - indices, send_data contains corresponding values */ - - send_i = hypre_CTAlloc(HYPRE_BigInt, send_map_starts[num_sends], HYPRE_MEMORY_HOST); - send_data = hypre_CTAlloc(HYPRE_Complex, send_map_starts[num_sends], HYPRE_MEMORY_HOST); - recv_i = hypre_CTAlloc(HYPRE_BigInt, recv_vec_starts[num_recvs], HYPRE_MEMORY_HOST); - recv_data = hypre_CTAlloc(HYPRE_Complex, 
recv_vec_starts[num_recvs], HYPRE_MEMORY_HOST); - - for (i=0; i < current_num_elmts; i++) - { - proc_id = proc_id_mem[i]; - indx = hypre_BinarySearch(send_procs,proc_id,num_sends); - iii = send_map_starts[indx]; - send_i[iii] = off_proc_i[i]; - send_data[iii] = off_proc_data[i]; - send_map_starts[indx]++; - } - - hypre_TFree(proc_id_mem, HYPRE_MEMORY_HOST); - - for (i=num_sends; i > 0; i--) - { - send_map_starts[i] = send_map_starts[i-1]; - } - send_map_starts[0] = 0; - - num_requests = num_recvs+num_sends; - - requests = hypre_CTAlloc(hypre_MPI_Request, num_requests, HYPRE_MEMORY_HOST); - status = hypre_CTAlloc(hypre_MPI_Status, num_requests, HYPRE_MEMORY_HOST); - - j=0; - for (i=0; i < num_recvs; i++) - { - vec_start = recv_vec_starts[i]; - vec_len = recv_vec_starts[i+1] - vec_start; - ip = recv_procs[i]; - hypre_MPI_Irecv(&recv_i[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); - } - - for (i=0; i < num_sends; i++) - { - vec_start = send_map_starts[i]; - vec_len = send_map_starts[i+1] - vec_start; - ip = send_procs[i]; - hypre_MPI_Isend(&send_i[vec_start], vec_len, HYPRE_MPI_BIG_INT, - ip, 0, comm, &requests[j++]); - } - - if (num_requests) - { - hypre_MPI_Waitall(num_requests, requests, status); - } - - j=0; - for (i=0; i < num_recvs; i++) - { - vec_start = recv_vec_starts[i]; - vec_len = recv_vec_starts[i+1] - vec_start; - ip = recv_procs[i]; - hypre_MPI_Irecv(&recv_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); - } - - for (i=0; i < num_sends; i++) - { - vec_start = send_map_starts[i]; - vec_len = send_map_starts[i+1] - vec_start; - ip = send_procs[i]; - hypre_MPI_Isend(&send_data[vec_start], vec_len, HYPRE_MPI_COMPLEX, - ip, 0, comm, &requests[j++]); - } - - if (num_requests) - { - hypre_MPI_Waitall(num_requests, requests, status); - } - - hypre_TFree(requests, HYPRE_MEMORY_HOST); - hypre_TFree(status, HYPRE_MEMORY_HOST); - hypre_TFree(send_i, HYPRE_MEMORY_HOST); - hypre_TFree(send_data, 
HYPRE_MEMORY_HOST); - hypre_TFree(send_procs, HYPRE_MEMORY_HOST); - hypre_TFree(send_map_starts, HYPRE_MEMORY_HOST); - hypre_TFree(recv_procs, HYPRE_MEMORY_HOST); - - for (i=0; i < recv_vec_starts[num_recvs]; i++) - { - row = recv_i[i]; - j = (HYPRE_Int)(row - first_index); - data[j] += recv_data[i]; - } - - hypre_TFree(recv_vec_starts, HYPRE_MEMORY_HOST); - hypre_TFree(recv_i, HYPRE_MEMORY_HOST); - hypre_TFree(recv_data, HYPRE_MEMORY_HOST); - - return hypre_error_flag; -} - -#else - -/* assumed partition version */ - HYPRE_Int -hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, - HYPRE_Int max_off_proc_elmts, - HYPRE_Int current_num_elmts, - HYPRE_BigInt *off_proc_i, - HYPRE_Complex *off_proc_data) +hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, + HYPRE_Int max_off_proc_elmts, + HYPRE_Int current_num_elmts, + HYPRE_MemoryLocation memory_location, + HYPRE_BigInt *off_proc_i, + HYPRE_Complex *off_proc_data) { HYPRE_Int myid; HYPRE_BigInt global_first_row, global_num_rows; @@ -1061,7 +760,7 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, HYPRE_Int counter; HYPRE_BigInt upper_bound; HYPRE_Int num_real_procs; - + HYPRE_BigInt *row_list=NULL; HYPRE_Int *a_proc_id=NULL, *orig_order=NULL; HYPRE_Int *real_proc_id = NULL, *us_real_proc_id = NULL; @@ -1082,9 +781,9 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, HYPRE_BigInt *ex_contact_buf=NULL; HYPRE_Complex *vector_data; HYPRE_Complex value; - + hypre_DataExchangeResponse response_obj1, response_obj2; - hypre_ProcListElements send_proc_obj; + hypre_ProcListElements send_proc_obj; MPI_Comm comm = hypre_IJVectorComm(vector); hypre_ParVector *par_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); @@ -1092,16 +791,37 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, hypre_IJAssumedPart *apart; hypre_MPI_Comm_rank(comm, &myid); - + global_num_rows = hypre_IJVectorGlobalNumRows(vector); global_first_row = hypre_IJVectorGlobalFirstRow(vector); - - 
/* verify that we have created the assumed partition */ + if (memory_location == HYPRE_MEMORY_DEVICE) + { + HYPRE_BigInt *off_proc_i_h = hypre_TAlloc(HYPRE_BigInt, current_num_elmts, HYPRE_MEMORY_HOST); + HYPRE_Complex *off_proc_data_h = hypre_TAlloc(HYPRE_Complex, current_num_elmts, HYPRE_MEMORY_HOST); + + hypre_TMemcpy(off_proc_i_h, off_proc_i, HYPRE_BigInt, current_num_elmts, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); + hypre_TMemcpy(off_proc_data_h, off_proc_data, HYPRE_Complex, current_num_elmts, HYPRE_MEMORY_HOST, HYPRE_MEMORY_DEVICE); + + off_proc_i = off_proc_i_h; + off_proc_data = off_proc_data_h; + } + + /* call hypre_IJVectorAddToValuesParCSR directly inside this function + * with one chunk of data */ + HYPRE_Int off_proc_nelm_recv_cur = 0; + HYPRE_Int off_proc_nelm_recv_max = 0; + HYPRE_BigInt *off_proc_i_recv = NULL; + HYPRE_Complex *off_proc_data_recv = NULL; + HYPRE_BigInt *off_proc_i_recv_d = NULL; + HYPRE_Complex *off_proc_data_recv_d = NULL; + + /* verify that we have created the assumed partition */ if (hypre_IJVectorAssumedPart(vector) == NULL) { hypre_IJVectorCreateAssumedPartition(vector); } + apart = (hypre_IJAssumedPart*) hypre_IJVectorAssumedPart(vector); /* get the assumed processor id for each row */ @@ -1114,17 +834,17 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, { for (i=0; i < current_num_elmts; i++) { - row = off_proc_i[i]; + row = off_proc_i[i]; row_list[i] = row; hypre_GetAssumedPartitionProcFromRow(comm, row, global_first_row, - global_num_rows, &proc_id); + global_num_rows, &proc_id); a_proc_id[i] = proc_id; orig_order[i] = i; } /* now we need to find the actual order of each row - sort on row - this will result in proc ids sorted also...*/ - + hypre_BigQsortb2i(row_list, a_proc_id, orig_order, 0, current_num_elmts -1); /* calculate the number of contacts */ @@ -1132,15 +852,15 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, last_proc = a_proc_id[0]; for (i=1; i < current_num_elmts; i++) { - if 
(a_proc_id[i] > last_proc) + if (a_proc_id[i] > last_proc) { ex_num_contacts++; last_proc = a_proc_id[i]; } } - + } - + /* now we will go through a create a contact list - need to contact assumed processors and find out who the actual row owner is - we will contact with a range (2 numbers) */ @@ -1151,7 +871,7 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, counter = 0; range_end = -1; - for (i=0; i< current_num_elmts; i++) + for (i=0; i< current_num_elmts; i++) { if (row_list[i] > range_end) { @@ -1160,35 +880,35 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, /* end of prev. range */ if (counter > 0) ex_contact_buf[counter*2 - 1] = row_list[i-1]; - + /*start new range*/ - ex_contact_procs[counter] = proc_id; + ex_contact_procs[counter] = proc_id; ex_contact_vec_starts[counter] = counter*2; ex_contact_buf[counter*2] = row_list[i]; counter++; - + hypre_GetAssumedPartitionRowRange(comm, proc_id, global_first_row, - global_num_rows, &range_start, &range_end); + global_num_rows, &range_start, &range_end); } } /*finish the starts*/ ex_contact_vec_starts[counter] = counter*2; /*finish the last range*/ - if (counter > 0) + if (counter > 0) ex_contact_buf[counter*2 - 1] = row_list[current_num_elmts - 1]; /* create response object - can use same fill response as used in the commpkg routine */ response_obj1.fill_response = hypre_RangeFillResponseIJDetermineRecvProcs; - response_obj1.data1 = apart; /* this is necessary so we can fill responses*/ + response_obj1.data1 = apart; /* this is necessary so we can fill responses*/ response_obj1.data2 = NULL; - + max_response_size = 6; /* 6 means we can fit 3 ranges*/ - - hypre_DataExchangeList(ex_num_contacts, ex_contact_procs, - ex_contact_buf, ex_contact_vec_starts, sizeof(HYPRE_BigInt), - sizeof(HYPRE_BigInt), &response_obj1, max_response_size, 4, + + hypre_DataExchangeList(ex_num_contacts, ex_contact_procs, + ex_contact_buf, ex_contact_vec_starts, sizeof(HYPRE_BigInt), + sizeof(HYPRE_BigInt), 
&response_obj1, max_response_size, 4, comm, (void**) &response_buf, &response_buf_starts); /* now response_buf contains a proc_id followed by an upper bound for the @@ -1202,9 +922,9 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, a_proc_id = NULL; /*how many ranges were returned?*/ - num_ranges = response_buf_starts[ex_num_contacts]; + num_ranges = response_buf_starts[ex_num_contacts]; num_ranges = num_ranges/2; - + prev_id = -1; j = 0; counter = 0; @@ -1216,15 +936,15 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, upper_bound = response_buf[i*2+1]; counter = 0; tmp_id = (HYPRE_Int)response_buf[i*2]; - + /* loop through row_list entries - counting how many are in the range */ - while (j < current_num_elmts && row_list[j] <= upper_bound) + while (j < current_num_elmts && row_list[j] <= upper_bound) { real_proc_id[j] = tmp_id; j++; - counter++; + counter++; } - if (counter > 0 && tmp_id != prev_id) + if (counter > 0 && tmp_id != prev_id) { num_real_procs++; } @@ -1236,20 +956,20 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, HYPRE_Int and HYPRE_Complex data. 
(row number and value) - we will send everything as a void since we may not know the rel sizes of ints and doubles */ - + /* first find out how many elements to send per proc - so we can do storage */ - + complex_size = sizeof(HYPRE_Complex); big_int_size = sizeof(HYPRE_BigInt); - + obj_size_bytes = hypre_max(big_int_size, complex_size); - + ex_contact_procs = hypre_CTAlloc(HYPRE_Int, num_real_procs, HYPRE_MEMORY_HOST); num_rows_per_proc = hypre_CTAlloc(HYPRE_Int, num_real_procs, HYPRE_MEMORY_HOST); - + counter = 0; - + if (num_real_procs > 0 ) { ex_contact_procs[0] = real_proc_id[0]; @@ -1270,7 +990,7 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, } } } - + /* calculate total storage and make vec_starts arrays */ storage = 0; ex_contact_vec_starts = hypre_CTAlloc(HYPRE_Int, num_real_procs + 1, HYPRE_MEMORY_HOST); @@ -1280,7 +1000,7 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, { storage += 1 + 2* num_rows_per_proc[i]; ex_contact_vec_starts[i+1] = -storage-1; /* need negative for next loop */ - } + } /*void_contact_buf = hypre_MAlloc(storage*obj_size_bytes);*/ void_contact_buf = hypre_CTAlloc(char, storage*obj_size_bytes, HYPRE_MEMORY_HOST); @@ -1288,7 +1008,7 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, /* set up data to be sent to send procs */ /* for each proc, ex_contact_buf_d contains #rows, row #, data, etc. 
*/ - + /* un-sort real_proc_id - we want to access data arrays in order */ us_real_proc_id = hypre_CTAlloc(HYPRE_Int, current_num_elmts, HYPRE_MEMORY_HOST); @@ -1324,7 +1044,7 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, in++; } - /* add row # */ + /* add row # */ hypre_TMemcpy( index_ptr, &row, HYPRE_BigInt,1 , HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); index_ptr = (void *) ((char *) index_ptr + obj_size_bytes); in++; @@ -1334,13 +1054,13 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, hypre_TMemcpy( index_ptr, &tmp_complex, HYPRE_Complex, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); index_ptr = (void *) ((char *) index_ptr + obj_size_bytes); in++; - + /* increment the indexes to keep track of where we are - fix later */ ex_contact_vec_starts[indx] = in; } - + /* some clean up */ - + hypre_TFree(response_buf, HYPRE_MEMORY_HOST); hypre_TFree(response_buf_starts, HYPRE_MEMORY_HOST); @@ -1374,7 +1094,7 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, send_proc_obj.storage_length = num_real_procs + 5; send_proc_obj.id = NULL; /* don't care who sent it to us */ send_proc_obj.vec_starts = - hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length + 1, HYPRE_MEMORY_HOST); + hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length + 1, HYPRE_MEMORY_HOST); send_proc_obj.vec_starts[0] = 0; send_proc_obj.element_storage_length = storage + 20; send_proc_obj.v_elements = @@ -1386,9 +1106,9 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, max_response_size = 0; - hypre_DataExchangeList(num_real_procs, ex_contact_procs, + hypre_DataExchangeList(num_real_procs, ex_contact_procs, void_contact_buf, ex_contact_vec_starts, obj_size_bytes, - 0, &response_obj2, max_response_size, 5, + 0, &response_obj2, max_response_size, 5, comm, (void **) &response_buf, &response_buf_starts); /***********************************/ @@ -1403,12 +1123,12 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, /* Now we can unpack the 
send_proc_objects and either set or add to the vector data */ - num_recvs = send_proc_obj.length; + num_recvs = send_proc_obj.length; /* alias */ recv_data_ptr = send_proc_obj.v_elements; recv_starts = send_proc_obj.vec_starts; - + vector_data = hypre_VectorData(hypre_ParVectorLocalVector(par_vector)); first_index = hypre_ParVectorFirstIndex(par_vector); @@ -1424,7 +1144,7 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, for (j=0; j < row_count; j++) /* for each row: unpack info */ { /* row # */ - hypre_TMemcpy( &row, recv_data_ptr, HYPRE_BigInt, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + hypre_TMemcpy( &row, recv_data_ptr, HYPRE_BigInt, 1, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); recv_data_ptr = (void *) ((char *)recv_data_ptr + obj_size_bytes); indx++; @@ -1433,15 +1153,55 @@ hypre_IJVectorAssembleOffProcValsPar( hypre_IJVector *vector, recv_data_ptr = (void *) ((char *)recv_data_ptr + obj_size_bytes); indx++; - k = (HYPRE_Int)(row - first_index - global_first_row); - vector_data[k] += value; + if (memory_location == HYPRE_MEMORY_HOST) + { + k = (HYPRE_Int)(row - first_index - global_first_row); + vector_data[k] += value; + } + else + { + if (off_proc_nelm_recv_cur >= off_proc_nelm_recv_max) + { + off_proc_nelm_recv_max = 2 * (off_proc_nelm_recv_cur + 1); + off_proc_i_recv = hypre_TReAlloc(off_proc_i_recv, HYPRE_BigInt, off_proc_nelm_recv_max, HYPRE_MEMORY_HOST); + off_proc_data_recv = hypre_TReAlloc(off_proc_data_recv, HYPRE_Complex, off_proc_nelm_recv_max, HYPRE_MEMORY_HOST); + } + off_proc_i_recv[off_proc_nelm_recv_cur] = row; + off_proc_data_recv[off_proc_nelm_recv_cur] = value; + off_proc_nelm_recv_cur ++; + } } } - + + if (memory_location == HYPRE_MEMORY_DEVICE) + { + off_proc_i_recv_d = hypre_TAlloc(HYPRE_BigInt, off_proc_nelm_recv_cur, HYPRE_MEMORY_DEVICE); + off_proc_data_recv_d = hypre_TAlloc(HYPRE_Complex, off_proc_nelm_recv_cur, HYPRE_MEMORY_DEVICE); + + hypre_TMemcpy(off_proc_i_recv_d, off_proc_i_recv, HYPRE_BigInt, 
off_proc_nelm_recv_cur, + HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST); + hypre_TMemcpy(off_proc_data_recv_d, off_proc_data_recv, HYPRE_Complex, off_proc_nelm_recv_cur, + HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST); + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + hypre_IJVectorSetAddValuesParDevice(vector, off_proc_nelm_recv_cur, off_proc_i_recv_d, off_proc_data_recv_d, "add"); +#endif + } + hypre_TFree(send_proc_obj.v_elements, HYPRE_MEMORY_HOST); hypre_TFree(send_proc_obj.vec_starts, HYPRE_MEMORY_HOST); - + + if (memory_location == HYPRE_MEMORY_DEVICE) + { + hypre_TFree(off_proc_i, HYPRE_MEMORY_HOST); + hypre_TFree(off_proc_data, HYPRE_MEMORY_HOST); + } + + hypre_TFree(off_proc_i_recv, HYPRE_MEMORY_HOST); + hypre_TFree(off_proc_data_recv, HYPRE_MEMORY_HOST); + + hypre_TFree(off_proc_i_recv_d, HYPRE_MEMORY_DEVICE); + hypre_TFree(off_proc_data_recv_d, HYPRE_MEMORY_DEVICE); + return hypre_error_flag; } - -#endif diff --git a/src/IJ_mv/IJVector_parcsr_device.c b/src/IJ_mv/IJVector_parcsr_device.c new file mode 100644 index 000000000..46e65ab0b --- /dev/null +++ b/src/IJ_mv/IJVector_parcsr_device.c @@ -0,0 +1,364 @@ +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/****************************************************************************** + * + * IJVector_ParCSR interface + * + *****************************************************************************/ + +#include "_hypre_IJ_mv.h" +#include "_hypre_utilities.hpp" + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + +template<typename T1, typename T2> +struct hypre_IJVectorAssembleFunctor : public thrust::binary_function< thrust::tuple<T1, T2>, thrust::tuple<T1, T2>, thrust::tuple<T1, T2> > +{ + typedef thrust::tuple<T1, T2> Tuple; + + __device__ Tuple operator()(const Tuple& x, const Tuple& y ) + { + return thrust::make_tuple( hypre_max(thrust::get<0>(x), thrust::get<0>(y)), thrust::get<1>(x) + thrust::get<1>(y) ); + } +}; + +HYPRE_Int hypre_IJVectorAssembleSortAndReduce3(HYPRE_Int N0, HYPRE_BigInt *I0, char *X0, HYPRE_Complex *A0, HYPRE_Int *N1, HYPRE_BigInt **I1, HYPRE_Complex **A1); + +HYPRE_Int hypre_IJVectorAssembleSortAndReduce1(HYPRE_Int N0, HYPRE_BigInt *I0, char *X0, HYPRE_Complex *A0, HYPRE_Int *N1, HYPRE_BigInt **I1, char **X1, HYPRE_Complex **A1 ); + +__global__ void hypreCUDAKernel_IJVectorAssemblePar(HYPRE_Int n, HYPRE_Complex *x, HYPRE_BigInt *map, HYPRE_BigInt offset, char *SorA, HYPRE_Complex *y); + +/* + */ +HYPRE_Int +hypre_IJVectorSetAddValuesParDevice(hypre_IJVector *vector, + HYPRE_Int num_values, + const HYPRE_BigInt *indices, + const HYPRE_Complex *values, + const char *action) +{ + HYPRE_BigInt *IJpartitioning = hypre_IJVectorPartitioning(vector); + HYPRE_BigInt vec_start, vec_stop; + vec_start = IJpartitioning[0]; + vec_stop = IJpartitioning[1]-1; + HYPRE_Int nrows = vec_stop - vec_start + 1; + const char SorA = action[0] == 's' ?
1 : 0; + + if (num_values <= 0) + { + return hypre_error_flag; + } + + /* this is a special use to set/add local values */ + if (!indices) + { + hypre_ParVector *par_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); + hypre_Vector *local_vector = hypre_ParVectorLocalVector(par_vector); + HYPRE_Int num_values2 = hypre_min( hypre_VectorSize(local_vector), num_values ); + HYPRE_BigInt *indices2 = hypre_TAlloc(HYPRE_BigInt, num_values2, HYPRE_MEMORY_DEVICE); + HYPRE_THRUST_CALL(sequence, indices2, indices2 + num_values2, vec_start); + + hypre_IJVectorSetAddValuesParDevice(vector, num_values2, indices2, values, action); + + hypre_TFree(indices2, HYPRE_MEMORY_DEVICE); + + return hypre_error_flag; + } + + hypre_AuxParVector *aux_vector = (hypre_AuxParVector*) hypre_IJVectorTranslator(vector); + + if (!aux_vector) + { + hypre_AuxParVectorCreate(&aux_vector); + hypre_AuxParVectorInitialize_v2(aux_vector, HYPRE_MEMORY_DEVICE); + hypre_IJVectorTranslator(vector) = aux_vector; + } + + HYPRE_Int stack_elmts_max = hypre_AuxParVectorMaxStackElmts(aux_vector); + HYPRE_Int stack_elmts_current = hypre_AuxParVectorCurrentStackElmts(aux_vector); + HYPRE_Int stack_elmts_required = stack_elmts_current + num_values; + HYPRE_BigInt *stack_i = hypre_AuxParVectorStackI(aux_vector); + HYPRE_Complex *stack_data = hypre_AuxParVectorStackData(aux_vector); + char *stack_sora = hypre_AuxParVectorStackSorA(aux_vector); + + if ( stack_elmts_max < stack_elmts_required ) + { + HYPRE_Int stack_elmts_max_new = nrows * hypre_AuxParVectorInitAllocFactor(aux_vector); + if (hypre_AuxParVectorUsrOffProcElmts(aux_vector) >= 0) + { + stack_elmts_max_new += hypre_AuxParVectorUsrOffProcElmts(aux_vector); + } + stack_elmts_max_new = hypre_max(stack_elmts_max * hypre_AuxParVectorGrowFactor(aux_vector), stack_elmts_max_new); + stack_elmts_max_new = hypre_max(stack_elmts_required, stack_elmts_max_new); + + hypre_AuxParVectorStackI(aux_vector) = stack_i = + hypre_TReAlloc_v2(stack_i, HYPRE_BigInt, 
stack_elmts_max, HYPRE_BigInt, stack_elmts_max_new, HYPRE_MEMORY_DEVICE); + hypre_AuxParVectorStackData(aux_vector) = stack_data = + hypre_TReAlloc_v2(stack_data, HYPRE_Complex, stack_elmts_max, HYPRE_Complex, stack_elmts_max_new, HYPRE_MEMORY_DEVICE); + hypre_AuxParVectorStackSorA(aux_vector) = stack_sora = + hypre_TReAlloc_v2(stack_sora, char, stack_elmts_max, char, stack_elmts_max_new, HYPRE_MEMORY_DEVICE); + + hypre_AuxParVectorMaxStackElmts(aux_vector) = stack_elmts_max_new; + } + + HYPRE_THRUST_CALL(fill_n, stack_sora + stack_elmts_current, num_values, SorA); + + hypre_TMemcpy(stack_i + stack_elmts_current, indices, HYPRE_BigInt, num_values, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + hypre_TMemcpy(stack_data + stack_elmts_current, values, HYPRE_Complex, num_values, HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_DEVICE); + + hypre_AuxParVectorCurrentStackElmts(aux_vector) += num_values; + + return hypre_error_flag; +} + +/****************************************************************************** + * + * + *****************************************************************************/ + +HYPRE_Int +hypre_IJVectorAssembleParDevice(hypre_IJVector *vector) +{ + MPI_Comm comm = hypre_IJVectorComm(vector); + HYPRE_BigInt *IJpartitioning = hypre_IJVectorPartitioning(vector); + HYPRE_BigInt vec_start, vec_stop; + vec_start = IJpartitioning[0]; + vec_stop = IJpartitioning[1]-1; + hypre_ParVector *par_vector = (hypre_ParVector*) hypre_IJVectorObject(vector); + hypre_AuxParVector *aux_vector = (hypre_AuxParVector*) hypre_IJVectorTranslator(vector); + + if (!aux_vector) + { + return hypre_error_flag; + } + + if (!par_vector) + { + return hypre_error_flag; + } + + HYPRE_Int nelms = hypre_AuxParVectorCurrentStackElmts(aux_vector); + HYPRE_BigInt *stack_i = hypre_AuxParVectorStackI(aux_vector); + HYPRE_Complex *stack_data = hypre_AuxParVectorStackData(aux_vector); + char *stack_sora = hypre_AuxParVectorStackSorA(aux_vector); + + in_range pred(vec_start, vec_stop); + HYPRE_Int 
nelms_on = HYPRE_THRUST_CALL(count_if, stack_i, stack_i+nelms, pred); + HYPRE_Int nelms_off = nelms - nelms_on; + HYPRE_Int nelms_off_max; + hypre_MPI_Allreduce(&nelms_off, &nelms_off_max, 1, HYPRE_MPI_INT, hypre_MPI_MAX, comm); + + /* communicate for aux off-proc and add to remote aux on-proc */ + if (nelms_off_max) + { + HYPRE_Int new_nnz = 0; + HYPRE_BigInt *new_i = NULL; + HYPRE_Complex *new_data = NULL; + + if (nelms_off) + { + /* copy off-proc entries out of stack and remove from stack */ + HYPRE_BigInt *off_proc_i = hypre_TAlloc(HYPRE_BigInt, nelms_off, HYPRE_MEMORY_DEVICE); + HYPRE_Complex *off_proc_data = hypre_TAlloc(HYPRE_Complex, nelms_off, HYPRE_MEMORY_DEVICE); + char *off_proc_sora = hypre_TAlloc(char, nelms_off, HYPRE_MEMORY_DEVICE); + char *is_on_proc = hypre_TAlloc(char, nelms, HYPRE_MEMORY_DEVICE); + + HYPRE_THRUST_CALL(transform, stack_i, stack_i + nelms, is_on_proc, pred); + + auto new_end1 = HYPRE_THRUST_CALL( + copy_if, + thrust::make_zip_iterator(thrust::make_tuple(stack_i, stack_data, stack_sora )), /* first */ + thrust::make_zip_iterator(thrust::make_tuple(stack_i + nelms, stack_data + nelms, stack_sora + nelms)), /* last */ + is_on_proc, /* stencil */ + thrust::make_zip_iterator(thrust::make_tuple(off_proc_i, off_proc_data, off_proc_sora)), /* result */ + thrust::not1(thrust::identity()) ); + + hypre_assert(thrust::get<0>(new_end1.get_iterator_tuple()) - off_proc_i == nelms_off); + + /* remove off-proc entries from stack */ + auto new_end2 = HYPRE_THRUST_CALL( + remove_if, + thrust::make_zip_iterator(thrust::make_tuple(stack_i, stack_data, stack_sora )), /* first */ + thrust::make_zip_iterator(thrust::make_tuple(stack_i + nelms, stack_data + nelms, stack_sora + nelms)), /* last */ + is_on_proc, /* stencil */ + thrust::not1(thrust::identity()) ); + + hypre_assert(thrust::get<0>(new_end2.get_iterator_tuple()) - stack_i == nelms_on); + + hypre_AuxParVectorCurrentStackElmts(aux_vector) = nelms_on; + + hypre_TFree(is_on_proc, 
HYPRE_MEMORY_DEVICE); + + /* sort and reduce */ + hypre_IJVectorAssembleSortAndReduce3(nelms_off, off_proc_i, off_proc_sora, off_proc_data, &new_nnz, &new_i, &new_data); + + hypre_TFree(off_proc_i, HYPRE_MEMORY_DEVICE); + hypre_TFree(off_proc_data, HYPRE_MEMORY_DEVICE); + hypre_TFree(off_proc_sora, HYPRE_MEMORY_DEVICE); + } + + /* send new_i/data to remote processes and the receivers call addtovalues */ + hypre_IJVectorAssembleOffProcValsPar(vector, -1, new_nnz, HYPRE_MEMORY_DEVICE, new_i, new_data); + + hypre_TFree(new_i, HYPRE_MEMORY_DEVICE); + hypre_TFree(new_data, HYPRE_MEMORY_DEVICE); + } + + /* Note: the stack might have been changed in hypre_IJVectorAssembleOffProcValsPar, + * so must get the size and the pointers again */ + nelms = hypre_AuxParVectorCurrentStackElmts(aux_vector); + stack_i = hypre_AuxParVectorStackI(aux_vector); + stack_data = hypre_AuxParVectorStackData(aux_vector); + stack_sora = hypre_AuxParVectorStackSorA(aux_vector); + +#ifdef HYPRE_DEBUG + /* the stack should only have on-proc elements now */ + HYPRE_Int tmp = HYPRE_THRUST_CALL(count_if, stack_i, stack_i+nelms, pred); + hypre_assert(nelms == tmp); +#endif + + if (nelms) + { + HYPRE_Int new_nnz; + HYPRE_BigInt *new_i; + HYPRE_Complex *new_data; + char *new_sora; + + /* sort and reduce */ + hypre_IJVectorAssembleSortAndReduce1(nelms, stack_i, stack_sora, stack_data, &new_nnz, &new_i, &new_sora, &new_data); + + /* set/add to local vector */ + dim3 bDim = hypre_GetDefaultCUDABlockDimension(); + dim3 gDim = hypre_GetDefaultCUDAGridDimension(new_nnz, "thread", bDim); + HYPRE_CUDA_LAUNCH( hypreCUDAKernel_IJVectorAssemblePar, gDim, bDim, new_nnz, new_data, new_i, vec_start, new_sora, + hypre_VectorData(hypre_ParVectorLocalVector(par_vector)) ); + + hypre_TFree(new_i, HYPRE_MEMORY_DEVICE); + hypre_TFree(new_data, HYPRE_MEMORY_DEVICE); + hypre_TFree(new_sora, HYPRE_MEMORY_DEVICE); + } + + hypre_AuxParVectorDestroy(aux_vector); + hypre_IJVectorTranslator(vector) = NULL; + + return 
hypre_error_flag; +} + +/* helper routine used in hypre_IJVectorAssembleParDevice: + * 1. sort (X0, A0) with key I0 + * 2. for each segment in I0, zero out in A0 all before the last `set' + * 3. reduce A0 [with sum] and reduce X0 [with max] + * N0: input size; N1: size after reduction (<= N0) + * Note: (I1, X1, A1) are not resized to N1 but have size N0 + */ +HYPRE_Int +hypre_IJVectorAssembleSortAndReduce1(HYPRE_Int N0, HYPRE_BigInt *I0, char *X0, HYPRE_Complex *A0, + HYPRE_Int *N1, HYPRE_BigInt **I1, char **X1, HYPRE_Complex **A1 ) +{ + HYPRE_THRUST_CALL( stable_sort_by_key, + I0, + I0 + N0, + thrust::make_zip_iterator(thrust::make_tuple(X0, A0)) ); + + HYPRE_BigInt *I = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE); + char *X = hypre_TAlloc(char, N0, HYPRE_MEMORY_DEVICE); + HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE); + + /* output X: 0: keep, 1: zero-out */ + HYPRE_THRUST_CALL( + exclusive_scan_by_key, + make_reverse_iterator(thrust::device_pointer_cast(I0)+N0), /* key begin */ + make_reverse_iterator(thrust::device_pointer_cast(I0)), /* key end */ + make_reverse_iterator(thrust::device_pointer_cast(X0)+N0), /* input value begin */ + make_reverse_iterator(thrust::device_pointer_cast(X) +N0), /* output value begin */ + char(0), /* init */ + thrust::equal_to(), + thrust::maximum() ); + + HYPRE_THRUST_CALL(replace_if, A0, A0 + N0, X, thrust::identity(), 0.0); + + auto new_end = HYPRE_THRUST_CALL( + reduce_by_key, + I0, /* keys_first */ + I0 + N0, /* keys_last */ + thrust::make_zip_iterator(thrust::make_tuple(X0, A0 )), /* values_first */ + I, /* keys_output */ + thrust::make_zip_iterator(thrust::make_tuple(X, A )), /* values_output */ + thrust::equal_to(), /* binary_pred */ + hypre_IJVectorAssembleFunctor() /* binary_op */); + + *N1 = new_end.first - I; + *I1 = I; + *X1 = X; + *A1 = A; + + return hypre_error_flag; +} + +HYPRE_Int +hypre_IJVectorAssembleSortAndReduce3(HYPRE_Int N0, HYPRE_BigInt *I0, char *X0, HYPRE_Complex *A0, + 
HYPRE_Int *N1, HYPRE_BigInt **I1, HYPRE_Complex **A1) +{ + HYPRE_THRUST_CALL( stable_sort_by_key, + I0, + I0 + N0, + thrust::make_zip_iterator(thrust::make_tuple(X0, A0)) ); + + HYPRE_BigInt *I = hypre_TAlloc(HYPRE_BigInt, N0, HYPRE_MEMORY_DEVICE); /* reduced keys are global indices; type must match HYPRE_BigInt **I1 */ + HYPRE_Complex *A = hypre_TAlloc(HYPRE_Complex, N0, HYPRE_MEMORY_DEVICE); + + /* output in X0: 0: keep, 1: zero-out */ + HYPRE_THRUST_CALL( + inclusive_scan_by_key, + make_reverse_iterator(thrust::device_pointer_cast(I0)+N0), /* key begin */ + make_reverse_iterator(thrust::device_pointer_cast(I0)), /* key end */ + make_reverse_iterator(thrust::device_pointer_cast(X0)+N0), /* input value begin */ + make_reverse_iterator(thrust::device_pointer_cast(X0)+N0), /* output value begin */ + thrust::equal_to(), + thrust::maximum() ); + + HYPRE_THRUST_CALL(replace_if, A0, A0 + N0, X0, thrust::identity(), 0.0); + + auto new_end = HYPRE_THRUST_CALL( + reduce_by_key, + I0, /* keys_first */ + I0 + N0, /* keys_last */ + A0, /* values_first */ + I, /* keys_output */ + A /* values_output */); + + *N1 = new_end.second - A; + *I1 = I; + *A1 = A; + + return hypre_error_flag; +} + +/* y[map[i]-offset] = x[i] or y[map[i]-offset] += x[i] depending on SorA, + * same index cannot appear more than once in map */ +__global__ void +hypreCUDAKernel_IJVectorAssemblePar(HYPRE_Int n, HYPRE_Complex *x, HYPRE_BigInt *map, HYPRE_BigInt offset, char *SorA, HYPRE_Complex *y) +{ + HYPRE_Int i = hypre_cuda_get_grid_thread_id<1,1>(); + + if (i >= n) + { + return; + } + + if (SorA[i]) + { + y[map[i]-offset] = x[i]; + } + else + { + y[map[i]-offset] += x[i]; + } +} + +#endif diff --git a/src/IJ_mv/IJ_matrix.h b/src/IJ_mv/IJ_matrix.h index f43f49332..58e3b0fcb 100644 --- a/src/IJ_mv/IJ_matrix.h +++ b/src/IJ_mv/IJ_matrix.h @@ -20,7 +20,7 @@ typedef struct hypre_IJMatrix_struct { - MPI_Comm comm; + MPI_Comm comm; HYPRE_BigInt *row_partitioning; /* distribution of rows across processors */ HYPRE_BigInt *col_partitioning; /* distribution of columns */ @@ -37,7 +37,6 @@ typedef 
struct hypre_IJMatrix_struct HYPRE_BigInt global_first_col; /* to be able to avoid using the global */ HYPRE_BigInt global_num_rows; /* global partition */ HYPRE_BigInt global_num_cols; - HYPRE_Int omp_flag; HYPRE_Int print_level; @@ -47,25 +46,34 @@ typedef struct hypre_IJMatrix_struct * Accessor macros: hypre_IJMatrix *--------------------------------------------------------------------------*/ -#define hypre_IJMatrixComm(matrix) ((matrix) -> comm) +#define hypre_IJMatrixComm(matrix) ((matrix) -> comm) +#define hypre_IJMatrixRowPartitioning(matrix) ((matrix) -> row_partitioning) +#define hypre_IJMatrixColPartitioning(matrix) ((matrix) -> col_partitioning) -#define hypre_IJMatrixRowPartitioning(matrix) ((matrix) -> row_partitioning) -#define hypre_IJMatrixColPartitioning(matrix) ((matrix) -> col_partitioning) +#define hypre_IJMatrixObjectType(matrix) ((matrix) -> object_type) +#define hypre_IJMatrixObject(matrix) ((matrix) -> object) +#define hypre_IJMatrixTranslator(matrix) ((matrix) -> translator) +#define hypre_IJMatrixAssumedPart(matrix) ((matrix) -> assumed_part) -#define hypre_IJMatrixObjectType(matrix) ((matrix) -> object_type) -#define hypre_IJMatrixObject(matrix) ((matrix) -> object) -#define hypre_IJMatrixTranslator(matrix) ((matrix) -> translator) -#define hypre_IJMatrixAssumedPart(matrix) ((matrix) -> assumed_part) +#define hypre_IJMatrixAssembleFlag(matrix) ((matrix) -> assemble_flag) -#define hypre_IJMatrixAssembleFlag(matrix) ((matrix) -> assemble_flag) +#define hypre_IJMatrixGlobalFirstRow(matrix) ((matrix) -> global_first_row) +#define hypre_IJMatrixGlobalFirstCol(matrix) ((matrix) -> global_first_col) +#define hypre_IJMatrixGlobalNumRows(matrix) ((matrix) -> global_num_rows) +#define hypre_IJMatrixGlobalNumCols(matrix) ((matrix) -> global_num_cols) +#define hypre_IJMatrixOMPFlag(matrix) ((matrix) -> omp_flag) +#define hypre_IJMatrixPrintLevel(matrix) ((matrix) -> print_level) +static inline HYPRE_MemoryLocation 
+hypre_IJMatrixMemoryLocation(hypre_IJMatrix *matrix) +{ + if ( hypre_IJMatrixObject(matrix) && hypre_IJMatrixObjectType(matrix) == HYPRE_PARCSR) + { + return hypre_ParCSRMatrixMemoryLocation( (hypre_ParCSRMatrix *) hypre_IJMatrixObject(matrix) ); + } -#define hypre_IJMatrixGlobalFirstRow(matrix) ((matrix) -> global_first_row) -#define hypre_IJMatrixGlobalFirstCol(matrix) ((matrix) -> global_first_col) -#define hypre_IJMatrixGlobalNumRows(matrix) ((matrix) -> global_num_rows) -#define hypre_IJMatrixGlobalNumCols(matrix) ((matrix) -> global_num_cols) -#define hypre_IJMatrixOMPFlag(matrix) ((matrix) -> omp_flag) -#define hypre_IJMatrixPrintLevel(matrix) ((matrix) -> print_level) + return HYPRE_MEMORY_UNDEFINED; +} /*-------------------------------------------------------------------------- * prototypes for operations on local objects @@ -83,4 +91,5 @@ HYPRE_Int hypre_GetIJMatrixISISMatrix( HYPRE_IJMatrix IJmatrix, RowMatrix *reference ) #endif -#endif +#endif /* #ifndef hypre_IJ_MATRIX_HEADER */ + diff --git a/src/IJ_mv/IJ_vector.h b/src/IJ_mv/IJ_vector.h index 8ee4ec18d..fa4a68bb4 100644 --- a/src/IJ_mv/IJ_vector.h +++ b/src/IJ_mv/IJ_vector.h @@ -22,23 +22,21 @@ typedef struct hypre_IJVector_struct { MPI_Comm comm; - HYPRE_BigInt *partitioning; /* Indicates partitioning over tasks */ + HYPRE_BigInt *partitioning; /* Indicates partitioning over tasks */ HYPRE_Int object_type; /* Indicates the type of "local storage" */ void *object; /* Structure for storing local portion */ void *translator; /* Structure for storing off processor - information */ + information */ - void *assumed_part; /* IJ Vector assumed partition */ - - HYPRE_BigInt global_first_row; /* these for data items are necessary */ - HYPRE_BigInt global_num_rows; /* to be able to avoid using the global */ - /* global partition */ - HYPRE_Int print_level; - + void *assumed_part; /* IJ Vector assumed partition */ + HYPRE_BigInt global_first_row; /* these for data items are necessary */ + HYPRE_BigInt 
global_num_rows; /* to be able to avoid using the global */ + /* global partition */ + HYPRE_Int print_level; } hypre_IJVector; @@ -46,27 +44,31 @@ typedef struct hypre_IJVector_struct * Accessor macros: hypre_IJVector *--------------------------------------------------------------------------*/ -#define hypre_IJVectorComm(vector) ((vector) -> comm) - -#define hypre_IJVectorPartitioning(vector) ((vector) -> partitioning) - -#define hypre_IJVectorObjectType(vector) ((vector) -> object_type) - -#define hypre_IJVectorObject(vector) ((vector) -> object) - -#define hypre_IJVectorTranslator(vector) ((vector) -> translator) - +#define hypre_IJVectorComm(vector) ((vector) -> comm) +#define hypre_IJVectorPartitioning(vector) ((vector) -> partitioning) +#define hypre_IJVectorObjectType(vector) ((vector) -> object_type) +#define hypre_IJVectorObject(vector) ((vector) -> object) +#define hypre_IJVectorTranslator(vector) ((vector) -> translator) #define hypre_IJVectorAssumedPart(vector) ((vector) -> assumed_part) - #define hypre_IJVectorGlobalFirstRow(vector) ((vector) -> global_first_row) +#define hypre_IJVectorGlobalNumRows(vector) ((vector) -> global_num_rows) +#define hypre_IJVectorPrintLevel(vector) ((vector) -> print_level) -#define hypre_IJVectorGlobalNumRows(vector) ((vector) -> global_num_rows) +static inline HYPRE_MemoryLocation +hypre_IJVectorMemoryLocation(hypre_IJVector *vector) +{ + if ( hypre_IJVectorObject(vector) && hypre_IJVectorObjectType(vector) == HYPRE_PARCSR) + { + return hypre_ParVectorMemoryLocation( (hypre_ParVector *) hypre_IJVectorObject(vector) ); + } -#define hypre_IJVectorPrintLevel(vector) ((vector) -> print_level) + return HYPRE_MEMORY_UNDEFINED; +} /*-------------------------------------------------------------------------- * prototypes for operations on local objects *--------------------------------------------------------------------------*/ /* #include "./internal_protos.h" */ -#endif +#endif /* #ifndef hypre_IJ_VECTOR_HEADER */ + diff 
--git a/src/IJ_mv/Makefile b/src/IJ_mv/Makefile index a013d1bc0..4a8feecc4 100644 --- a/src/IJ_mv/Makefile +++ b/src/IJ_mv/Makefile @@ -43,7 +43,13 @@ FILES =\ IJVector.c\ IJVector_parcsr.c -OBJS = ${FILES:.c=.o} +CUFILES =\ + IJMatrix_parcsr_device.c\ + IJVector_parcsr_device.c + +COBJS = ${FILES:.c=.o} +CUOBJS = ${CUFILES:.c=.obj} +OBJS = ${COBJS} ${CUOBJS} SONAME = libHYPRE_IJ_mv-${HYPRE_RELEASE_VERSION}${HYPRE_LIB_SUFFIX} @@ -62,7 +68,7 @@ install: libHYPRE_IJ_mv${HYPRE_LIB_SUFFIX} # cp -fR libHYPRE* $(HYPRE_LIB_INSTALL) clean: - rm -f *.o libHYPRE* + rm -f *.o* libHYPRE* rm -rf pchdir tca.map *inslog* distclean: clean diff --git a/src/IJ_mv/_hypre_IJ_mv.h b/src/IJ_mv/_hypre_IJ_mv.h index b8e001718..6fd48c266 100644 --- a/src/IJ_mv/_hypre_IJ_mv.h +++ b/src/IJ_mv/_hypre_IJ_mv.h @@ -1,25 +1,25 @@ -/****************************************************************************** - * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - -#include +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ #ifndef hypre_IJ_HEADER #define hypre_IJ_HEADER -#include "_hypre_utilities.h" -#include "seq_mv.h" +#include #include "_hypre_parcsr_mv.h" #include "HYPRE_IJ_mv.h" +#include "HYPRE.h" #ifdef __cplusplus extern "C" { #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + /****************************************************************************** * * Header info for Auxiliary Parallel CSR Matrix data structures @@ -37,63 +37,114 @@ extern "C" { typedef struct { - HYPRE_Int local_num_rows; /* defines number of rows on this processors */ - HYPRE_Int local_num_cols; /* defines number of cols of diag */ - - HYPRE_Int need_aux; /* if need_aux = 1, aux_j, aux_data are used to - generate the parcsr matrix (default), - for need_aux = 0, data is put directly into - parcsr structure (requires the knowledge of - offd_i and diag_i ) */ - - HYPRE_Int *row_length; /* row_length_diag[i] contains number of stored - elements in i-th row */ - HYPRE_Int *row_space; /* row_space_diag[i] contains space allocated to - i-th row */ - HYPRE_BigInt **aux_j; /* contains collected column indices */ - HYPRE_Complex **aux_data; /* contains collected data */ - - HYPRE_Int *indx_diag; /* indx_diag[i] points to first empty space of portion - in diag_j , diag_data assigned to row i */ - HYPRE_Int *indx_offd; /* indx_offd[i] points to first empty space of portion - in offd_j , offd_data assigned to row i */ - HYPRE_Int max_off_proc_elmts; /* length of off processor stash set for - SetValues and AddTOValues */ - HYPRE_Int current_num_elmts; /* current no. of elements stored in stash */ - HYPRE_Int off_proc_i_indx; /* pointer to first empty space in - set_off_proc_i_set */ - HYPRE_BigInt *off_proc_i; /* length 2*num_off_procs_elmts, contains info pairs - (code, no. of elmts) where code contains global - row no. if SetValues, and (-global row no. 
-1) - if AddToValues*/ - HYPRE_BigInt *off_proc_j; /* contains column indices */ - HYPRE_Complex *off_proc_data; /* contains corresponding data */ + HYPRE_Int local_num_rows; /* defines number of rows on this processor */ + HYPRE_Int local_num_rownnz; /* defines number of nonzero rows on this processor */ + HYPRE_Int local_num_cols; /* defines number of cols of diag */ + + HYPRE_Int need_aux; /* if need_aux = 1, aux_j, aux_data are used to + generate the parcsr matrix (default), + for need_aux = 0, data is put directly into + parcsr structure (requires the knowledge of + offd_i and diag_i ) */ + + HYPRE_Int *rownnz; /* row_nnz[i] contains the i-th nonzero row id */ + HYPRE_Int *row_length; /* row_length[i] contains number of stored + elements in i-th row */ + HYPRE_Int *row_space; /* row_space[i] contains space allocated to + i-th row */ + + HYPRE_Int *diag_sizes; /* user input row lengths of diag */ + HYPRE_Int *offd_sizes; /* user input row lengths of diag */ + + HYPRE_BigInt **aux_j; /* contains collected column indices */ + HYPRE_Complex **aux_data; /* contains collected data */ + + HYPRE_Int *indx_diag; /* indx_diag[i] points to first empty space of portion + in diag_j , diag_data assigned to row i */ + HYPRE_Int *indx_offd; /* indx_offd[i] points to first empty space of portion + in offd_j , offd_data assigned to row i */ + + HYPRE_Int max_off_proc_elmts; /* length of off processor stash set for + SetValues and AddTOValues */ + HYPRE_Int current_off_proc_elmts; /* current no. of elements stored in stash */ + HYPRE_Int off_proc_i_indx; /* pointer to first empty space in + set_off_proc_i_set */ + HYPRE_BigInt *off_proc_i; /* length 2*num_off_procs_elmts, contains info pairs + (code, no. of elmts) where code contains global + row no. if SetValues, and (-global row no. -1) + if AddToValues */ + HYPRE_BigInt *off_proc_j; /* contains column indices + * ( global col id.) if SetValues, + * (-global col id. 
-1) if AddToValues */ + HYPRE_Complex *off_proc_data; /* contains corresponding data */ + + HYPRE_MemoryLocation memory_location; + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_Int max_stack_elmts; + HYPRE_Int current_stack_elmts; + HYPRE_BigInt *stack_i; + HYPRE_BigInt *stack_j; + HYPRE_Complex *stack_data; + char *stack_sora; /* Set (1) or Add (0) */ + HYPRE_Int usr_on_proc_elmts; /* user given num elmt on-proc */ + HYPRE_Int usr_off_proc_elmts; /* user given num elmt off-proc */ + HYPRE_Real init_alloc_factor; + HYPRE_Real grow_factor; +#endif } hypre_AuxParCSRMatrix; /*-------------------------------------------------------------------------- * Accessor functions for the Parallel CSR Matrix structure *--------------------------------------------------------------------------*/ -#define hypre_AuxParCSRMatrixLocalNumRows(matrix) ((matrix) -> local_num_rows) -#define hypre_AuxParCSRMatrixLocalNumCols(matrix) ((matrix) -> local_num_cols) - -#define hypre_AuxParCSRMatrixNeedAux(matrix) ((matrix) -> need_aux) -#define hypre_AuxParCSRMatrixRowLength(matrix) ((matrix) -> row_length) -#define hypre_AuxParCSRMatrixRowSpace(matrix) ((matrix) -> row_space) -#define hypre_AuxParCSRMatrixAuxJ(matrix) ((matrix) -> aux_j) -#define hypre_AuxParCSRMatrixAuxData(matrix) ((matrix) -> aux_data) - -#define hypre_AuxParCSRMatrixIndxDiag(matrix) ((matrix) -> indx_diag) -#define hypre_AuxParCSRMatrixIndxOffd(matrix) ((matrix) -> indx_offd) +#define hypre_AuxParCSRMatrixLocalNumRows(matrix) ((matrix) -> local_num_rows) +#define hypre_AuxParCSRMatrixLocalNumRownnz(matrix) ((matrix) -> local_num_rownnz) +#define hypre_AuxParCSRMatrixLocalNumCols(matrix) ((matrix) -> local_num_cols) + +#define hypre_AuxParCSRMatrixNeedAux(matrix) ((matrix) -> need_aux) +#define hypre_AuxParCSRMatrixRownnz(matrix) ((matrix) -> rownnz) +#define hypre_AuxParCSRMatrixRowLength(matrix) ((matrix) -> row_length) +#define hypre_AuxParCSRMatrixRowSpace(matrix) ((matrix) -> row_space) +#define 
hypre_AuxParCSRMatrixAuxJ(matrix) ((matrix) -> aux_j) +#define hypre_AuxParCSRMatrixAuxData(matrix) ((matrix) -> aux_data) + +#define hypre_AuxParCSRMatrixIndxDiag(matrix) ((matrix) -> indx_diag) +#define hypre_AuxParCSRMatrixIndxOffd(matrix) ((matrix) -> indx_offd) + +#define hypre_AuxParCSRMatrixDiagSizes(matrix) ((matrix) -> diag_sizes) +#define hypre_AuxParCSRMatrixOffdSizes(matrix) ((matrix) -> offd_sizes) + +#define hypre_AuxParCSRMatrixMaxOffProcElmts(matrix) ((matrix) -> max_off_proc_elmts) +#define hypre_AuxParCSRMatrixCurrentOffProcElmts(matrix) ((matrix) -> current_off_proc_elmts) +#define hypre_AuxParCSRMatrixOffProcIIndx(matrix) ((matrix) -> off_proc_i_indx) +#define hypre_AuxParCSRMatrixOffProcI(matrix) ((matrix) -> off_proc_i) +#define hypre_AuxParCSRMatrixOffProcJ(matrix) ((matrix) -> off_proc_j) +#define hypre_AuxParCSRMatrixOffProcData(matrix) ((matrix) -> off_proc_data) + +#define hypre_AuxParCSRMatrixMemoryLocation(matrix) ((matrix) -> memory_location) + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) +#define hypre_AuxParCSRMatrixMaxStackElmts(matrix) ((matrix) -> max_stack_elmts) +#define hypre_AuxParCSRMatrixCurrentStackElmts(matrix) ((matrix) -> current_stack_elmts) +#define hypre_AuxParCSRMatrixStackI(matrix) ((matrix) -> stack_i) +#define hypre_AuxParCSRMatrixStackJ(matrix) ((matrix) -> stack_j) +#define hypre_AuxParCSRMatrixStackData(matrix) ((matrix) -> stack_data) +#define hypre_AuxParCSRMatrixStackSorA(matrix) ((matrix) -> stack_sora) +#define hypre_AuxParCSRMatrixUsrOnProcElmts(matrix) ((matrix) -> usr_on_proc_elmts) +#define hypre_AuxParCSRMatrixUsrOffProcElmts(matrix) ((matrix) -> usr_off_proc_elmts) +#define hypre_AuxParCSRMatrixInitAllocFactor(matrix) ((matrix) -> init_alloc_factor) +#define hypre_AuxParCSRMatrixGrowFactor(matrix) ((matrix) -> grow_factor) +#endif -#define hypre_AuxParCSRMatrixMaxOffProcElmts(matrix) ((matrix) -> max_off_proc_elmts) -#define hypre_AuxParCSRMatrixCurrentNumElmts(matrix) ((matrix) -> 
current_num_elmts) -#define hypre_AuxParCSRMatrixOffProcIIndx(matrix) ((matrix) -> off_proc_i_indx) -#define hypre_AuxParCSRMatrixOffProcI(matrix) ((matrix) -> off_proc_i) -#define hypre_AuxParCSRMatrixOffProcJ(matrix) ((matrix) -> off_proc_j) -#define hypre_AuxParCSRMatrixOffProcData(matrix) ((matrix) -> off_proc_data) +#endif /* #ifndef hypre_AUX_PARCSR_MATRIX_HEADER */ +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ -#endif /****************************************************************************** * * Header info for Auxiliary Parallel Vector data structures @@ -111,23 +162,56 @@ typedef struct typedef struct { - HYPRE_Int max_off_proc_elmts; /* length of off processor stash for - SetValues and AddToValues*/ - HYPRE_Int current_num_elmts; /* current no. of elements stored in stash */ - HYPRE_BigInt *off_proc_i; /* contains column indices */ - HYPRE_Complex *off_proc_data; /* contains corresponding data */ + HYPRE_Int max_off_proc_elmts; /* length of off processor stash for + SetValues and AddToValues*/ + HYPRE_Int current_off_proc_elmts; /* current no. of elements stored in stash */ + HYPRE_BigInt *off_proc_i; /* contains column indices */ + HYPRE_Complex *off_proc_data; /* contains corresponding data */ + + HYPRE_MemoryLocation memory_location; + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_Int max_stack_elmts; /* length of stash for SetValues and AddToValues*/ + HYPRE_Int current_stack_elmts; /* current no. 
of elements stored in stash */ + HYPRE_BigInt *stack_i; /* contains row indices */ + HYPRE_Complex *stack_data; /* contains corresponding data */ + char *stack_sora; + HYPRE_Int usr_off_proc_elmts; /* the num of off-proc elements usr guided */ + HYPRE_Real init_alloc_factor; + HYPRE_Real grow_factor; +#endif } hypre_AuxParVector; /*-------------------------------------------------------------------------- * Accessor functions for the Parallel Vector structure *--------------------------------------------------------------------------*/ -#define hypre_AuxParVectorMaxOffProcElmts(matrix) ((matrix) -> max_off_proc_elmts) -#define hypre_AuxParVectorCurrentNumElmts(matrix) ((matrix) -> current_num_elmts) -#define hypre_AuxParVectorOffProcI(matrix) ((matrix) -> off_proc_i) -#define hypre_AuxParVectorOffProcData(matrix) ((matrix) -> off_proc_data) - +#define hypre_AuxParVectorMaxOffProcElmts(vector) ((vector) -> max_off_proc_elmts) +#define hypre_AuxParVectorCurrentOffProcElmts(vector) ((vector) -> current_off_proc_elmts) +#define hypre_AuxParVectorOffProcI(vector) ((vector) -> off_proc_i) +#define hypre_AuxParVectorOffProcData(vector) ((vector) -> off_proc_data) + +#define hypre_AuxParVectorMemoryLocation(vector) ((vector) -> memory_location) + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) +#define hypre_AuxParVectorMaxStackElmts(vector) ((vector) -> max_stack_elmts) +#define hypre_AuxParVectorCurrentStackElmts(vector) ((vector) -> current_stack_elmts) +#define hypre_AuxParVectorStackI(vector) ((vector) -> stack_i) +#define hypre_AuxParVectorStackData(vector) ((vector) -> stack_data) +#define hypre_AuxParVectorStackSorA(vector) ((vector) -> stack_sora) +#define hypre_AuxParVectorUsrOffProcElmts(vector) ((vector) -> usr_off_proc_elmts) +#define hypre_AuxParVectorInitAllocFactor(vector) ((vector) -> init_alloc_factor) +#define hypre_AuxParVectorGrowFactor(vector) ((vector) -> grow_factor) #endif + +#endif /* #ifndef hypre_AUX_PAR_VECTOR_HEADER */ 
+/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + /****************************************************************************** * * Header info for the hypre_IJMatrix structures @@ -143,22 +227,22 @@ typedef struct typedef struct hypre_IJMatrix_struct { - MPI_Comm comm; + MPI_Comm comm; HYPRE_BigInt *row_partitioning; /* distribution of rows across processors */ HYPRE_BigInt *col_partitioning; /* distribution of columns */ HYPRE_Int object_type; /* Indicates the type of "object" */ void *object; /* Structure for storing local portion */ - void *translator; /* optional storage_type specfic structure - for holding additional local info */ - void *assumed_part; /* IJMatrix assumed partition */ - HYPRE_Int assemble_flag; /* indicates whether matrix has been - assembled */ - - HYPRE_BigInt global_first_row; /* these for data items are necessary */ - HYPRE_BigInt global_first_col; /* to be able to avoind using the global */ - HYPRE_BigInt global_num_rows; /* global partition */ + void *translator; /* optional storage_type specific structure + for holding additional local info */ + void *assumed_part; /* IJMatrix assumed partition */ + HYPRE_Int assemble_flag; /* indicates whether matrix has been + assembled */ + + HYPRE_BigInt global_first_row; /* these four data items are necessary */ + HYPRE_BigInt global_first_col; /* to be able to avoid using the global */ + HYPRE_BigInt global_num_rows; /* global partition */ HYPRE_BigInt global_num_cols; HYPRE_Int omp_flag; HYPRE_Int print_level; @@ -169,25 +253,34 @@ typedef struct hypre_IJMatrix_struct * Accessor macros: hypre_IJMatrix *--------------------------------------------------------------------------*/ 
-#define hypre_IJMatrixComm(matrix) ((matrix) -> comm) +#define hypre_IJMatrixComm(matrix) ((matrix) -> comm) +#define hypre_IJMatrixRowPartitioning(matrix) ((matrix) -> row_partitioning) +#define hypre_IJMatrixColPartitioning(matrix) ((matrix) -> col_partitioning) -#define hypre_IJMatrixRowPartitioning(matrix) ((matrix) -> row_partitioning) -#define hypre_IJMatrixColPartitioning(matrix) ((matrix) -> col_partitioning) +#define hypre_IJMatrixObjectType(matrix) ((matrix) -> object_type) +#define hypre_IJMatrixObject(matrix) ((matrix) -> object) +#define hypre_IJMatrixTranslator(matrix) ((matrix) -> translator) +#define hypre_IJMatrixAssumedPart(matrix) ((matrix) -> assumed_part) -#define hypre_IJMatrixObjectType(matrix) ((matrix) -> object_type) -#define hypre_IJMatrixObject(matrix) ((matrix) -> object) -#define hypre_IJMatrixTranslator(matrix) ((matrix) -> translator) -#define hypre_IJMatrixAssumedPart(matrix) ((matrix) -> assumed_part) +#define hypre_IJMatrixAssembleFlag(matrix) ((matrix) -> assemble_flag) -#define hypre_IJMatrixAssembleFlag(matrix) ((matrix) -> assemble_flag) +#define hypre_IJMatrixGlobalFirstRow(matrix) ((matrix) -> global_first_row) +#define hypre_IJMatrixGlobalFirstCol(matrix) ((matrix) -> global_first_col) +#define hypre_IJMatrixGlobalNumRows(matrix) ((matrix) -> global_num_rows) +#define hypre_IJMatrixGlobalNumCols(matrix) ((matrix) -> global_num_cols) +#define hypre_IJMatrixOMPFlag(matrix) ((matrix) -> omp_flag) +#define hypre_IJMatrixPrintLevel(matrix) ((matrix) -> print_level) +static inline HYPRE_MemoryLocation +hypre_IJMatrixMemoryLocation(hypre_IJMatrix *matrix) +{ + if ( hypre_IJMatrixObject(matrix) && hypre_IJMatrixObjectType(matrix) == HYPRE_PARCSR) + { + return hypre_ParCSRMatrixMemoryLocation( (hypre_ParCSRMatrix *) hypre_IJMatrixObject(matrix) ); + } -#define hypre_IJMatrixGlobalFirstRow(matrix) ((matrix) -> global_first_row) -#define hypre_IJMatrixGlobalFirstCol(matrix) ((matrix) -> global_first_col) -#define 
hypre_IJMatrixGlobalNumRows(matrix) ((matrix) -> global_num_rows) -#define hypre_IJMatrixGlobalNumCols(matrix) ((matrix) -> global_num_cols) -#define hypre_IJMatrixOMPFlag(matrix) ((matrix) -> omp_flag) -#define hypre_IJMatrixPrintLevel(matrix) ((matrix) -> print_level) + return HYPRE_MEMORY_UNDEFINED; +} /*-------------------------------------------------------------------------- * prototypes for operations on local objects @@ -198,14 +291,22 @@ typedef struct hypre_IJMatrix_struct HYPRE_Int hypre_GetIJMatrixParCSRMatrix( HYPRE_IJMatrix IJmatrix, Mat *reference ) #endif - + #ifdef ISIS_AVAILABLE /* IJMatrix_isis.c */ HYPRE_Int hypre_GetIJMatrixISISMatrix( HYPRE_IJMatrix IJmatrix, RowMatrix *reference ) #endif -#endif +#endif /* #ifndef hypre_IJ_MATRIX_HEADER */ + +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + /****************************************************************************** * * Header info for the hypre_IJMatrix structures @@ -223,23 +324,21 @@ typedef struct hypre_IJVector_struct { MPI_Comm comm; - HYPRE_BigInt *partitioning; /* Indicates partitioning over tasks */ + HYPRE_BigInt *partitioning; /* Indicates partitioning over tasks */ HYPRE_Int object_type; /* Indicates the type of "local storage" */ void *object; /* Structure for storing local portion */ void *translator; /* Structure for storing off processor - information */ - - void *assumed_part; /* IJ Vector assumed partition */ + information */ - HYPRE_BigInt global_first_row; /* these for data items are necessary */ - HYPRE_BigInt global_num_rows; /* to be able to avoid using the global */ - /* global partition */ - HYPRE_Int print_level; - + void *assumed_part; /* IJ Vector assumed partition */ + HYPRE_BigInt global_first_row; /* these two data items are necessary */ + HYPRE_BigInt global_num_rows; /* to be able to avoid using the */ + /* global partition */ + HYPRE_Int print_level; } hypre_IJVector; @@ -247,42 +346,53 @@ typedef struct hypre_IJVector_struct * Accessor macros: hypre_IJVector *--------------------------------------------------------------------------*/ -#define hypre_IJVectorComm(vector) ((vector) -> comm) - -#define hypre_IJVectorPartitioning(vector) ((vector) -> partitioning) - -#define hypre_IJVectorObjectType(vector) ((vector) -> object_type) - -#define hypre_IJVectorObject(vector) ((vector) -> object) - -#define hypre_IJVectorTranslator(vector) ((vector) -> translator) - +#define hypre_IJVectorComm(vector) ((vector) -> comm) +#define hypre_IJVectorPartitioning(vector) ((vector) -> partitioning) +#define hypre_IJVectorObjectType(vector) ((vector) -> object_type) +#define hypre_IJVectorObject(vector) ((vector) -> object) +#define
hypre_IJVectorTranslator(vector) ((vector) -> translator) #define hypre_IJVectorAssumedPart(vector) ((vector) -> assumed_part) - #define hypre_IJVectorGlobalFirstRow(vector) ((vector) -> global_first_row) +#define hypre_IJVectorGlobalNumRows(vector) ((vector) -> global_num_rows) +#define hypre_IJVectorPrintLevel(vector) ((vector) -> print_level) -#define hypre_IJVectorGlobalNumRows(vector) ((vector) -> global_num_rows) +static inline HYPRE_MemoryLocation +hypre_IJVectorMemoryLocation(hypre_IJVector *vector) +{ + if ( hypre_IJVectorObject(vector) && hypre_IJVectorObjectType(vector) == HYPRE_PARCSR) + { + return hypre_ParVectorMemoryLocation( (hypre_ParVector *) hypre_IJVectorObject(vector) ); + } -#define hypre_IJVectorPrintLevel(vector) ((vector) -> print_level) + return HYPRE_MEMORY_UNDEFINED; +} /*-------------------------------------------------------------------------- * prototypes for operations on local objects *--------------------------------------------------------------------------*/ /* #include "./internal_protos.h" */ -#endif +#endif /* #ifndef hypre_IJ_VECTOR_HEADER */ + +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ /* aux_parcsr_matrix.c */ HYPRE_Int hypre_AuxParCSRMatrixCreate ( hypre_AuxParCSRMatrix **aux_matrix , HYPRE_Int local_num_rows , HYPRE_Int local_num_cols , HYPRE_Int *sizes ); HYPRE_Int hypre_AuxParCSRMatrixDestroy ( hypre_AuxParCSRMatrix *matrix ); +HYPRE_Int hypre_AuxParCSRMatrixSetRownnz ( hypre_AuxParCSRMatrix *matrix ); HYPRE_Int hypre_AuxParCSRMatrixInitialize ( hypre_AuxParCSRMatrix *matrix ); -HYPRE_Int hypre_AuxParCSRMatrixSetMaxOffPRocElmts ( hypre_AuxParCSRMatrix *matrix , HYPRE_Int max_off_proc_elmts ); +HYPRE_Int hypre_AuxParCSRMatrixInitialize_v2( hypre_AuxParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); /* aux_par_vector.c */ HYPRE_Int hypre_AuxParVectorCreate ( hypre_AuxParVector **aux_vector ); HYPRE_Int hypre_AuxParVectorDestroy ( hypre_AuxParVector *vector ); HYPRE_Int hypre_AuxParVectorInitialize ( hypre_AuxParVector *vector ); -HYPRE_Int hypre_AuxParVectorSetMaxOffPRocElmts ( hypre_AuxParVector *vector , HYPRE_Int max_off_proc_elmts ); +HYPRE_Int hypre_AuxParVectorInitialize_v2( hypre_AuxParVector *vector, HYPRE_MemoryLocation memory_location ); /* IJ_assumed_part.c */ HYPRE_Int hypre_IJMatrixCreateAssumedPartition ( hypre_IJMatrix *matrix ); @@ -319,15 +429,19 @@ HYPRE_Int hypre_IJMatrixInitializeParCSR ( hypre_IJMatrix *matrix ); HYPRE_Int hypre_IJMatrixGetRowCountsParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_BigInt *rows , HYPRE_Int *ncols ); HYPRE_Int hypre_IJMatrixGetValuesParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , HYPRE_BigInt *rows , HYPRE_BigInt *cols , HYPRE_Complex *values ); HYPRE_Int hypre_IJMatrixSetValuesParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); +HYPRE_Int hypre_IJMatrixSetAddValuesParCSRDevice ( 
hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values, const char *action ); HYPRE_Int hypre_IJMatrixSetConstantValuesParCSR ( hypre_IJMatrix *matrix , HYPRE_Complex value ); HYPRE_Int hypre_IJMatrixAddToValuesParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); HYPRE_Int hypre_IJMatrixDestroyParCSR ( hypre_IJMatrix *matrix ); -HYPRE_Int hypre_IJMatrixAssembleOffProcValsParCSR ( hypre_IJMatrix *matrix , HYPRE_Int off_proc_i_indx , HYPRE_Int max_off_proc_elmts , HYPRE_Int current_num_elmts , HYPRE_BigInt *off_proc_i , HYPRE_BigInt *off_proc_j , HYPRE_Complex *off_proc_data ); +HYPRE_Int hypre_IJMatrixAssembleOffProcValsParCSR ( hypre_IJMatrix *matrix , HYPRE_Int off_proc_i_indx , HYPRE_Int max_off_proc_elmts , HYPRE_Int current_num_elmts , HYPRE_MemoryLocation memory_location , HYPRE_BigInt *off_proc_i , HYPRE_BigInt *off_proc_j , HYPRE_Complex *off_proc_data ); HYPRE_Int hypre_FillResponseIJOffProcVals ( void *p_recv_contact_buf , HYPRE_Int contact_size , HYPRE_Int contact_proc , void *ro , MPI_Comm comm , void **p_send_response_buf , HYPRE_Int *response_message_size ); HYPRE_Int hypre_FindProc ( HYPRE_BigInt *list , HYPRE_BigInt value , HYPRE_Int list_length ); HYPRE_Int hypre_IJMatrixAssembleParCSR ( hypre_IJMatrix *matrix ); HYPRE_Int hypre_IJMatrixSetValuesOMPParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); HYPRE_Int hypre_IJMatrixAddToValuesOMPParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); +HYPRE_Int 
hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix); +HYPRE_Int hypre_IJMatrixInitializeParCSR_v2(hypre_IJMatrix *matrix, HYPRE_MemoryLocation memory_location); +HYPRE_Int hypre_IJMatrixSetConstantValuesParCSRDevice( hypre_IJMatrix *matrix, HYPRE_Complex value ); /* IJMatrix_petsc.c */ HYPRE_Int hypre_IJMatrixSetLocalSizePETSc ( hypre_IJMatrix *matrix , HYPRE_Int local_m , HYPRE_Int local_n ); @@ -354,6 +468,7 @@ HYPRE_Int hypre_IJVectorZeroValues ( HYPRE_IJVector vector ); HYPRE_Int hypre_IJVectorCreatePar ( hypre_IJVector *vector , HYPRE_BigInt *IJpartitioning ); HYPRE_Int hypre_IJVectorDestroyPar ( hypre_IJVector *vector ); HYPRE_Int hypre_IJVectorInitializePar ( hypre_IJVector *vector ); +HYPRE_Int hypre_IJVectorInitializePar_v2(hypre_IJVector *vector, HYPRE_MemoryLocation memory_location); HYPRE_Int hypre_IJVectorSetMaxOffProcElmtsPar ( hypre_IJVector *vector , HYPRE_Int max_off_proc_elmts ); HYPRE_Int hypre_IJVectorDistributePar ( hypre_IJVector *vector , const HYPRE_Int *vec_starts ); HYPRE_Int hypre_IJVectorZeroValuesPar ( hypre_IJVector *vector ); @@ -361,7 +476,9 @@ HYPRE_Int hypre_IJVectorSetValuesPar ( hypre_IJVector *vector , HYPRE_Int num_va HYPRE_Int hypre_IJVectorAddToValuesPar ( hypre_IJVector *vector , HYPRE_Int num_values , const HYPRE_BigInt *indices , const HYPRE_Complex *values ); HYPRE_Int hypre_IJVectorAssemblePar ( hypre_IJVector *vector ); HYPRE_Int hypre_IJVectorGetValuesPar ( hypre_IJVector *vector , HYPRE_Int num_values , const HYPRE_BigInt *indices , HYPRE_Complex *values ); -HYPRE_Int hypre_IJVectorAssembleOffProcValsPar ( hypre_IJVector *vector , HYPRE_Int max_off_proc_elmts , HYPRE_Int current_num_elmts , HYPRE_BigInt *off_proc_i , HYPRE_Complex *off_proc_data ); +HYPRE_Int hypre_IJVectorAssembleOffProcValsPar ( hypre_IJVector *vector , HYPRE_Int max_off_proc_elmts , HYPRE_Int current_num_elmts , HYPRE_MemoryLocation memory_location , HYPRE_BigInt *off_proc_i , HYPRE_Complex *off_proc_data ); +HYPRE_Int 
hypre_IJVectorSetAddValuesParDevice(hypre_IJVector *vector, HYPRE_Int num_values, const HYPRE_BigInt *indices, const HYPRE_Complex *values, const char *action); +HYPRE_Int hypre_IJVectorAssembleParDevice(hypre_IJVector *vector); /* HYPRE_IJMatrix.c */ HYPRE_Int HYPRE_IJMatrixCreate ( MPI_Comm comm , HYPRE_BigInt ilower , HYPRE_BigInt iupper , HYPRE_BigInt jlower , HYPRE_BigInt jupper , HYPRE_IJMatrix *matrix ); diff --git a/src/IJ_mv/aux_par_vector.c b/src/IJ_mv/aux_par_vector.c index a965e69d6..a2e20f566 100644 --- a/src/IJ_mv/aux_par_vector.c +++ b/src/IJ_mv/aux_par_vector.c @@ -22,16 +22,26 @@ HYPRE_Int hypre_AuxParVectorCreate( hypre_AuxParVector **aux_vector) { hypre_AuxParVector *vector; - - vector = hypre_CTAlloc(hypre_AuxParVector, 1, HYPRE_MEMORY_HOST); - + + vector = hypre_CTAlloc(hypre_AuxParVector, 1, HYPRE_MEMORY_HOST); + /* set defaults */ hypre_AuxParVectorMaxOffProcElmts(vector) = 0; - hypre_AuxParVectorCurrentNumElmts(vector) = 0; + hypre_AuxParVectorCurrentOffProcElmts(vector) = 0; /* stash for setting or adding off processor values */ hypre_AuxParVectorOffProcI(vector) = NULL; hypre_AuxParVectorOffProcData(vector) = NULL; - + hypre_AuxParVectorMemoryLocation(vector) = HYPRE_MEMORY_HOST; +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + hypre_AuxParVectorMaxStackElmts(vector) = 0; + hypre_AuxParVectorCurrentStackElmts(vector) = 0; + hypre_AuxParVectorStackI(vector) = NULL; + hypre_AuxParVectorStackData(vector) = NULL; + hypre_AuxParVectorStackSorA(vector) = NULL; + hypre_AuxParVectorUsrOffProcElmts(vector) = -1; + hypre_AuxParVectorInitAllocFactor(vector) = 1.5; + hypre_AuxParVectorGrowFactor(vector) = 2.0; +#endif *aux_vector = vector; return 0; @@ -41,17 +51,22 @@ hypre_AuxParVectorCreate( hypre_AuxParVector **aux_vector) * hypre_AuxParVectorDestroy *--------------------------------------------------------------------------*/ -HYPRE_Int +HYPRE_Int hypre_AuxParVectorDestroy( hypre_AuxParVector *vector ) { HYPRE_Int ierr=0; if (vector) 
{ - if (hypre_AuxParVectorOffProcI(vector)) - hypre_TFree(hypre_AuxParVectorOffProcI(vector), HYPRE_MEMORY_HOST); - if (hypre_AuxParVectorOffProcData(vector)) - hypre_TFree(hypre_AuxParVectorOffProcData(vector), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParVectorOffProcI(vector), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParVectorOffProcData(vector), HYPRE_MEMORY_HOST); + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + hypre_TFree(hypre_AuxParVectorStackI(vector), hypre_AuxParVectorMemoryLocation(vector)); + hypre_TFree(hypre_AuxParVectorStackData(vector), hypre_AuxParVectorMemoryLocation(vector)); + hypre_TFree(hypre_AuxParVectorStackSorA(vector), hypre_AuxParVectorMemoryLocation(vector)); +#endif + hypre_TFree(vector, HYPRE_MEMORY_HOST); } @@ -59,36 +74,25 @@ hypre_AuxParVectorDestroy( hypre_AuxParVector *vector ) } /*-------------------------------------------------------------------------- - * hypre_AuxParVectorInitialize + * hypre_AuxParVectorInitialize_v2 *--------------------------------------------------------------------------*/ -HYPRE_Int -hypre_AuxParVectorInitialize( hypre_AuxParVector *vector ) +HYPRE_Int +hypre_AuxParVectorInitialize_v2( hypre_AuxParVector *vector, HYPRE_MemoryLocation memory_location ) { - HYPRE_Int max_off_proc_elmts = hypre_AuxParVectorMaxOffProcElmts(vector); + hypre_AuxParVectorMemoryLocation(vector) = memory_location; - /* allocate stash for setting or adding off processor values */ - if (max_off_proc_elmts > 0) + if ( memory_location == HYPRE_MEMORY_HOST ) { - hypre_AuxParVectorOffProcI(vector) = hypre_CTAlloc(HYPRE_BigInt, - max_off_proc_elmts, HYPRE_MEMORY_HOST); - hypre_AuxParVectorOffProcData(vector) = hypre_CTAlloc(HYPRE_Complex, - max_off_proc_elmts, HYPRE_MEMORY_HOST); + /* CPU assembly */ + /* allocate stash for setting or adding off processor values */ + HYPRE_Int max_off_proc_elmts = hypre_AuxParVectorMaxOffProcElmts(vector); + if (max_off_proc_elmts > 0) + { + hypre_AuxParVectorOffProcI(vector) = 
hypre_CTAlloc(HYPRE_BigInt, max_off_proc_elmts, HYPRE_MEMORY_HOST); + hypre_AuxParVectorOffProcData(vector) = hypre_CTAlloc(HYPRE_Complex, max_off_proc_elmts, HYPRE_MEMORY_HOST); + } } return 0; } - -/*-------------------------------------------------------------------------- - * hypre_AuxParVectorSetMaxOffProcElmts - *--------------------------------------------------------------------------*/ - -HYPRE_Int -hypre_AuxParVectorSetMaxOffPRocElmts( hypre_AuxParVector *vector, - HYPRE_Int max_off_proc_elmts ) -{ - HYPRE_Int ierr = 0; - hypre_AuxParVectorMaxOffProcElmts(vector) = max_off_proc_elmts; - return ierr; -} - diff --git a/src/IJ_mv/aux_par_vector.h b/src/IJ_mv/aux_par_vector.h index 149af845e..96a06ee9c 100644 --- a/src/IJ_mv/aux_par_vector.h +++ b/src/IJ_mv/aux_par_vector.h @@ -22,21 +22,46 @@ typedef struct { - HYPRE_Int max_off_proc_elmts; /* length of off processor stash for - SetValues and AddToValues*/ - HYPRE_Int current_num_elmts; /* current no. of elements stored in stash */ - HYPRE_BigInt *off_proc_i; /* contains column indices */ - HYPRE_Complex *off_proc_data; /* contains corresponding data */ + HYPRE_Int max_off_proc_elmts; /* length of off processor stash for + SetValues and AddToValues*/ + HYPRE_Int current_off_proc_elmts; /* current no. of elements stored in stash */ + HYPRE_BigInt *off_proc_i; /* contains column indices */ + HYPRE_Complex *off_proc_data; /* contains corresponding data */ + + HYPRE_MemoryLocation memory_location; + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_Int max_stack_elmts; /* length of stash for SetValues and AddToValues*/ + HYPRE_Int current_stack_elmts; /* current no. 
of elements stored in stash */ + HYPRE_BigInt *stack_i; /* contains row indices */ + HYPRE_Complex *stack_data; /* contains corresponding data */ + char *stack_sora; + HYPRE_Int usr_off_proc_elmts; /* the num of off-proc elements usr guided */ + HYPRE_Real init_alloc_factor; + HYPRE_Real grow_factor; +#endif } hypre_AuxParVector; /*-------------------------------------------------------------------------- * Accessor functions for the Parallel Vector structure *--------------------------------------------------------------------------*/ -#define hypre_AuxParVectorMaxOffProcElmts(matrix) ((matrix) -> max_off_proc_elmts) -#define hypre_AuxParVectorCurrentNumElmts(matrix) ((matrix) -> current_num_elmts) -#define hypre_AuxParVectorOffProcI(matrix) ((matrix) -> off_proc_i) -#define hypre_AuxParVectorOffProcData(matrix) ((matrix) -> off_proc_data) -//#define hypre_AuxParVectorCancelIndx(matrix) ((matrix) -> cancel_indx) +#define hypre_AuxParVectorMaxOffProcElmts(vector) ((vector) -> max_off_proc_elmts) +#define hypre_AuxParVectorCurrentOffProcElmts(vector) ((vector) -> current_off_proc_elmts) +#define hypre_AuxParVectorOffProcI(vector) ((vector) -> off_proc_i) +#define hypre_AuxParVectorOffProcData(vector) ((vector) -> off_proc_data) +#define hypre_AuxParVectorMemoryLocation(vector) ((vector) -> memory_location) + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) +#define hypre_AuxParVectorMaxStackElmts(vector) ((vector) -> max_stack_elmts) +#define hypre_AuxParVectorCurrentStackElmts(vector) ((vector) -> current_stack_elmts) +#define hypre_AuxParVectorStackI(vector) ((vector) -> stack_i) +#define hypre_AuxParVectorStackData(vector) ((vector) -> stack_data) +#define hypre_AuxParVectorStackSorA(vector) ((vector) -> stack_sora) +#define hypre_AuxParVectorUsrOffProcElmts(vector) ((vector) -> usr_off_proc_elmts) +#define hypre_AuxParVectorInitAllocFactor(vector) ((vector) -> init_alloc_factor) +#define hypre_AuxParVectorGrowFactor(vector) ((vector) -> grow_factor) 
#endif + +#endif /* #ifndef hypre_AUX_PAR_VECTOR_HEADER */ diff --git a/src/IJ_mv/aux_parcsr_matrix.c b/src/IJ_mv/aux_parcsr_matrix.c index 0de689feb..83a150bc3 100644 --- a/src/IJ_mv/aux_parcsr_matrix.c +++ b/src/IJ_mv/aux_parcsr_matrix.c @@ -20,44 +20,54 @@ HYPRE_Int hypre_AuxParCSRMatrixCreate( hypre_AuxParCSRMatrix **aux_matrix, - HYPRE_Int local_num_rows, - HYPRE_Int local_num_cols, - HYPRE_Int *sizes) + HYPRE_Int local_num_rows, + HYPRE_Int local_num_cols, + HYPRE_Int *sizes ) { hypre_AuxParCSRMatrix *matrix; matrix = hypre_CTAlloc(hypre_AuxParCSRMatrix, 1, HYPRE_MEMORY_HOST); hypre_AuxParCSRMatrixLocalNumRows(matrix) = local_num_rows; + hypre_AuxParCSRMatrixLocalNumRownnz(matrix) = local_num_rows; hypre_AuxParCSRMatrixLocalNumCols(matrix) = local_num_cols; - if (sizes) - { - hypre_AuxParCSRMatrixRowSpace(matrix) = sizes; - } - else - { - hypre_AuxParCSRMatrixRowSpace(matrix) = NULL; - } + hypre_AuxParCSRMatrixRowSpace(matrix) = sizes; /* set defaults */ hypre_AuxParCSRMatrixNeedAux(matrix) = 1; hypre_AuxParCSRMatrixMaxOffProcElmts(matrix) = 0; - hypre_AuxParCSRMatrixCurrentNumElmts(matrix) = 0; + hypre_AuxParCSRMatrixCurrentOffProcElmts(matrix) = 0; hypre_AuxParCSRMatrixOffProcIIndx(matrix) = 0; + hypre_AuxParCSRMatrixRownnz(matrix) = NULL; hypre_AuxParCSRMatrixRowLength(matrix) = NULL; hypre_AuxParCSRMatrixAuxJ(matrix) = NULL; hypre_AuxParCSRMatrixAuxData(matrix) = NULL; hypre_AuxParCSRMatrixIndxDiag(matrix) = NULL; hypre_AuxParCSRMatrixIndxOffd(matrix) = NULL; - /* stash for setting or adding off processor values */ + hypre_AuxParCSRMatrixDiagSizes(matrix) = NULL; + hypre_AuxParCSRMatrixOffdSizes(matrix) = NULL; + /* stash for setting or adding on/off-proc values */ hypre_AuxParCSRMatrixOffProcI(matrix) = NULL; hypre_AuxParCSRMatrixOffProcJ(matrix) = NULL; hypre_AuxParCSRMatrixOffProcData(matrix) = NULL; - + hypre_AuxParCSRMatrixMemoryLocation(matrix) = HYPRE_MEMORY_HOST; +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + 
hypre_AuxParCSRMatrixMaxStackElmts(matrix) = 0; + hypre_AuxParCSRMatrixCurrentStackElmts(matrix) = 0; + hypre_AuxParCSRMatrixStackI(matrix) = NULL; + hypre_AuxParCSRMatrixStackJ(matrix) = NULL; + hypre_AuxParCSRMatrixStackData(matrix) = NULL; + hypre_AuxParCSRMatrixStackSorA(matrix) = NULL; + hypre_AuxParCSRMatrixUsrOnProcElmts(matrix) = -1; + hypre_AuxParCSRMatrixUsrOffProcElmts(matrix) = -1; + hypre_AuxParCSRMatrixInitAllocFactor(matrix) = 5.0; + hypre_AuxParCSRMatrixGrowFactor(matrix) = 2.0; +#endif *aux_matrix = matrix; - return 0; + + return hypre_error_flag; } /*-------------------------------------------------------------------------- @@ -67,121 +77,306 @@ hypre_AuxParCSRMatrixCreate( hypre_AuxParCSRMatrix **aux_matrix, HYPRE_Int hypre_AuxParCSRMatrixDestroy( hypre_AuxParCSRMatrix *matrix ) { - HYPRE_Int ierr=0; - HYPRE_Int i; - HYPRE_Int num_rows; + HYPRE_Int num_rownnz; + HYPRE_Int num_rows; + HYPRE_Int *rownnz; + HYPRE_Int i; if (matrix) { + rownnz = hypre_AuxParCSRMatrixRownnz(matrix); + num_rownnz = hypre_AuxParCSRMatrixLocalNumRownnz(matrix); num_rows = hypre_AuxParCSRMatrixLocalNumRows(matrix); - if (hypre_AuxParCSRMatrixRowLength(matrix)) - hypre_TFree(hypre_AuxParCSRMatrixRowLength(matrix), HYPRE_MEMORY_HOST); - if (hypre_AuxParCSRMatrixRowSpace(matrix)) - hypre_TFree(hypre_AuxParCSRMatrixRowSpace(matrix), HYPRE_MEMORY_HOST); + if (hypre_AuxParCSRMatrixAuxJ(matrix)) { - for (i=0; i < num_rows; i++) - hypre_TFree(hypre_AuxParCSRMatrixAuxJ(matrix)[i], HYPRE_MEMORY_HOST); + if (hypre_AuxParCSRMatrixRownnz(matrix)) + { + for (i = 0; i < num_rownnz; i++) + { + hypre_TFree(hypre_AuxParCSRMatrixAuxJ(matrix)[rownnz[i]], HYPRE_MEMORY_HOST); + } + } + else + { + for (i = 0; i < num_rows; i++) + { + hypre_TFree(hypre_AuxParCSRMatrixAuxJ(matrix)[i], HYPRE_MEMORY_HOST); + } + } + hypre_TFree(hypre_AuxParCSRMatrixAuxJ(matrix), HYPRE_MEMORY_HOST); } + if (hypre_AuxParCSRMatrixAuxData(matrix)) { - for (i=0; i < num_rows; i++) - 
hypre_TFree(hypre_AuxParCSRMatrixAuxData(matrix)[i], HYPRE_MEMORY_HOST); - hypre_TFree(hypre_AuxParCSRMatrixAuxData(matrix), HYPRE_MEMORY_HOST); + if (hypre_AuxParCSRMatrixRownnz(matrix)) + { + for (i = 0; i < num_rownnz; i++) + { + hypre_TFree(hypre_AuxParCSRMatrixAuxData(matrix)[rownnz[i]], HYPRE_MEMORY_HOST); + } + hypre_TFree(hypre_AuxParCSRMatrixAuxData(matrix), HYPRE_MEMORY_HOST); + } + else + { + for (i = 0; i < num_rows; i++) + { + hypre_TFree(hypre_AuxParCSRMatrixAuxData(matrix)[i], HYPRE_MEMORY_HOST); + } + hypre_TFree(hypre_AuxParCSRMatrixAuxData(matrix), HYPRE_MEMORY_HOST); + } } - if (hypre_AuxParCSRMatrixIndxDiag(matrix)) - hypre_TFree(hypre_AuxParCSRMatrixIndxDiag(matrix), HYPRE_MEMORY_HOST); - if (hypre_AuxParCSRMatrixIndxOffd(matrix)) - hypre_TFree(hypre_AuxParCSRMatrixIndxOffd(matrix), HYPRE_MEMORY_HOST); - if (hypre_AuxParCSRMatrixOffProcI(matrix)) - hypre_TFree(hypre_AuxParCSRMatrixOffProcI(matrix), HYPRE_MEMORY_HOST); - if (hypre_AuxParCSRMatrixOffProcJ(matrix)) - hypre_TFree(hypre_AuxParCSRMatrixOffProcJ(matrix), HYPRE_MEMORY_HOST); - if (hypre_AuxParCSRMatrixOffProcData(matrix)) - hypre_TFree(hypre_AuxParCSRMatrixOffProcData(matrix), HYPRE_MEMORY_HOST); + + hypre_TFree(hypre_AuxParCSRMatrixRownnz(matrix), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParCSRMatrixRowLength(matrix), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParCSRMatrixRowSpace(matrix), HYPRE_MEMORY_HOST); + + hypre_TFree(hypre_AuxParCSRMatrixIndxDiag(matrix), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParCSRMatrixIndxOffd(matrix), HYPRE_MEMORY_HOST); + + hypre_TFree(hypre_AuxParCSRMatrixDiagSizes(matrix), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParCSRMatrixOffdSizes(matrix), HYPRE_MEMORY_HOST); + + hypre_TFree(hypre_AuxParCSRMatrixOffProcI(matrix), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParCSRMatrixOffProcJ(matrix), HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParCSRMatrixOffProcData(matrix), HYPRE_MEMORY_HOST); + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + 
hypre_TFree(hypre_AuxParCSRMatrixStackI(matrix), hypre_AuxParCSRMatrixMemoryLocation(matrix)); + hypre_TFree(hypre_AuxParCSRMatrixStackJ(matrix), hypre_AuxParCSRMatrixMemoryLocation(matrix)); + hypre_TFree(hypre_AuxParCSRMatrixStackData(matrix), hypre_AuxParCSRMatrixMemoryLocation(matrix)); + hypre_TFree(hypre_AuxParCSRMatrixStackSorA(matrix), hypre_AuxParCSRMatrixMemoryLocation(matrix)); +#endif + hypre_TFree(matrix, HYPRE_MEMORY_HOST); } - return ierr; + return hypre_error_flag; } /*-------------------------------------------------------------------------- - * hypre_AuxParCSRMatrixInitialize + * hypre_AuxParCSRMatrixSetRownnz *--------------------------------------------------------------------------*/ HYPRE_Int -hypre_AuxParCSRMatrixInitialize( hypre_AuxParCSRMatrix *matrix ) +hypre_AuxParCSRMatrixSetRownnz( hypre_AuxParCSRMatrix *matrix ) { - HYPRE_Int local_num_rows = hypre_AuxParCSRMatrixLocalNumRows(matrix); - HYPRE_Int *row_space = hypre_AuxParCSRMatrixRowSpace(matrix); - HYPRE_Int max_off_proc_elmts = hypre_AuxParCSRMatrixMaxOffProcElmts(matrix); - HYPRE_BigInt **aux_j; - HYPRE_Complex **aux_data; - HYPRE_Int i; + HYPRE_Int local_num_rows = hypre_AuxParCSRMatrixLocalNumRows(matrix); + HYPRE_Int *row_space = hypre_AuxParCSRMatrixRowSpace(matrix); + HYPRE_Int num_rownnz_old = hypre_AuxParCSRMatrixLocalNumRownnz(matrix); + HYPRE_Int *rownnz_old = hypre_AuxParCSRMatrixRownnz(matrix); + HYPRE_Int *rownnz; - if (local_num_rows < 0) - return -1; - if (local_num_rows == 0) - return 0; - /* allocate stash for setting or adding off processor values */ - if (max_off_proc_elmts > 0) + HYPRE_Int i, ii, local_num_rownnz; + + /* Count number of nonzero rows */ + local_num_rownnz = 0; +#ifdef HYPRE_USING_OPENMP +#pragma omp parallel for private(i) reduction(+:local_num_rownnz) HYPRE_SMP_SCHEDULE +#endif + for (i = 0; i < local_num_rows; i++) { - hypre_AuxParCSRMatrixOffProcI(matrix) = hypre_CTAlloc(HYPRE_BigInt, - 2*max_off_proc_elmts, HYPRE_MEMORY_HOST); - 
hypre_AuxParCSRMatrixOffProcJ(matrix) = hypre_CTAlloc(HYPRE_BigInt, - max_off_proc_elmts, HYPRE_MEMORY_HOST); - hypre_AuxParCSRMatrixOffProcData(matrix) = hypre_CTAlloc(HYPRE_Complex, - max_off_proc_elmts, HYPRE_MEMORY_HOST); + if (row_space[i] > 0) + { + local_num_rownnz++; + } } - if (hypre_AuxParCSRMatrixNeedAux(matrix)) + + if (local_num_rownnz != local_num_rows) { - aux_j = hypre_CTAlloc(HYPRE_BigInt *, local_num_rows, HYPRE_MEMORY_HOST); - aux_data = hypre_CTAlloc(HYPRE_Complex *, local_num_rows, HYPRE_MEMORY_HOST); - if (!hypre_AuxParCSRMatrixRowLength(matrix)) - hypre_AuxParCSRMatrixRowLength(matrix) = - hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); - if (row_space) + rownnz = hypre_CTAlloc(HYPRE_Int, local_num_rownnz, HYPRE_MEMORY_HOST); + + /* Find nonzero rows */ + local_num_rownnz = 0; + for (i = 0; i < local_num_rows; i++) { - for (i=0; i < local_num_rows; i++) + if (row_space[i] > 0) { - aux_j[i] = hypre_CTAlloc(HYPRE_BigInt, row_space[i], HYPRE_MEMORY_HOST); - aux_data[i] = hypre_CTAlloc(HYPRE_Complex, row_space[i], HYPRE_MEMORY_HOST); + rownnz[local_num_rownnz++] = i; } } - else + + /* Free memory if necessary */ + if (rownnz_old && rownnz && (local_num_rownnz < num_rownnz_old)) { - row_space = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); - for (i=0; i < local_num_rows; i++) + ii = 0; + for (i = 0; i < num_rownnz_old; i++) { - row_space[i] = 30; - aux_j[i] = hypre_CTAlloc(HYPRE_BigInt, 30, HYPRE_MEMORY_HOST); - aux_data[i] = hypre_CTAlloc(HYPRE_Complex, 30, HYPRE_MEMORY_HOST); + if (rownnz_old[i] == rownnz[ii]) + { + ii++; + } + else + { + hypre_TFree(hypre_AuxParCSRMatrixAuxJ(matrix)[rownnz_old[i]], HYPRE_MEMORY_HOST); + hypre_TFree(hypre_AuxParCSRMatrixAuxData(matrix)[rownnz_old[i]], HYPRE_MEMORY_HOST); + } + + if (ii == local_num_rownnz) + { + i = i + 1; + for (; i < num_rownnz_old; i++) + { + hypre_TFree(hypre_AuxParCSRMatrixAuxJ(matrix)[rownnz_old[i]], + HYPRE_MEMORY_HOST); + 
hypre_TFree(hypre_AuxParCSRMatrixAuxData(matrix)[rownnz_old[i]], + HYPRE_MEMORY_HOST); + } + break; + } } - hypre_AuxParCSRMatrixRowSpace(matrix) = row_space; } - hypre_AuxParCSRMatrixAuxJ(matrix) = aux_j; - hypre_AuxParCSRMatrixAuxData(matrix) = aux_data; + hypre_TFree(rownnz_old, HYPRE_MEMORY_HOST); + + hypre_AuxParCSRMatrixLocalNumRownnz(matrix) = local_num_rownnz; + hypre_AuxParCSRMatrixRownnz(matrix) = rownnz; } else { - hypre_AuxParCSRMatrixIndxDiag(matrix) = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); - hypre_AuxParCSRMatrixIndxOffd(matrix) = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); + hypre_TFree(rownnz_old, HYPRE_MEMORY_HOST); + hypre_AuxParCSRMatrixLocalNumRownnz(matrix) = local_num_rows; + hypre_AuxParCSRMatrixRownnz(matrix) = NULL; } - return 0; + return hypre_error_flag; } /*-------------------------------------------------------------------------- - * hypre_AuxParCSRMatrixSetMaxOffProcElmts + * hypre_AuxParCSRMatrixInitialize_v2 *--------------------------------------------------------------------------*/ - HYPRE_Int -hypre_AuxParCSRMatrixSetMaxOffPRocElmts( hypre_AuxParCSRMatrix *matrix, - HYPRE_Int max_off_proc_elmts ) +hypre_AuxParCSRMatrixInitialize_v2( hypre_AuxParCSRMatrix *matrix, + HYPRE_MemoryLocation memory_location ) { - HYPRE_Int ierr = 0; - hypre_AuxParCSRMatrixMaxOffProcElmts(matrix) = max_off_proc_elmts; - return ierr; + HYPRE_Int local_num_rows = hypre_AuxParCSRMatrixLocalNumRows(matrix); + HYPRE_Int max_off_proc_elmts = hypre_AuxParCSRMatrixMaxOffProcElmts(matrix); + + hypre_AuxParCSRMatrixMemoryLocation(matrix) = memory_location; + + if (local_num_rows < 0) + { + return -1; + } + + if (local_num_rows == 0) + { + return 0; + } + + if (memory_location != HYPRE_MEMORY_HOST) + { + /* GPU assembly */ + hypre_AuxParCSRMatrixNeedAux(matrix) = 1; + } + else + { + /* CPU assembly */ + /* allocate stash for setting or adding off processor values */ + if (max_off_proc_elmts > 0) + { + 
hypre_AuxParCSRMatrixOffProcI(matrix) = hypre_CTAlloc(HYPRE_BigInt, 2*max_off_proc_elmts, HYPRE_MEMORY_HOST); + hypre_AuxParCSRMatrixOffProcJ(matrix) = hypre_CTAlloc(HYPRE_BigInt, max_off_proc_elmts, HYPRE_MEMORY_HOST); + hypre_AuxParCSRMatrixOffProcData(matrix) = hypre_CTAlloc(HYPRE_Complex, max_off_proc_elmts, HYPRE_MEMORY_HOST); + } + + if (hypre_AuxParCSRMatrixNeedAux(matrix)) + { + HYPRE_Int *row_space = hypre_AuxParCSRMatrixRowSpace(matrix); + HYPRE_Int *rownnz = hypre_AuxParCSRMatrixRownnz(matrix); + HYPRE_BigInt **aux_j = hypre_CTAlloc(HYPRE_BigInt *, local_num_rows, HYPRE_MEMORY_HOST); + HYPRE_Complex **aux_data = hypre_CTAlloc(HYPRE_Complex *, local_num_rows, HYPRE_MEMORY_HOST); + + HYPRE_Int local_num_rownnz; + HYPRE_Int i, ii; + + if (row_space) + { + /* Count number of nonzero rows */ + local_num_rownnz = 0; + for (i = 0; i < local_num_rows; i++) + { + if (row_space[i] > 0) + { + local_num_rownnz++; + } + } + + if (local_num_rownnz != local_num_rows) + { + rownnz = hypre_CTAlloc(HYPRE_Int, local_num_rownnz, HYPRE_MEMORY_HOST); + + /* Find nonzero rows */ + local_num_rownnz = 0; + for (i = 0; i < local_num_rows; i++) + { + if (row_space[i] > 0) + { + rownnz[local_num_rownnz++] = i; + } + } + + hypre_AuxParCSRMatrixLocalNumRownnz(matrix) = local_num_rownnz; + hypre_AuxParCSRMatrixRownnz(matrix) = rownnz; + } + } + + if (!hypre_AuxParCSRMatrixRowLength(matrix)) + { + hypre_AuxParCSRMatrixRowLength(matrix) = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); + } + + if (row_space) + { + if (local_num_rownnz != local_num_rows) + { + for (i = 0; i < local_num_rownnz; i++) + { + ii = rownnz[i]; + aux_j[ii] = hypre_CTAlloc(HYPRE_BigInt, row_space[ii], HYPRE_MEMORY_HOST); + aux_data[ii] = hypre_CTAlloc(HYPRE_Complex, row_space[ii], HYPRE_MEMORY_HOST); + } + } + else + { + for (i = 0; i < local_num_rows; i++) + { + aux_j[i] = hypre_CTAlloc(HYPRE_BigInt, row_space[i], HYPRE_MEMORY_HOST); + aux_data[i] = hypre_CTAlloc(HYPRE_Complex, row_space[i], 
HYPRE_MEMORY_HOST); + } + } + } + else + { + row_space = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); + for (i = 0; i < local_num_rows; i++) + { + row_space[i] = 30; + aux_j[i] = hypre_CTAlloc(HYPRE_BigInt, 30, HYPRE_MEMORY_HOST); + aux_data[i] = hypre_CTAlloc(HYPRE_Complex, 30, HYPRE_MEMORY_HOST); + } + hypre_AuxParCSRMatrixRowSpace(matrix) = row_space; + } + hypre_AuxParCSRMatrixAuxJ(matrix) = aux_j; + hypre_AuxParCSRMatrixAuxData(matrix) = aux_data; + } + else + { + hypre_AuxParCSRMatrixIndxDiag(matrix) = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); + hypre_AuxParCSRMatrixIndxOffd(matrix) = hypre_CTAlloc(HYPRE_Int, local_num_rows, HYPRE_MEMORY_HOST); + } + } + + return hypre_error_flag; } +HYPRE_Int +hypre_AuxParCSRMatrixInitialize(hypre_AuxParCSRMatrix *matrix) +{ + if (matrix) + { + return hypre_AuxParCSRMatrixInitialize_v2(matrix, hypre_AuxParCSRMatrixMemoryLocation(matrix)); + } + + return -2; +} diff --git a/src/IJ_mv/aux_parcsr_matrix.h b/src/IJ_mv/aux_parcsr_matrix.h index 40d92ce58..7cdeb7b11 100644 --- a/src/IJ_mv/aux_parcsr_matrix.h +++ b/src/IJ_mv/aux_parcsr_matrix.h @@ -22,61 +22,104 @@ typedef struct { - HYPRE_Int local_num_rows; /* defines number of rows on this processors */ - HYPRE_Int local_num_cols; /* defines number of cols of diag */ - - HYPRE_Int need_aux; /* if need_aux = 1, aux_j, aux_data are used to - generate the parcsr matrix (default), - for need_aux = 0, data is put directly into - parcsr structure (requires the knowledge of - offd_i and diag_i ) */ - - HYPRE_Int *row_length; /* row_length_diag[i] contains number of stored - elements in i-th row */ - HYPRE_Int *row_space; /* row_space_diag[i] contains space allocated to - i-th row */ - HYPRE_BigInt **aux_j; /* contains collected column indices */ - HYPRE_Complex **aux_data; /* contains collected data */ - - HYPRE_Int *indx_diag; /* indx_diag[i] points to first empty space of portion - in diag_j , diag_data assigned to row i */ - HYPRE_Int *indx_offd; 
/* indx_offd[i] points to first empty space of portion - in offd_j , offd_data assigned to row i */ - HYPRE_Int max_off_proc_elmts; /* length of off processor stash set for - SetValues and AddTOValues */ - HYPRE_Int current_num_elmts; /* current no. of elements stored in stash */ - HYPRE_Int off_proc_i_indx; /* pointer to first empty space in - set_off_proc_i_set */ - HYPRE_BigInt *off_proc_i; /* length 2*num_off_procs_elmts, contains info pairs - (code, no. of elmts) where code contains global - row no., only used for AddToValues */ - HYPRE_BigInt *off_proc_j; /* contains column indices */ - HYPRE_Complex *off_proc_data; /* contains corresponding data */ + HYPRE_Int local_num_rows; /* defines number of rows on this processor */ + HYPRE_Int local_num_rownnz; /* defines number of nonzero rows on this processor */ + HYPRE_Int local_num_cols; /* defines number of cols of diag */ + + HYPRE_Int need_aux; /* if need_aux = 1, aux_j, aux_data are used to + generate the parcsr matrix (default), + for need_aux = 0, data is put directly into + parcsr structure (requires the knowledge of + offd_i and diag_i ) */ + + HYPRE_Int *rownnz; /* row_nnz[i] contains the i-th nonzero row id */ + HYPRE_Int *row_length; /* row_length[i] contains number of stored + elements in i-th row */ + HYPRE_Int *row_space; /* row_space[i] contains space allocated to + i-th row */ + + HYPRE_Int *diag_sizes; /* user input row lengths of diag */ + HYPRE_Int *offd_sizes; /* user input row lengths of diag */ + + HYPRE_BigInt **aux_j; /* contains collected column indices */ + HYPRE_Complex **aux_data; /* contains collected data */ + + HYPRE_Int *indx_diag; /* indx_diag[i] points to first empty space of portion + in diag_j , diag_data assigned to row i */ + HYPRE_Int *indx_offd; /* indx_offd[i] points to first empty space of portion + in offd_j , offd_data assigned to row i */ + + HYPRE_Int max_off_proc_elmts; /* length of off processor stash set for + SetValues and AddTOValues */ + HYPRE_Int 
current_off_proc_elmts; /* current no. of elements stored in stash */ + HYPRE_Int off_proc_i_indx; /* pointer to first empty space in + set_off_proc_i_set */ + HYPRE_BigInt *off_proc_i; /* length 2*num_off_procs_elmts, contains info pairs + (code, no. of elmts) where code contains global + row no. if SetValues, and (-global row no. -1) + if AddToValues */ + HYPRE_BigInt *off_proc_j; /* contains column indices + * ( global col id.) if SetValues, + * (-global col id. -1) if AddToValues */ + HYPRE_Complex *off_proc_data; /* contains corresponding data */ + + HYPRE_MemoryLocation memory_location; + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) + HYPRE_Int max_stack_elmts; + HYPRE_Int current_stack_elmts; + HYPRE_BigInt *stack_i; + HYPRE_BigInt *stack_j; + HYPRE_Complex *stack_data; + char *stack_sora; /* Set (1) or Add (0) */ + HYPRE_Int usr_on_proc_elmts; /* user given num elmt on-proc */ + HYPRE_Int usr_off_proc_elmts; /* user given num elmt off-proc */ + HYPRE_Real init_alloc_factor; + HYPRE_Real grow_factor; +#endif } hypre_AuxParCSRMatrix; /*-------------------------------------------------------------------------- * Accessor functions for the Parallel CSR Matrix structure *--------------------------------------------------------------------------*/ -#define hypre_AuxParCSRMatrixLocalNumRows(matrix) ((matrix) -> local_num_rows) -#define hypre_AuxParCSRMatrixLocalNumCols(matrix) ((matrix) -> local_num_cols) +#define hypre_AuxParCSRMatrixLocalNumRows(matrix) ((matrix) -> local_num_rows) +#define hypre_AuxParCSRMatrixLocalNumRownnz(matrix) ((matrix) -> local_num_rownnz) +#define hypre_AuxParCSRMatrixLocalNumCols(matrix) ((matrix) -> local_num_cols) -#define hypre_AuxParCSRMatrixNeedAux(matrix) ((matrix) -> need_aux) -#define hypre_AuxParCSRMatrixRowLength(matrix) ((matrix) -> row_length) -#define hypre_AuxParCSRMatrixRowSpace(matrix) ((matrix) -> row_space) -#define hypre_AuxParCSRMatrixAuxJ(matrix) ((matrix) -> aux_j) -#define 
hypre_AuxParCSRMatrixAuxData(matrix) ((matrix) -> aux_data) +#define hypre_AuxParCSRMatrixNeedAux(matrix) ((matrix) -> need_aux) +#define hypre_AuxParCSRMatrixRownnz(matrix) ((matrix) -> rownnz) +#define hypre_AuxParCSRMatrixRowLength(matrix) ((matrix) -> row_length) +#define hypre_AuxParCSRMatrixRowSpace(matrix) ((matrix) -> row_space) +#define hypre_AuxParCSRMatrixAuxJ(matrix) ((matrix) -> aux_j) +#define hypre_AuxParCSRMatrixAuxData(matrix) ((matrix) -> aux_data) -#define hypre_AuxParCSRMatrixIndxDiag(matrix) ((matrix) -> indx_diag) -#define hypre_AuxParCSRMatrixIndxOffd(matrix) ((matrix) -> indx_offd) +#define hypre_AuxParCSRMatrixIndxDiag(matrix) ((matrix) -> indx_diag) +#define hypre_AuxParCSRMatrixIndxOffd(matrix) ((matrix) -> indx_offd) -#define hypre_AuxParCSRMatrixMaxOffProcElmts(matrix) ((matrix) -> max_off_proc_elmts) -#define hypre_AuxParCSRMatrixCurrentNumElmts(matrix) ((matrix) -> current_num_elmts) -#define hypre_AuxParCSRMatrixOffProcIIndx(matrix) ((matrix) -> off_proc_i_indx) -#define hypre_AuxParCSRMatrixOffProcI(matrix) ((matrix) -> off_proc_i) -#define hypre_AuxParCSRMatrixOffProcJ(matrix) ((matrix) -> off_proc_j) -#define hypre_AuxParCSRMatrixOffProcData(matrix) ((matrix) -> off_proc_data) -#define hypre_AuxParCSRMatrixAuxOffdJ(matrix) ((matrix) -> aux_offd_j) -//#define hypre_AuxParCSRMatrixCancelIndx(matrix) ((matrix) -> cancel_indx) +#define hypre_AuxParCSRMatrixDiagSizes(matrix) ((matrix) -> diag_sizes) +#define hypre_AuxParCSRMatrixOffdSizes(matrix) ((matrix) -> offd_sizes) +#define hypre_AuxParCSRMatrixMaxOffProcElmts(matrix) ((matrix) -> max_off_proc_elmts) +#define hypre_AuxParCSRMatrixCurrentOffProcElmts(matrix) ((matrix) -> current_off_proc_elmts) +#define hypre_AuxParCSRMatrixOffProcIIndx(matrix) ((matrix) -> off_proc_i_indx) +#define hypre_AuxParCSRMatrixOffProcI(matrix) ((matrix) -> off_proc_i) +#define hypre_AuxParCSRMatrixOffProcJ(matrix) ((matrix) -> off_proc_j) +#define hypre_AuxParCSRMatrixOffProcData(matrix) ((matrix) -> 
off_proc_data) + +#define hypre_AuxParCSRMatrixMemoryLocation(matrix) ((matrix) -> memory_location) + +#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP) +#define hypre_AuxParCSRMatrixMaxStackElmts(matrix) ((matrix) -> max_stack_elmts) +#define hypre_AuxParCSRMatrixCurrentStackElmts(matrix) ((matrix) -> current_stack_elmts) +#define hypre_AuxParCSRMatrixStackI(matrix) ((matrix) -> stack_i) +#define hypre_AuxParCSRMatrixStackJ(matrix) ((matrix) -> stack_j) +#define hypre_AuxParCSRMatrixStackData(matrix) ((matrix) -> stack_data) +#define hypre_AuxParCSRMatrixStackSorA(matrix) ((matrix) -> stack_sora) +#define hypre_AuxParCSRMatrixUsrOnProcElmts(matrix) ((matrix) -> usr_on_proc_elmts) +#define hypre_AuxParCSRMatrixUsrOffProcElmts(matrix) ((matrix) -> usr_off_proc_elmts) +#define hypre_AuxParCSRMatrixInitAllocFactor(matrix) ((matrix) -> init_alloc_factor) +#define hypre_AuxParCSRMatrixGrowFactor(matrix) ((matrix) -> grow_factor) #endif + +#endif /* #ifndef hypre_AUX_PARCSR_MATRIX_HEADER */ diff --git a/src/IJ_mv/headers b/src/IJ_mv/headers index 06f077fc6..045449581 100755 --- a/src/IJ_mv/headers +++ b/src/IJ_mv/headers @@ -12,15 +12,15 @@ INTERNAL_HEADER=_hypre_IJ_mv.h cat > $INTERNAL_HEADER <<@ -#include +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ #ifndef hypre_IJ_HEADER #define hypre_IJ_HEADER -#include "_hypre_utilities.h" -#include "seq_mv.h" +#include #include "_hypre_parcsr_mv.h" #include "HYPRE_IJ_mv.h" +#include "HYPRE.h" #ifdef __cplusplus extern "C" { @@ -36,11 +36,11 @@ cat aux_parcsr_matrix.h >> $INTERNAL_HEADER cat aux_par_vector.h >> $INTERNAL_HEADER cat IJ_matrix.h >> $INTERNAL_HEADER cat IJ_vector.h >> $INTERNAL_HEADER +cat protos.h >> $INTERNAL_HEADER -../utilities/protos aux_*.c >> $INTERNAL_HEADER -../utilities/protos IJ*.c >> $INTERNAL_HEADER -../utilities/protos HYPRE_IJ*.c >> $INTERNAL_HEADER - +#../utilities/protos aux_*.c >> $INTERNAL_HEADER +#../utilities/protos IJ*.c >> $INTERNAL_HEADER +#../utilities/protos 
HYPRE_IJ*.c >> $INTERNAL_HEADER #=========================================================================== # Include guards diff --git a/src/IJ_mv/protos.h b/src/IJ_mv/protos.h new file mode 100644 index 000000000..b1216b94f --- /dev/null +++ b/src/IJ_mv/protos.h @@ -0,0 +1,144 @@ +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + +/* aux_parcsr_matrix.c */ +HYPRE_Int hypre_AuxParCSRMatrixCreate ( hypre_AuxParCSRMatrix **aux_matrix , HYPRE_Int local_num_rows , HYPRE_Int local_num_cols , HYPRE_Int *sizes ); +HYPRE_Int hypre_AuxParCSRMatrixDestroy ( hypre_AuxParCSRMatrix *matrix ); +HYPRE_Int hypre_AuxParCSRMatrixSetRownnz ( hypre_AuxParCSRMatrix *matrix ); +HYPRE_Int hypre_AuxParCSRMatrixInitialize ( hypre_AuxParCSRMatrix *matrix ); +HYPRE_Int hypre_AuxParCSRMatrixInitialize_v2( hypre_AuxParCSRMatrix *matrix, HYPRE_MemoryLocation memory_location ); + +/* aux_par_vector.c */ +HYPRE_Int hypre_AuxParVectorCreate ( hypre_AuxParVector **aux_vector ); +HYPRE_Int hypre_AuxParVectorDestroy ( hypre_AuxParVector *vector ); +HYPRE_Int hypre_AuxParVectorInitialize ( hypre_AuxParVector *vector ); +HYPRE_Int hypre_AuxParVectorInitialize_v2( hypre_AuxParVector *vector, HYPRE_MemoryLocation memory_location ); + +/* IJ_assumed_part.c */ +HYPRE_Int hypre_IJMatrixCreateAssumedPartition ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJVectorCreateAssumedPartition ( hypre_IJVector *vector ); + +/* IJMatrix.c */ +HYPRE_Int hypre_IJMatrixGetRowPartitioning ( HYPRE_IJMatrix matrix , HYPRE_BigInt **row_partitioning ); +HYPRE_Int hypre_IJMatrixGetColPartitioning ( HYPRE_IJMatrix matrix , HYPRE_BigInt **col_partitioning ); +HYPRE_Int hypre_IJMatrixSetObject ( HYPRE_IJMatrix 
matrix , void *object ); + +/* IJMatrix_isis.c */ +HYPRE_Int hypre_IJMatrixSetLocalSizeISIS ( hypre_IJMatrix *matrix , HYPRE_Int local_m , HYPRE_Int local_n ); +HYPRE_Int hypre_IJMatrixCreateISIS ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixSetRowSizesISIS ( hypre_IJMatrix *matrix , HYPRE_Int *sizes ); +HYPRE_Int hypre_IJMatrixSetDiagRowSizesISIS ( hypre_IJMatrix *matrix , HYPRE_Int *sizes ); +HYPRE_Int hypre_IJMatrixSetOffDiagRowSizesISIS ( hypre_IJMatrix *matrix , HYPRE_Int *sizes ); +HYPRE_Int hypre_IJMatrixInitializeISIS ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixInsertBlockISIS ( hypre_IJMatrix *matrix , HYPRE_Int m , HYPRE_Int n , HYPRE_Int *rows , HYPRE_Int *cols , HYPRE_Complex *coeffs ); +HYPRE_Int hypre_IJMatrixAddToBlockISIS ( hypre_IJMatrix *matrix , HYPRE_Int m , HYPRE_Int n , HYPRE_BigInt *rows , HYPRE_BigInt *cols , HYPRE_Complex *coeffs ); +HYPRE_Int hypre_IJMatrixInsertRowISIS ( hypre_IJMatrix *matrix , HYPRE_Int n , HYPRE_BigInt row , HYPRE_BigInt *indices , HYPRE_Complex *coeffs ); +HYPRE_Int hypre_IJMatrixAddToRowISIS ( hypre_IJMatrix *matrix , HYPRE_Int n , HYPRE_BigInt row , HYPRE_BigInt *indices , HYPRE_Complex *coeffs ); +HYPRE_Int hypre_IJMatrixAssembleISIS ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixDistributeISIS ( hypre_IJMatrix *matrix , HYPRE_BigInt *row_starts , HYPRE_BigInt *col_starts ); +HYPRE_Int hypre_IJMatrixApplyISIS ( hypre_IJMatrix *matrix , hypre_ParVector *x , hypre_ParVector *b ); +HYPRE_Int hypre_IJMatrixDestroyISIS ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixSetTotalSizeISIS ( hypre_IJMatrix *matrix , HYPRE_Int size ); + +/* IJMatrix_parcsr.c */ +HYPRE_Int hypre_IJMatrixCreateParCSR ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixSetRowSizesParCSR ( hypre_IJMatrix *matrix , const HYPRE_Int *sizes ); +HYPRE_Int hypre_IJMatrixSetDiagOffdSizesParCSR ( hypre_IJMatrix *matrix , const HYPRE_Int *diag_sizes , const HYPRE_Int *offdiag_sizes ); +HYPRE_Int 
hypre_IJMatrixSetMaxOffProcElmtsParCSR ( hypre_IJMatrix *matrix , HYPRE_Int max_off_proc_elmts ); +HYPRE_Int hypre_IJMatrixInitializeParCSR ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixGetRowCountsParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_BigInt *rows , HYPRE_Int *ncols ); +HYPRE_Int hypre_IJMatrixGetValuesParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , HYPRE_BigInt *rows , HYPRE_BigInt *cols , HYPRE_Complex *values ); +HYPRE_Int hypre_IJMatrixSetValuesParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); +HYPRE_Int hypre_IJMatrixSetAddValuesParCSRDevice ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values, const char *action ); +HYPRE_Int hypre_IJMatrixSetConstantValuesParCSR ( hypre_IJMatrix *matrix , HYPRE_Complex value ); +HYPRE_Int hypre_IJMatrixAddToValuesParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); +HYPRE_Int hypre_IJMatrixDestroyParCSR ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixAssembleOffProcValsParCSR ( hypre_IJMatrix *matrix , HYPRE_Int off_proc_i_indx , HYPRE_Int max_off_proc_elmts , HYPRE_Int current_num_elmts , HYPRE_MemoryLocation memory_location , HYPRE_BigInt *off_proc_i , HYPRE_BigInt *off_proc_j , HYPRE_Complex *off_proc_data ); +HYPRE_Int hypre_FillResponseIJOffProcVals ( void *p_recv_contact_buf , HYPRE_Int contact_size , HYPRE_Int contact_proc , void *ro , MPI_Comm comm , void **p_send_response_buf , HYPRE_Int *response_message_size ); +HYPRE_Int hypre_FindProc ( HYPRE_BigInt *list , HYPRE_BigInt value , HYPRE_Int list_length ); +HYPRE_Int hypre_IJMatrixAssembleParCSR ( hypre_IJMatrix *matrix ); 
+HYPRE_Int hypre_IJMatrixSetValuesOMPParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); +HYPRE_Int hypre_IJMatrixAddToValuesOMPParCSR ( hypre_IJMatrix *matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_Int *row_indexes , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); +HYPRE_Int hypre_IJMatrixAssembleParCSRDevice(hypre_IJMatrix *matrix); +HYPRE_Int hypre_IJMatrixInitializeParCSR_v2(hypre_IJMatrix *matrix, HYPRE_MemoryLocation memory_location); +HYPRE_Int hypre_IJMatrixSetConstantValuesParCSRDevice( hypre_IJMatrix *matrix, HYPRE_Complex value ); + +/* IJMatrix_petsc.c */ +HYPRE_Int hypre_IJMatrixSetLocalSizePETSc ( hypre_IJMatrix *matrix , HYPRE_Int local_m , HYPRE_Int local_n ); +HYPRE_Int hypre_IJMatrixCreatePETSc ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixSetRowSizesPETSc ( hypre_IJMatrix *matrix , HYPRE_Int *sizes ); +HYPRE_Int hypre_IJMatrixSetDiagRowSizesPETSc ( hypre_IJMatrix *matrix , HYPRE_Int *sizes ); +HYPRE_Int hypre_IJMatrixSetOffDiagRowSizesPETSc ( hypre_IJMatrix *matrix , HYPRE_Int *sizes ); +HYPRE_Int hypre_IJMatrixInitializePETSc ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixInsertBlockPETSc ( hypre_IJMatrix *matrix , HYPRE_Int m , HYPRE_Int n , HYPRE_BigInt *rows , HYPRE_BigInt *cols , HYPRE_Complex *coeffs ); +HYPRE_Int hypre_IJMatrixAddToBlockPETSc ( hypre_IJMatrix *matrix , HYPRE_Int m , HYPRE_Int n , HYPRE_Int *rows , HYPRE_Int *cols , HYPRE_Complex *coeffs ); +HYPRE_Int hypre_IJMatrixInsertRowPETSc ( hypre_IJMatrix *matrix , HYPRE_Int n , HYPRE_BigInt row , HYPRE_BigInt *indices , HYPRE_Complex *coeffs ); +HYPRE_Int hypre_IJMatrixAddToRowPETSc ( hypre_IJMatrix *matrix , HYPRE_Int n , HYPRE_BigInt row , HYPRE_BigInt *indices , HYPRE_Complex *coeffs ); +HYPRE_Int hypre_IJMatrixAssemblePETSc ( hypre_IJMatrix *matrix ); +HYPRE_Int 
hypre_IJMatrixDistributePETSc ( hypre_IJMatrix *matrix , HYPRE_BigInt *row_starts , HYPRE_BigInt *col_starts ); +HYPRE_Int hypre_IJMatrixApplyPETSc ( hypre_IJMatrix *matrix , hypre_ParVector *x , hypre_ParVector *b ); +HYPRE_Int hypre_IJMatrixDestroyPETSc ( hypre_IJMatrix *matrix ); +HYPRE_Int hypre_IJMatrixSetTotalSizePETSc ( hypre_IJMatrix *matrix , HYPRE_Int size ); + +/* IJVector.c */ +HYPRE_Int hypre_IJVectorDistribute ( HYPRE_IJVector vector , const HYPRE_Int *vec_starts ); +HYPRE_Int hypre_IJVectorZeroValues ( HYPRE_IJVector vector ); + +/* IJVector_parcsr.c */ +HYPRE_Int hypre_IJVectorCreatePar ( hypre_IJVector *vector , HYPRE_BigInt *IJpartitioning ); +HYPRE_Int hypre_IJVectorDestroyPar ( hypre_IJVector *vector ); +HYPRE_Int hypre_IJVectorInitializePar ( hypre_IJVector *vector ); +HYPRE_Int hypre_IJVectorInitializePar_v2(hypre_IJVector *vector, HYPRE_MemoryLocation memory_location); +HYPRE_Int hypre_IJVectorSetMaxOffProcElmtsPar ( hypre_IJVector *vector , HYPRE_Int max_off_proc_elmts ); +HYPRE_Int hypre_IJVectorDistributePar ( hypre_IJVector *vector , const HYPRE_Int *vec_starts ); +HYPRE_Int hypre_IJVectorZeroValuesPar ( hypre_IJVector *vector ); +HYPRE_Int hypre_IJVectorSetValuesPar ( hypre_IJVector *vector , HYPRE_Int num_values , const HYPRE_BigInt *indices , const HYPRE_Complex *values ); +HYPRE_Int hypre_IJVectorAddToValuesPar ( hypre_IJVector *vector , HYPRE_Int num_values , const HYPRE_BigInt *indices , const HYPRE_Complex *values ); +HYPRE_Int hypre_IJVectorAssemblePar ( hypre_IJVector *vector ); +HYPRE_Int hypre_IJVectorGetValuesPar ( hypre_IJVector *vector , HYPRE_Int num_values , const HYPRE_BigInt *indices , HYPRE_Complex *values ); +HYPRE_Int hypre_IJVectorAssembleOffProcValsPar ( hypre_IJVector *vector , HYPRE_Int max_off_proc_elmts , HYPRE_Int current_num_elmts , HYPRE_MemoryLocation memory_location , HYPRE_BigInt *off_proc_i , HYPRE_Complex *off_proc_data ); +HYPRE_Int hypre_IJVectorSetAddValuesParDevice(hypre_IJVector *vector, HYPRE_Int 
num_values, const HYPRE_BigInt *indices, const HYPRE_Complex *values, const char *action); +HYPRE_Int hypre_IJVectorAssembleParDevice(hypre_IJVector *vector); + +/* HYPRE_IJMatrix.c */ +HYPRE_Int HYPRE_IJMatrixCreate ( MPI_Comm comm , HYPRE_BigInt ilower , HYPRE_BigInt iupper , HYPRE_BigInt jlower , HYPRE_BigInt jupper , HYPRE_IJMatrix *matrix ); +HYPRE_Int HYPRE_IJMatrixDestroy ( HYPRE_IJMatrix matrix ); +HYPRE_Int HYPRE_IJMatrixInitialize ( HYPRE_IJMatrix matrix ); +HYPRE_Int HYPRE_IJMatrixSetPrintLevel ( HYPRE_IJMatrix matrix , HYPRE_Int print_level ); +HYPRE_Int HYPRE_IJMatrixSetValues ( HYPRE_IJMatrix matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); +HYPRE_Int HYPRE_IJMatrixSetConstantValues ( HYPRE_IJMatrix matrix , HYPRE_Complex value ); +HYPRE_Int HYPRE_IJMatrixAddToValues ( HYPRE_IJMatrix matrix , HYPRE_Int nrows , HYPRE_Int *ncols , const HYPRE_BigInt *rows , const HYPRE_BigInt *cols , const HYPRE_Complex *values ); +HYPRE_Int HYPRE_IJMatrixAssemble ( HYPRE_IJMatrix matrix ); +HYPRE_Int HYPRE_IJMatrixGetRowCounts ( HYPRE_IJMatrix matrix , HYPRE_Int nrows , HYPRE_BigInt *rows , HYPRE_Int *ncols ); +HYPRE_Int HYPRE_IJMatrixGetValues ( HYPRE_IJMatrix matrix , HYPRE_Int nrows , HYPRE_Int *ncols , HYPRE_BigInt *rows , HYPRE_BigInt *cols , HYPRE_Complex *values ); +HYPRE_Int HYPRE_IJMatrixSetObjectType ( HYPRE_IJMatrix matrix , HYPRE_Int type ); +HYPRE_Int HYPRE_IJMatrixGetObjectType ( HYPRE_IJMatrix matrix , HYPRE_Int *type ); +HYPRE_Int HYPRE_IJMatrixGetLocalRange ( HYPRE_IJMatrix matrix , HYPRE_BigInt *ilower , HYPRE_BigInt *iupper , HYPRE_BigInt *jlower , HYPRE_BigInt *jupper ); +HYPRE_Int HYPRE_IJMatrixGetObject ( HYPRE_IJMatrix matrix , void **object ); +HYPRE_Int HYPRE_IJMatrixSetRowSizes ( HYPRE_IJMatrix matrix , const HYPRE_Int *sizes ); +HYPRE_Int HYPRE_IJMatrixSetDiagOffdSizes ( HYPRE_IJMatrix matrix , const HYPRE_Int *diag_sizes , const HYPRE_Int *offdiag_sizes ); 
+HYPRE_Int HYPRE_IJMatrixSetMaxOffProcElmts ( HYPRE_IJMatrix matrix , HYPRE_Int max_off_proc_elmts ); +HYPRE_Int HYPRE_IJMatrixRead ( const char *filename , MPI_Comm comm , HYPRE_Int type , HYPRE_IJMatrix *matrix_ptr ); +HYPRE_Int HYPRE_IJMatrixPrint ( HYPRE_IJMatrix matrix , const char *filename ); +HYPRE_Int HYPRE_IJMatrixSetOMPFlag ( HYPRE_IJMatrix matrix , HYPRE_Int omp_flag ); + +/* HYPRE_IJVector.c */ +HYPRE_Int HYPRE_IJVectorCreate ( MPI_Comm comm , HYPRE_BigInt jlower , HYPRE_BigInt jupper , HYPRE_IJVector *vector ); +HYPRE_Int HYPRE_IJVectorDestroy ( HYPRE_IJVector vector ); +HYPRE_Int HYPRE_IJVectorInitialize ( HYPRE_IJVector vector ); +HYPRE_Int HYPRE_IJVectorSetPrintLevel ( HYPRE_IJVector vector , HYPRE_Int print_level ); +HYPRE_Int HYPRE_IJVectorSetValues ( HYPRE_IJVector vector , HYPRE_Int nvalues , const HYPRE_BigInt *indices , const HYPRE_Complex *values ); +HYPRE_Int HYPRE_IJVectorAddToValues ( HYPRE_IJVector vector , HYPRE_Int nvalues , const HYPRE_BigInt *indices , const HYPRE_Complex *values ); +HYPRE_Int HYPRE_IJVectorAssemble ( HYPRE_IJVector vector ); +HYPRE_Int HYPRE_IJVectorGetValues ( HYPRE_IJVector vector , HYPRE_Int nvalues , const HYPRE_BigInt *indices , HYPRE_Complex *values ); +HYPRE_Int HYPRE_IJVectorSetMaxOffProcElmts ( HYPRE_IJVector vector , HYPRE_Int max_off_proc_elmts ); +HYPRE_Int HYPRE_IJVectorSetObjectType ( HYPRE_IJVector vector , HYPRE_Int type ); +HYPRE_Int HYPRE_IJVectorGetObjectType ( HYPRE_IJVector vector , HYPRE_Int *type ); +HYPRE_Int HYPRE_IJVectorGetLocalRange ( HYPRE_IJVector vector , HYPRE_BigInt *jlower , HYPRE_BigInt *jupper ); +HYPRE_Int HYPRE_IJVectorGetObject ( HYPRE_IJVector vector , void **object ); +HYPRE_Int HYPRE_IJVectorRead ( const char *filename , MPI_Comm comm , HYPRE_Int type , HYPRE_IJVector *vector_ptr ); +HYPRE_Int HYPRE_IJVectorPrint ( HYPRE_IJVector vector , const char *filename ); diff --git a/src/blas/CMakeLists.txt b/src/blas/CMakeLists.txt index 136301397..fd6ce340b 100644 --- 
a/src/blas/CMakeLists.txt +++ b/src/blas/CMakeLists.txt @@ -3,6 +3,12 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +set(HDRS + _hypre_blas.h + f2c.h + hypre_blas.h +) + set(SRCS dasum.c daxpy.c @@ -30,8 +36,11 @@ set(SRCS xerbla.c ) -convert_filenames_to_full_paths(SRCS) - -set(HYPRE_SOURCES ${HYPRE_SOURCES} ${SRCS} PARENT_SCOPE) +target_sources(${PROJECT_NAME} + PRIVATE ${SRCS} + ${HDRS} +) +convert_filenames_to_full_paths(HDRS) +set(HYPRE_HEADERS ${HYPRE_HEADERS} ${HDRS} PARENT_SCOPE) diff --git a/src/config/HYPREConfig.cmake.in b/src/config/HYPREConfig.cmake.in new file mode 100644 index 000000000..f89e19d84 --- /dev/null +++ b/src/config/HYPREConfig.cmake.in @@ -0,0 +1,61 @@ +# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) + +set(HYPRE_ENABLE_SHARED @HYPRE_ENABLE_SHARED@) +set(HYPRE_ENABLE_BIGINT @HYPRE_ENABLE_BIGINT@) +set(HYPRE_ENABLE_MIXEDINT @HYPRE_ENABLE_MIXEDINT@) +set(HYPRE_ENABLE_SINGLE @HYPRE_ENABLE_SINGLE@) +set(HYPRE_ENABLE_LONG_DOUBLE @HYPRE_ENABLE_LONG_DOUBLE@) +set(HYPRE_ENABLE_COMPLEX @HYPRE_ENABLE_COMPLEX@) +set(HYPRE_ENABLE_HYPRE_BLAS @HYPRE_ENABLE_HYPRE_BLAS@) +set(HYPRE_ENABLE_HYPRE_LAPACK @HYPRE_ENABLE_HYPRE_LAPACK@) +set(HYPRE_ENABLE_PERSISTENT_COMM @HYPRE_ENABLE_PERSISTENT_COMM@) +set(HYPRE_ENABLE_FEI @HYPRE_ENABLE_FEI@) +set(HYPRE_WITH_MPI @HYPRE_WITH_MPI@) +set(HYPRE_WITH_OPENMP @HYPRE_WITH_OPENMP@) +set(HYPRE_WITH_HOPSCOTCH @HYPRE_WITH_HOPSCOTCH@) +set(HYPRE_USING_DSUPERLU @HYPRE_USING_DSUPERLU@) +set(HYPRE_WITH_CALIPER @HYPRE_WITH_CALIPER@) +set(HYPRE_PRINT_ERRORS @HYPRE_PRINT_ERRORS@) +set(HYPRE_TIMING @HYPRE_TIMING@) +set(HYPRE_BUILD_EXAMPLES @HYPRE_BUILD_EXAMPLES@) +set(HYPRE_BUILD_TESTS @HYPRE_BUILD_TESTS@) +set(HYPRE_USING_HOST_MEMORY @HYPRE_USING_HOST_MEMORY@) +set(HYPRE_WITH_CUDA @HYPRE_WITH_CUDA@) 
+set(HYPRE_ENABLE_UNIFIED_MEMORY @HYPRE_ENABLE_UNIFIED_MEMORY@) +set(HYPRE_ENABLE_CUDA_STREAMS @HYPRE_ENABLE_CUDA_STREAMS@) +set(HYPRE_ENABLE_CUSPARSE @HYPRE_ENABLE_CUSPARSE@) +set(HYPRE_ENABLE_DEVICE_POOL @HYPRE_ENABLE_DEVICE_POOL@) +set(HYPRE_ENABLE_CUBLAS @HYPRE_ENABLE_CUBLAS@) +set(HYPRE_ENABLE_CURAND @HYPRE_ENABLE_CURAND@) +set(HYPRE_ENABLE_GPU_PROFILING @HYPRE_ENABLE_GPU_PROFILING@) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") + +if(NOT HYPRE_ENABLE_HYPRE_BLAS) + find_dependency(BLAS) +endif() + +if(NOT HYPRE_ENABLE_HYPRE_LAPACK) + find_dependency(LAPACK) +endif() + +if(HYPRE_USING_DSUPERLU) + list(APPEND TPL_LIBRARIES @TPL_DSUPERLU_LIBRARIES@ stdc++) +endif() + +if(HYPRE_WITH_MPI) + find_dependency(MPI @MPI_C_VERSION@ EXACT) +endif() + +if(HYPRE_WITH_OPENMP) + find_dependency(OpenMP) +endif() + +include("${CMAKE_CURRENT_LIST_DIR}/HYPRETargets.cmake") diff --git a/src/config/HYPRE_config.h.cmake.in b/src/config/HYPRE_config.h.cmake.in index 137b1d3d2..a2ae68e20 100644 --- a/src/config/HYPRE_config.h.cmake.in +++ b/src/config/HYPRE_config.h.cmake.in @@ -7,65 +7,104 @@ #define HYPRE_RELEASE_NAME "@CMAKE_PROJECT_NAME@" #define HYPRE_RELEASE_VERSION "@HYPRE_VERSION@" +#define HYPRE_RELEASE_NUMBER @HYPRE_NUMBER@ #define HYPRE_RELEASE_DATE "@HYPRE_DATE@" #define HYPRE_RELEASE_TIME "@HYPRE_TIME@" #define HYPRE_RELEASE_BUGS "@HYPRE_BUGS@" /* Use long long int for HYPRE_BigInt */ -#cmakedefine HYPRE_MIXEDINT +#cmakedefine HYPRE_MIXEDINT 1 /* Use long long int for HYPRE_BigInt and HYPRE_Int*/ -#cmakedefine HYPRE_BIGINT +#cmakedefine HYPRE_BIGINT 1 /* Use single precision values for HYPRE_Real */ -#cmakedefine HYPRE_SINGLE +#cmakedefine HYPRE_SINGLE 1 /* Use quad precision values for HYPRE_Real */ -#cmakedefine HYPRE_LONG_DOUBLE +#cmakedefine HYPRE_LONG_DOUBLE 1 /* Use complex values */ -#cmakedefine HYPRE_COMPLEX +#cmakedefine HYPRE_COMPLEX 1 + +/* Debug mode */ +#cmakedefine HYPRE_DEBUG 1 /* Define to be the max dimension size (must be at least 3) */ 
#define HYPRE_MAXDIM 3 /* Use persistent communication */ -#cmakedefine HYPRE_USING_PERSISTENT_COMM +#cmakedefine HYPRE_USING_PERSISTENT_COMM 1 /* Use hopscotch hashing */ -#cmakedefine HYPRE_HOPSCOTCH +#cmakedefine HYPRE_HOPSCOTCH 1 /* Compile without MPI */ -#cmakedefine HYPRE_SEQUENTIAL +#cmakedefine HYPRE_SEQUENTIAL 1 /* Use HYPRE timing routines */ -#cmakedefine HYPRE_TIMING +#cmakedefine HYPRE_TIMING 1 /* Use internal BLAS library */ -#cmakedefine HYPRE_USING_HYPRE_BLAS +#cmakedefine HYPRE_USING_HYPRE_BLAS 1 /* Use internal LAPACK library */ -#cmakedefine HYPRE_USING_HYPRE_LAPACK - -/* Use assumed partition */ -#cmakedefine HYPRE_NO_GLOBAL_PARTITION +#cmakedefine HYPRE_USING_HYPRE_LAPACK 1 /* Print HYPRE errors */ -#cmakedefine HYPRE_PRINT_ERRORS +#cmakedefine HYPRE_PRINT_ERRORS 1 /* Use OpenMP */ -#cmakedefine HYPRE_USING_OPENMP +#cmakedefine HYPRE_USING_OPENMP 1 /* Use Caliper instrumentation */ -#cmakedefine HYPRE_USING_CALIPER +#cmakedefine HYPRE_USING_CALIPER 1 + +/* Use if executing on device with CUDA */ +#cmakedefine HYPRE_USING_CUDA 1 + +/* Use cuBLAS */ +#cmakedefine HYPRE_USING_CUBLAS 1 + +/* Use CUDA streams */ +#cmakedefine HYPRE_USING_CUDA_STREAMS 1 + +/* Use cuRAND */ +#cmakedefine HYPRE_USING_CURAND 1 + +/* Use cuSPARSE */ +#cmakedefine HYPRE_USING_CUSPARSE 1 + +/* Use device memory pool */ +#cmakedefine HYPRE_USING_DEVICE_POOL 1 + +/* Use unified memory */ +#cmakedefine HYPRE_USING_UNIFIED_MEMORY 1 + +/* Use device memory without UM */ +#cmakedefine HYPRE_USING_DEVICE_MEMORY 1 + +/* Use if executing on device with OpenMP */ +#cmakedefine HYPRE_USING_DEVICE_OPENMP 1 + +/* Use if executing on GPU device */ +#cmakedefine HYPRE_USING_GPU 1 + +/* Use HIP */ +#cmakedefine HYPRE_USING_HIP 1 + +/* Use NVTX */ +#cmakedefine HYPRE_USING_NVTX 1 /* Use SuperLU_Dist */ -#cmakedefine HYPRE_USING_DSUPERLU +#cmakedefine HYPRE_USING_DSUPERLU 1 /* Use SuperLU */ -#cmakedefine HAVE_SUPERLU +#cmakedefine HAVE_SUPERLU 1 + +/* Use MPI */ +#cmakedefine 
HYPRE_HAVE_MPI 1 -/* #undef HYPRE_HAVE_MPI */ /* #undef HYPRE_HAVE_MPI_COMM_F2C */ /* Define as follows to set the Fortran name mangling scheme: @@ -89,3 +128,6 @@ /* As F77_FUNC, but for C identifiers containing underscores. */ #define HYPRE_F77_FUNC_(name,NAME) name ## __ + +/* Define to 1 if using host memory only */ +#cmakedefine HYPRE_USING_HOST_MEMORY 1 diff --git a/src/config/HYPRE_config.h.in b/src/config/HYPRE_config.h.in index dd1e59b94..98425a654 100644 --- a/src/config/HYPRE_config.h.in +++ b/src/config/HYPRE_config.h.in @@ -1,190 +1,272 @@ -/****************************************************************************** - * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other - * HYPRE Project Developers. See the top-level COPYRIGHT file for details. - * - * SPDX-License-Identifier: (Apache-2.0 OR MIT) - ******************************************************************************/ - /* config/HYPRE_config.h.in. Generated from configure.in by autoheader. */ -/* Release name */ -#undef HYPRE_RELEASE_NAME +/* Define to dummy `main' function (if any) required to link to the Fortran + libraries. */ +#undef FC_DUMMY_MAIN -/* Version number */ -#undef HYPRE_RELEASE_VERSION +/* Define if F77 and FC dummy `main' functions are identical. */ +#undef FC_DUMMY_MAIN_EQ_F77 -/* Date of release */ -#undef HYPRE_RELEASE_DATE +/* Define to a macro mangling the given C identifier (in lower and upper + case), which must not contain underscores, for linking with Fortran. */ +#undef FC_FUNC -/* Time of release */ -#undef HYPRE_RELEASE_TIME +/* As FC_FUNC, but for C identifiers containing underscores. */ +#undef FC_FUNC_ -/* Bug reports */ -#undef HYPRE_RELEASE_BUGS +/* Define to 1 if you have the header file. */ +#undef HAVE_INTTYPES_H -/* Define to 1 for Solaris. */ -#undef HYPRE_SOLARIS +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_MEMORY_H -/* Define to 1 for Linux on platforms running any version of CHAOS */ -#undef HYPRE_LINUX_CHAOS +/* Define to 1 if using MLI */ +#undef HAVE_MLI -/* Define to 1 for Linux platforms */ -#undef HYPRE_LINUX +/* Define to 1 if you have the `MPI_Comm_f2c' function. */ +#undef HAVE_MPI_COMM_F2C -/* Define to 1 for Alpha platforms */ -#undef HYPRE_ALPHA +/* Define to 1 if you have the header file. */ +#undef HAVE_STDINT_H -/* Define to 1 for RS6000 platforms */ -#undef HYPRE_RS6000 +/* Define to 1 if you have the header file. */ +#undef HAVE_STDLIB_H -/* Define to 1 for IRIX64 platforms */ -#undef HYPRE_IRIX64 +/* Define to 1 if you have the header file. */ +#undef HAVE_STRINGS_H -/* Define to 1 if using long long int for HYPRE_BigInt */ -#undef HYPRE_MIXEDINT +/* Define to 1 if you have the header file. */ +#undef HAVE_STRING_H -/* Define to 1 if using long long int for HYPRE_Int and HYPRE_BigInt */ -#undef HYPRE_BIGINT +/* Define to 1 if using SuperLU */ +#undef HAVE_SUPERLU -/* Define to 1 if using single precision values for HYPRE_Real */ -#undef HYPRE_SINGLE +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_STAT_H -/* Define to 1 if using quad precision values for HYPRE_Real */ -#undef HYPRE_LONG_DOUBLE +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the header file. 
*/ +#undef HAVE_UNISTD_H + +/* Define to 1 for Alpha platforms */ +#undef HYPRE_ALPHA + +/* Define to 1 if using long long int for HYPRE_Int and HYPRE_BigInt */ +#undef HYPRE_BIGINT /* Define to 1 if using complex values */ #undef HYPRE_COMPLEX -/* Define to be the max dimension size (must be at least 3) */ -#undef HYPRE_MAXDIM +/* Define to 1 if in debug mode */ +#undef HYPRE_DEBUG -/* Define to 1 if using persistent communication */ -#undef HYPRE_USING_PERSISTENT_COMM +/* Define to 1 if using OpenMP on device [target alloc version] */ +#undef HYPRE_DEVICE_OPENMP_ALLOC -/* Define to 1 if hopscotch hashing */ -#undef HYPRE_HOPSCOTCH +/* Define to 1 if strictly checking OpenMP offload directives */ +#undef HYPRE_DEVICE_OPENMP_CHECK + +/* Define as follows to set the Fortran name mangling scheme: 0 = unspecified; + 1 = no underscores; 2 = one underscore; 3 = two underscores; 4 = caps, no + underscores; 5 = one underscore before and after */ +#undef HYPRE_FMANGLE + +/* BLAS mangling */ +#undef HYPRE_FMANGLE_BLAS + +/* LAPACK mangling */ +#undef HYPRE_FMANGLE_LAPACK /* Define to 1 if an MPI library is found */ #undef HYPRE_HAVE_MPI -/* Define to 1 if Node Aware MPI library is used */ -#undef HYPRE_USING_NODE_AWARE_MPI - /* Define to 1 if the routine MPI_Comm_f2c is found */ #undef HYPRE_HAVE_MPI_COMM_F2C -/* Disable MPI, enable serial codes */ -#undef HYPRE_SEQUENTIAL +/* Define to 1 if hopscotch hashing */ +#undef HYPRE_HOPSCOTCH -/* Using HYPRE timing routines */ -#undef HYPRE_TIMING +/* Define to 1 for HP platforms */ +#undef HYPRE_HPPA -/* Using dxml for BLAS */ -#undef HYPRE_USING_DXML +/* Define to 1 for IRIX64 platforms */ +#undef HYPRE_IRIX64 -/* Using essl for BLAS */ -#undef HYPRE_USING_ESSL +/* Define to 1 for Linux platform */ +#undef HYPRE_LINUX -/* Using internal Hypre routines */ -#undef HYPRE_USING_HYPRE_BLAS +/* Define to 1 for Linux on platforms running any version of CHAOS */ +#undef HYPRE_LINUX_CHAOS -/* Using internal Hypre routines */ -#undef 
HYPRE_USING_HYPRE_LAPACK +/* Define to 1 if using quad precision values for HYPRE_Real */ +#undef HYPRE_LONG_DOUBLE + +/* Define to be the max dimension size (must be at least 3) */ +#undef HYPRE_MAXDIM -/* No global partitioning being used */ -#undef HYPRE_NO_GLOBAL_PARTITION +/* Define to 1 if using long long int for HYPRE_BigInt */ +#undef HYPRE_MIXEDINT /* Print HYPRE errors */ #undef HYPRE_PRINT_ERRORS -/* Enable OpenMP support */ -#undef HYPRE_USING_OPENMP +/* Bug reports */ +#undef HYPRE_RELEASE_BUGS -/*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * MEMORY - *- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/ +/* Date of release */ +#undef HYPRE_RELEASE_DATE -/* Define to 1 if using host memory only */ -#undef HYPRE_USING_HOST_MEMORY +/* Release name */ +#undef HYPRE_RELEASE_NAME -/* Define to 1 if using device memory without UM */ -#undef HYPRE_USING_DEVICE_MEMORY +/* Release number */ +#undef HYPRE_RELEASE_NUMBER -/* Define to 1 if using unified memory */ -#undef HYPRE_USING_UNIFIED_MEMORY +/* Time of release */ +#undef HYPRE_RELEASE_TIME + +/* Release version */ +#undef HYPRE_RELEASE_VERSION + +/* Define to 1 for RS6000 platforms */ +#undef HYPRE_RS6000 + +/* Disable MPI, enable serial codes. */ +#undef HYPRE_SEQUENTIAL + +/* Define to 1 if using single precision values for HYPRE_Real */ +#undef HYPRE_SINGLE + +/* Define to 1 for Solaris. 
*/ +#undef HYPRE_SOLARIS + +/* Using HYPRE timing routines */ +#undef HYPRE_TIMING + +/* Define to 1 if Caliper instrumentation is enabled */ +#undef HYPRE_USING_CALIPER + +/* Define to 1 if using cuBLAS */ +#undef HYPRE_USING_CUBLAS -/*- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * EXECUTION - *- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/ /* Define to 1 if executing on device with CUDA */ #undef HYPRE_USING_CUDA -/* Define to 1 if executing on device with OpenMP */ +/* Define to 1 if using CUDA streams */ +#undef HYPRE_USING_CUDA_STREAMS + +/* Define to 1 if using cuRAND */ +#undef HYPRE_USING_CURAND + +/* Define to 1 if using cuSPARSE */ +#undef HYPRE_USING_CUSPARSE + +/* Define to 1 if using device memory without UM */ +#undef HYPRE_USING_DEVICE_MEMORY + +/* Define to 1 if executing on device with OpenMP */ #undef HYPRE_USING_DEVICE_OPENMP -/* Define to 1 if using OpenMP on device [target alloc version] */ -#undef HYPRE_DEVICE_OPENMP_ALLOC +/* Define to 1 if using device pooling allocator */ +#undef HYPRE_USING_DEVICE_POOL + +/* Define to 1 if using DSuperLU */ +#undef HYPRE_USING_DSUPERLU -/* Define to 1 if using OpenMP on device [target mapped version] */ -#undef HYPRE_DEVICE_OPENMP_MAPPED +/* Using dxml for Blas */ +#undef HYPRE_USING_DXML -/* Define to 1 if strictly checking OpenMP offload directives */ -#undef HYPRE_DEVICE_OPENMP_CHECK +/* Using ESSL for Lapack */ +#undef HYPRE_USING_ESSL -/* Define to 1 if executing on host/device with RAJA */ -#undef HYPRE_USING_RAJA +/* Define to 1 if executing on GPU device */ +#undef HYPRE_USING_GPU + +/* HIP being used */ +#undef HYPRE_USING_HIP + +/* Define to 1 if using host memory only */ +#undef HYPRE_USING_HOST_MEMORY + +/* Using internal HYPRE routines */ +#undef HYPRE_USING_HYPRE_BLAS + +/* Using internal HYPRE routines */ +#undef HYPRE_USING_HYPRE_LAPACK /* Define to 1 if executing on host/device with KOKKOS */ #undef HYPRE_USING_KOKKOS -/* Define to 1 
if using NVIDIA Tools Extension (NVTX) */ +/* Define to 1 if want to track memory operations in hypre */ +#undef HYPRE_USING_MEMORY_TRACKER + +/* Define to 1 if Node Aware MPI library is used */ +#undef HYPRE_USING_NODE_AWARE_MPI + +/* NVTX being used */ #undef HYPRE_USING_NVTX -/* Define to 1 if using cuSPARSE */ -#undef HYPRE_USING_CUSPARSE +/* Enable OpenMP support */ +#undef HYPRE_USING_OPENMP -/* Define to 1 if using cuBLAS */ -#undef HYPRE_USING_CUBLAS +/* Define to 1 if using persistent communication */ +#undef HYPRE_USING_PERSISTENT_COMM -/* Define to 1 if using cuRAND */ -#undef HYPRE_USING_CURAND +/* Define to 1 if executing on host/device with RAJA */ +#undef HYPRE_USING_RAJA -/* Define to 1 if using GPU aware MPI */ -#undef HYPRE_WITH_GPU_AWARE_MPI +/* rocBLAS being used */ +#undef HYPRE_USING_ROCBLAS -/* Define to 1 if using CUDA streams */ -#undef HYPRE_USING_CUDA_STREAMS +/* rocRAND being used */ +#undef HYPRE_USING_ROCRAND -/* Define as follows to set the Fortran name mangling scheme: - * 0 = unspecified - * 1 = no underscores - * 2 = one underscore - * 3 = two underscores - * 4 = caps, no underscores - * 5 = one underscore before and after */ -#undef HYPRE_FMANGLE +/* rocSPARSE being used */ +#undef HYPRE_USING_ROCSPARSE -/* Define as in HYPRE_FMANGLE to set the BLAS name mangling scheme */ -#undef HYPRE_FMANGLE_BLAS +/* Define to 1 if using AMD rocTX profiling */ +#undef HYPRE_USING_ROCTX -/* Define as in HYPRE_FMANGLE to set the LAPACK name mangling scheme */ -#undef HYPRE_FMANGLE_LAPACK +/* Define to 1 if using UMPIRE */ +#undef HYPRE_USING_UMPIRE -/* Define to a macro mangling the given C identifier (in lower and upper - * case), which must not contain underscores, for linking with Fortran. */ -#undef FC_FUNC +/* Define to 1 if using UMPIRE for device memory */ +#undef HYPRE_USING_UMPIRE_DEVICE -/* As HYPRE_FC_FUNC, but for C identifiers containing underscores. 
*/ -#undef FC_FUNC_ +/* Define to 1 if using UMPIRE for host memory */ +#undef HYPRE_USING_UMPIRE_HOST -/* Define to 1 if Caliper instrumentation is enabled */ -#undef HYPRE_USING_CALIPER +/* Define to 1 if using UMPIRE for pinned memory */ +#undef HYPRE_USING_UMPIRE_PINNED -/* Define to 1 if using SuperLU */ -#undef HAVE_SUPERLU +/* Define to 1 if using UMPIRE for unified memory */ +#undef HYPRE_USING_UMPIRE_UM -/* Define to 1 if using DSuperLU */ -#undef HYPRE_USING_DSUPERLU +/* Define to 1 if using unified memory */ +#undef HYPRE_USING_UNIFIED_MEMORY -/* Define to 1 if using MLI */ -#undef HAVE_MLI +/* Define to 1 if using GPU aware MPI */ +#undef HYPRE_WITH_GPU_AWARE_MPI + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. 
*/ +#undef STDC_HEADERS diff --git a/src/config/Makefile.config.in b/src/config/Makefile.config.in index 6e29c531d..a99223c34 100644 --- a/src/config/Makefile.config.in +++ b/src/config/Makefile.config.in @@ -34,32 +34,37 @@ HYPRE_INC_INSTALL = @HYPRE_INCINSTALL@ HYPRE_LIB_SUFFIX = @HYPRE_LIBSUFFIX@ .SUFFIXES: -.SUFFIXES: .o .f .c .C .cxx .cc .cu +.SUFFIXES: .o .obj .f .c .C .cxx .cc + +FC = @FC@ +FFLAGS = @FFLAGS@ @FCFLAGS@ $(FC_COMPILE_FLAGS) + +CC = @CC@ +CFLAGS = @CFLAGS@ @DEFS@ $(C_COMPILE_FLAGS) + +CXX = @CXX@ +CXXFLAGS = @CXXFLAGS@ @DEFS@ $(CXX_COMPILE_FLAGS) + +CUCC = @CUCC@ ${CUDA_ARCH} +CUFLAGS = @CUFLAGS@ @DEFS@ ${C_COMPILE_FLAGS} .f.o: - $(FC) $(FFLAGS) -c $< + $(FC) $(FFLAGS) -c $< .c.o: - $(CC) $(CFLAGS) -c $< + $(CC) $(CFLAGS) -c $< .C.o: $(CXX) $(CXXFLAGS) -c $< .cxx.o: $(CXX) $(CXXFLAGS) -c $< .cc.o: $(CXX) $(CXXFLAGS) -c $< -#.cu.o: -# $(CUCC) $(CUFLAGS) -c $< - -FC = @FC@ -FFLAGS = @FFLAGS@ @FCFLAGS@ $(FC_COMPILE_FLAGS) - -CC = @CC@ -CFLAGS = @CFLAGS@ @DEFS@ $(C_COMPILE_FLAGS) - -CXX = @CXX@ -CXXFLAGS = @CXXFLAGS@ @DEFS@ $(CXX_COMPILE_FLAGS) - -#CUCC = @CUCC@ -#CUFLAGS = @CUFLAGS@ @DEFS@ ${CU_COMPILE_FLAGS} +ifeq ($(CUCC), ) +.c.obj: + $(CC) $(CFLAGS) -c $< -o $@ +else +.c.obj: + $(CUCC) $(CUFLAGS) -c $< -o $@ +endif LINK_FC = @LINK_FC@ LINK_CC = @LINK_CC@ @@ -82,19 +87,19 @@ AR = @AR@ RANLIB = @RANLIB@ LDFLAGS = @LDFLAGS@ -LIBS = @LIBS@ @CALIPER_LIBS@ @HYPRE_CUDA_LIBS@ @RAJA_LIBS@ @KOKKOS_LIBS@ +LIBS = @LIBS@ ${CALIPER_LIBS} ${HYPRE_CUDA_LIBS} ${HYPRE_HIP_LIBS} ${HYPRE_RAJA_LIB_DIR} ${HYPRE_RAJA_LIB} ${HYPRE_KOKKOS_LIB_DIR} ${HYPRE_KOKKOS_LIB} ${HYPRE_UMPIRE_LIB_DIR} ${HYPRE_UMPIRE_LIB} FLIBS = @FLIBS@ -INCLUDES = @CALIPER_INCLUDE@ @HYPRE_RAJA_INCLUDE@ @HYPRE_KOKKOS_INCLUDE@ @HYPRE_CUDA_INCL@ @HYPRE_NAP_INCLUDE@ +INCLUDES = ${CALIPER_INCLUDE} ${HYPRE_CUDA_INCLUDE} ${HYPRE_HIP_INCLUDE} ${HYPRE_RAJA_INCLUDE} ${HYPRE_KOKKOS_INCLUDE} ${HYPRE_UMPIRE_INCLUDE} ${HYPRE_NAP_INCLUDE} ################################################################## -## 
LAPACK Library Flags +## LAPACK Library Flags ################################################################## LAPACKLIBS = @LAPACKLIBS@ LAPACKLIBDIRS = @LAPACKLIBDIRS@ ################################################################## -## BLAS Library Flags +## BLAS Library Flags ################################################################## BLASLIBS = @BLASLIBS@ BLASLIBDIRS = @BLASLIBDIRS@ @@ -115,8 +120,16 @@ HYPRE_NAP_INCLUDE = @HYPRE_NAP_INCLUDE@ ################################################################## ## CUDA options ################################################################## -HYPRE_CUDA_INCL = @HYPRE_CUDA_INCL@ -HYPRE_CUDA_LIBS = @HYPRE_CUDA_LIBS@ +HYPRE_CUDA_PATH = @HYPRE_CUDA_PATH@ +HYPRE_CUDA_INCLUDE = @HYPRE_CUDA_INCLUDE@ +HYPRE_CUDA_LIBS = @HYPRE_CUDA_LIBS@ +CUDA_ARCH = @HYPRE_CUDA_GENCODE@ + +################################################################## +## HIP options +################################################################## +HYPRE_HIP_INCLUDE = @HYPRE_HIP_INCL@ +HYPRE_HIP_LIBS = @HYPRE_HIP_LIBS@ ################################################################## ## Caliper options @@ -167,4 +180,10 @@ HYPRE_KOKKOS_SRC_DIR = @HYPRE_KOKKOS_SRC_DIR@ HYPRE_KOKKOS_LIB_DIR = @HYPRE_KOKKOS_LIB_DIR@ HYPRE_KOKKOS_INCLUDE = @HYPRE_KOKKOS_INCLUDE@ HYPRE_KOKKOS_LIB = @HYPRE_KOKKOS_LIB@ -@HYPRE_KOKKOS_INC_FILE@ + +################################################################## +## UMPIRE options +################################################################## +HYPRE_UMPIRE_LIB_DIR = @HYPRE_UMPIRE_LIB_DIR@ +HYPRE_UMPIRE_INCLUDE = @HYPRE_UMPIRE_INCLUDE@ +HYPRE_UMPIRE_LIB = @HYPRE_UMPIRE_LIB@ diff --git a/src/config/Makefile.config.saved b/src/config/Makefile.config.saved deleted file mode 100644 index 015998674..000000000 --- a/src/config/Makefile.config.saved +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other -# HYPRE Project Developers. 
See the top-level COPYRIGHT file for details. -# -# SPDX-License-Identifier: (Apache-2.0 OR MIT) - -############################################################################### -## All configurable variables are defined in the file named Makefile.config.in -## When Autoconf is run, it will create a file named Makefile.config which -## will have all of the configurable variables replaced with their values. -############################################################################### - - - -srcdir = . -top_srcdir = .. -prefix = /g/g92/li50/workspace/hypre/src/hypre -exec_prefix = ${prefix} - - - -HYPRE_RELEASE_NAME = hypre -HYPRE_RELEASE_VERSION = 2.13.0 -HYPRE_RELEASE_DATE = 2017/10/20 -HYPRE_RELEASE_TIME = 00:00:00 -HYPRE_RELEASE_BUGS = hypre-support@llnl.gov - -HYPRE_SRC_TOP_DIR = /g/g92/li50/workspace/hypre/src -HYPRE_BUILD_DIR = /g/g92/li50/workspace/hypre/src/hypre - -HYPRE_INSTALL_DIR = NONE -HYPRE_LIB_INSTALL = ${exec_prefix}/lib -HYPRE_INC_INSTALL = ${prefix}/include - -HYPRE_LIB_SUFFIX = .a - -.SUFFIXES: -.SUFFIXES: .o .f .c .C .cxx .cc .cu - -.f.o: - $(FC) $(FFLAGS) -c $< -.c.o: - $(CC) $(CFLAGS) -c $< -.C.o: - $(CXX) $(CXXFLAGS) -c $< -.cxx.o: - $(CXX) $(CXXFLAGS) -c $< -.cc.o: - $(CXX) $(CXXFLAGS) -c $< -.cu.o: - $(NVCC) $(NVCCFLAGS) -c $< - -FC = mpif77 -FFLAGS = -O2 -g $(FC_COMPILE_FLAGS) - -CC = mpixlc-gpu -CFLAGS = -O -O3 -DHYPRE_USE_OMP45 -DHAVE_CONFIG_H $(C_COMPILE_FLAGS) - -CXX = mpixlC-gpu -CXXFLAGS = -O -O3 -DHYPRE_USE_OMP45 -DHAVE_CONFIG_H $(CXX_COMPILE_FLAGS) - -NVCC = nvcc -NVCCFLAGS = -NVCCLIBS = - -LINK_FC = mpif77 -LINK_CC = mpixlc-gpu -LINK_CXX = mpixlC-gpu - -BUILD_FC_SHARED = -BUILD_CC_SHARED = -BUILD_CXX_SHARED = -SHARED_COMPILE_FLAG = -SHARED_BUILD_FLAG = -SHARED_SET_SONAME = -SHARED_OPTIONS = - -BUILD_PYTHON = 0 -PYTHON = - -BUILD_JAVA = 0 - -AR = ar -rcu -RANLIB = ranlib - -LDFLAGS = -LIBS = -lstdc++ -lm -FLIBS = - -INCLUDES = - -################################################################## -## LAPACK Library Flags 
-################################################################## -LAPACKLIBS = -LAPACKLIBDIRS = - -################################################################## -## BLAS Library Flags -################################################################## -BLASLIBS = -BLASLIBDIRS = - -################################################################## -## MPI options -################################################################## -MPIINCLUDE = -MPILIBDIRS = -MPILIBS = -MPIFLAGS = - -################################################################## -## NVCC options -################################################################## -HYPRE_NVCC_MAKEFILE = Makefile.empty - -################################################################## -## Caliper options -################################################################## -CALIPER_INCLUDE = -CALIPER_LIBS = - -################################################################## -## SuperLU options -################################################################## -SUPERLU_INCLUDE = -SUPERLU_LIBS = - -################################################################## -## DsuperLU options -################################################################## -DSUPERLU_INCLUDE = -DSUPERLU_LIBS = - -################################################################## -## FEI options -################################################################## -HYPRE_FEI_SRC_DIR = /g/g92/li50/workspace/hypre/src/FEI_mv -HYPRE_FEI_BASE_DIR = /g/g92/li50/workspace/hypre/src/FEI_mv/fei-base -HYPRE_FEI_SUBDIRS = fei-hypre -HYPRE_FEI_HYPRE_FILES = /g/g92/li50/workspace/hypre/src/FEI_mv/fei-hypre/*.o -HYPRE_FEI_FEMLI_FILES = - -################################################################## -## RAJA options -################################################################## -HYPRE_RAJA_LIB_DIR = -HYPRE_RAJA_INCLUDE = -HYPRE_RAJA_LIB = - -################################################################## -## kokkos options 
-################################################################## -HYPRE_KOKKOS_SRC_DIR = -HYPRE_KOKKOS_LIB_DIR = -HYPRE_KOKKOS_INCLUDE = -HYPRE_KOKKOS_LIB = - diff --git a/src/config/bootstrap b/src/config/bootstrap deleted file mode 100755 index 6a63facb6..000000000 --- a/src/config/bootstrap +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh -# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other -# HYPRE Project Developers. See the top-level COPYRIGHT file for details. -# -# SPDX-License-Identifier: (Apache-2.0 OR MIT) - -if [ -f "configure.in" ]; then - echo "File configure.in already exists!" - echo "Make sure this script is being run from the root source directory." - exit -fi - -ln -s config/configure.in . - -rm -rf aclocal.m4 configure autom4te.cache - -autoconf --include=config -rm configure.in - -cat >> configure < HYPRE_config.h -rm -f HYPRE_config.h.tmp - -EOF - -# Update release information for CMake build system -# NOTE: Using '#' as delimiter in sed to allow for '/' in vdate -vnumb=`utilities/version -number` -vdate=`utilities/version -date` -vtime=`utilities/version -time` -sed -e 's#HYPRE_VERSION [^)]*#HYPRE_VERSION '$vnumb'#' CMakeLists.txt | -sed -e 's#HYPRE_DATE [^)]*#HYPRE_DATE '$vdate'#' | -sed -e 's#HYPRE_TIME [^)]*#HYPRE_TIME '$vtime'#' > CMakeLists.txt.tmp -mv CMakeLists.txt.tmp CMakeLists.txt - -# Update release information in documentation -(cd docs; ./update-release.sh) diff --git a/src/config/cmake/hypre_CMakeUtilities.cmake b/src/config/cmake/HYPRE_CMakeUtilities.cmake similarity index 64% rename from src/config/cmake/hypre_CMakeUtilities.cmake rename to src/config/cmake/HYPRE_CMakeUtilities.cmake index 2fce1a1c8..0a1e8c8be 100644 --- a/src/config/cmake/hypre_CMakeUtilities.cmake +++ b/src/config/cmake/HYPRE_CMakeUtilities.cmake @@ -1,3 +1,8 @@ +# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + # A handy function to add the current source directory to a local # filename. To be used for creating a list of sources. function(convert_filenames_to_full_paths NAMES) @@ -15,18 +20,24 @@ function(add_hypre_executables EXE_SRCS) foreach(SRC_FILE IN LISTS ${EXE_SRCS}) get_filename_component(SRC_FILENAME ${SRC_FILE} NAME) + if (HYPRE_USING_CUDA) + # If CUDA is enabled, tag source files to be compiled with nvcc. + set_source_files_properties(${SRC_FILENAME} PROPERTIES LANGUAGE CUDA) + endif (HYPRE_USING_CUDA) + string(REPLACE ".c" "" EXE_NAME ${SRC_FILENAME}) # Actually add the exe add_executable(${EXE_NAME} ${SRC_FILE}) + # Link libraries - set (HYPRE_LIBS "HYPRE") + set(HYPRE_LIBS "HYPRE") # Link libraries for Unix systems if (UNIX) - list (APPEND HYPRE_LIBS m) + list(APPEND HYPRE_LIBS m) endif (UNIX) - + # Append the additional libraries and options - target_link_libraries(${EXE_NAME} PRIVATE "${HYPRE_LIBS}") + target_link_libraries(${EXE_NAME} PRIVATE "${HYPRE_LIBS}") endforeach(SRC_FILE) endfunction() diff --git a/src/config/cmake/HYPRE_SetupCUDAToolkit.cmake b/src/config/cmake/HYPRE_SetupCUDAToolkit.cmake new file mode 100644 index 000000000..1ff259c31 --- /dev/null +++ b/src/config/cmake/HYPRE_SetupCUDAToolkit.cmake @@ -0,0 +1,67 @@ +# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +# This handles the non-compiler aspect of the CUDA toolkit. +# Uses cmake find_package to locate the NVIDIA CUDA C tools +# for shared libraries. Otherwise for static libraries, assumes +# the libraries are located in ${CUDA_TOOLKIT_ROOT_DIR}/lib64. +# Please set cmake variable CUDA_TOOLKIT_ROOT_DIR. + +# TODO Eventually should require cmake>=3.17 +# and use cmake's FindCUDAToolkit (also helps handle +# shared vs. static libraries). 
+ +# Collection of CUDA optional libraries +set(EXPORT_INTERFACE_CUDA_LIBS "") + +if (NOT CUDA_FOUND) + find_package(CUDA REQUIRED) +endif () + +if (HYPRE_ENABLE_CUSPARSE) + set(HYPRE_USING_CUSPARSE ON CACHE BOOL "" FORCE) + if (HYPRE_SHARED) + list(APPEND EXPORT_INTERFACE_CUDA_LIBS ${CUDA_cusparse_LIBRARY}) + else () + list(APPEND EXPORT_INTERFACE_CUDA_LIBS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusparse_static.a) + endif () +endif () + +if (HYPRE_ENABLE_CURAND) + set(HYPRE_USING_CURAND ON CACHE BOOL "" FORCE) + if (HYPRE_SHARED) + list(APPEND EXPORT_INTERFACE_CUDA_LIBS ${CUDA_curand_LIBRARY}) + else () + list(APPEND EXPORT_INTERFACE_CUDA_LIBS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand_static.a) + endif () +endif () + +if (HYPRE_ENABLE_CUBLAS) + set(HYPRE_USING_CUBLAS ON CACHE BOOL "" FORCE) + if (HYPRE_SHARED) + list(APPEND EXPORT_INTERFACE_CUDA_LIBS ${CUDA_CUBLAS_LIBRARIES}) + else () + list(APPEND EXPORT_INTERFACE_CUDA_LIBS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a) + list(APPEND EXPORT_INTERFACE_CUDA_LIBS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublasLt_static.a) + endif (HYPRE_SHARED) +endif (HYPRE_ENABLE_CUBLAS) + +if (NOT HYPRE_SHARED) + list(APPEND EXPORT_INTERFACE_CUDA_LIBS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a) +endif () + +if (HYPRE_ENABLE_GPU_PROFILING) + set(HYPRE_USING_NVTX ON CACHE BOOL "" FORCE) + find_library(NVTX_LIBRARY + NAME libnvToolsExt.so + PATHS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib) + message(STATUS "NVidia tools extension library found in " ${NVTX_LIBRARY}) + list(APPEND EXPORT_INTERFACE_CUDA_LIBS ${NVTX_LIBRARY}) +endif (HYPRE_ENABLE_GPU_PROFILING) diff --git a/src/config/compile b/src/config/compile deleted file mode 100755 index a81e000ae..000000000 --- a/src/config/compile +++ /dev/null @@ -1,136 +0,0 @@ -#! /bin/sh -# Wrapper for compilers which do not understand `-c -o'. - -scriptversion=2003-11-09.00 - -# Copyright (C) 1999, 2000, 2003 Free Software Foundation, Inc. -# Written by Tom Tromey . 
-# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# This file is maintained in Automake, please report -# bugs to or send patches to -# . - -case $1 in - '') - echo "$0: No command. Try \`$0 --help' for more information." 1>&2 - exit 1; - ;; - -h | --h*) - cat <<\EOF -Usage: compile [--help] [--version] PROGRAM [ARGS] - -Wrapper for compilers which do not understand `-c -o'. -Remove `-o dest.o' from ARGS, run PROGRAM with the remaining -arguments, and rename the output as expected. - -If you are trying to build a whole package this is not the -right script to run: please start by reading the file `INSTALL'. - -Report bugs to . -EOF - exit 0 - ;; - -v | --v*) - echo "compile $scriptversion" - exit 0 - ;; -esac - - -prog=$1 -shift - -ofile= -cfile= -args= -while test $# -gt 0; do - case "$1" in - -o) - # configure might choose to run compile as `compile cc -o foo foo.c'. - # So we do something ugly here. 
- ofile=$2 - shift - case "$ofile" in - *.o | *.obj) - ;; - *) - args="$args -o $ofile" - ofile= - ;; - esac - ;; - *.c) - cfile=$1 - args="$args $1" - ;; - *) - args="$args $1" - ;; - esac - shift -done - -if test -z "$ofile" || test -z "$cfile"; then - # If no `-o' option was seen then we might have been invoked from a - # pattern rule where we don't need one. That is ok -- this is a - # normal compilation that the losing compiler can handle. If no - # `.c' file was seen then we are probably linking. That is also - # ok. - exec "$prog" $args -fi - -# Name of file we expect compiler to create. -cofile=`echo $cfile | sed -e 's|^.*/||' -e 's/\.c$/.o/'` - -# Create the lock directory. -# Note: use `[/.-]' here to ensure that we don't use the same name -# that we are using for the .o file. Also, base the name on the expected -# object file name, since that is what matters with a parallel build. -lockdir=`echo $cofile | sed -e 's|[/.-]|_|g'`.d -while true; do - if mkdir $lockdir > /dev/null 2>&1; then - break - fi - sleep 1 -done -# FIXME: race condition here if user kills between mkdir and trap. -trap "rmdir $lockdir; exit 1" 1 2 15 - -# Run the compile. -"$prog" $args -status=$? 
- -if test -f "$cofile"; then - mv "$cofile" "$ofile" -fi - -rmdir $lockdir -exit $status - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/src/config/configure.in b/src/config/configure.in index 58e603011..d54ac936d 100644 --- a/src/config/configure.in +++ b/src/config/configure.in @@ -48,10 +48,11 @@ dnl * Set package information so it only has to be modified in one place dnl ********************************************************************* m4_define([M4_HYPRE_NAME], [hypre]) -m4_define([M4_HYPRE_VERSION], [2.18.2]) -m4_define([M4_HYPRE_DATE], [2019/10/28]) +m4_define([M4_HYPRE_VERSION], [2.21.0]) +m4_define([M4_HYPRE_NUMBER], [22100]) +m4_define([M4_HYPRE_DATE], [2021/05/25]) m4_define([M4_HYPRE_TIME], [00:00:00]) -m4_define([M4_HYPRE_BUGS], [hypre-support@llnl.gov]) +m4_define([M4_HYPRE_BUGS], [https://github.com/hypre-space/hypre/issues]) m4_define([M4_HYPRE_SRCDIR], [`pwd`]) m4_include([config/hypre_blas_macros.m4]) @@ -82,19 +83,22 @@ dnl * of the file, 'm4_define' is used to define m4 macros first. 
dnl ********************************************************************* HYPRE_NAME="M4_HYPRE_NAME" HYPRE_VERSION="M4_HYPRE_VERSION" +HYPRE_NUMBER=M4_HYPRE_NUMBER HYPRE_DATE="M4_HYPRE_DATE" HYPRE_TIME="M4_HYPRE_TIME" HYPRE_BUGS="M4_HYPRE_BUGS" HYPRE_SRCDIR="M4_HYPRE_SRCDIR" AC_DEFINE_UNQUOTED(HYPRE_RELEASE_NAME, ["$HYPRE_NAME"], [Release name]) -AC_DEFINE_UNQUOTED(HYPRE_RELEASE_VERSION, ["$HYPRE_VERSION"], [Version number]) +AC_DEFINE_UNQUOTED(HYPRE_RELEASE_VERSION, ["$HYPRE_VERSION"], [Release version]) +AC_DEFINE_UNQUOTED(HYPRE_RELEASE_NUMBER, [$HYPRE_NUMBER], [Release number]) AC_DEFINE_UNQUOTED(HYPRE_RELEASE_DATE, ["$HYPRE_DATE"], [Date of release]) AC_DEFINE_UNQUOTED(HYPRE_RELEASE_TIME, ["$HYPRE_TIME"], [Time of release]) AC_DEFINE_UNQUOTED(HYPRE_RELEASE_BUGS, ["$HYPRE_BUGS"], [Bug reports]) AC_SUBST(HYPRE_NAME) AC_SUBST(HYPRE_VERSION) +AC_SUBST(HYPRE_NUMBER) AC_SUBST(HYPRE_DATE) AC_SUBST(HYPRE_TIME) AC_SUBST(HYPRE_BUGS) @@ -120,9 +124,10 @@ dnl ********************************************************************* hypre_user_chose_mpi=no hypre_user_chose_blas=no hypre_user_chose_lapack=no -hypre_user_chose_cuda=no hypre_user_chose_raja=no +hypre_using_raja=no hypre_user_chose_kokkos=no +hypre_using_kokkos=no hypre_using_c=yes hypre_using_cxx=yes @@ -144,11 +149,17 @@ hypre_using_cuda=no hypre_using_gpu=no hypre_using_um=no hypre_gpu_mpi=no +hypre_using_gpu_profiling=no hypre_using_cuda_streams=no -hypre_using_nvtx=no hypre_using_cusparse=yes hypre_using_cublas=no hypre_using_curand=yes +hypre_using_device_pool=no +hypre_using_umpire=no +hypre_using_umpire_host=no +hypre_using_umpire_device=no +hypre_using_umpire_um=no +hypre_using_umpire_pinned=no hypre_using_caliper=no hypre_user_gave_caliper_lib=no @@ -157,6 +168,20 @@ hypre_user_gave_caliper_inc=no hypre_found_cuda=no hypre_using_node_aware_mpi=no +hypre_using_memory_tracker=no + + +dnl ********************************************************************* +dnl * Initialize hypre-HIP variables +dnl 
********************************************************************* +hypre_using_hip=no +hypre_using_rocsparse=no +hypre_using_rocblas=no +hypre_using_rocrand=no + +hypre_found_hip=no + + dnl ********************************************************************* dnl * Initialize flag-check variables @@ -224,7 +249,7 @@ AS_HELP_STRING([--enable-mixedint], ) if test "$hypre_using_mixedint" = "yes" then - AC_DEFINE(HYPRE_MIXEDINT, 1) + AC_DEFINE(HYPRE_MIXEDINT, 1, [Define to 1 if using long long int for HYPRE_BigInt]) fi AC_ARG_ENABLE(bigint, @@ -240,7 +265,7 @@ AS_HELP_STRING([--enable-bigint], ) if test "$hypre_using_bigint" = "yes" then - AC_DEFINE(HYPRE_BIGINT, 1) + AC_DEFINE(HYPRE_BIGINT, 1, [Define to 1 if using long long int for HYPRE_Int and HYPRE_BigInt]) fi AC_ARG_ENABLE(single, @@ -256,7 +281,7 @@ AS_HELP_STRING([--enable-single], ) if test "$hypre_using_single" = "yes" then - AC_DEFINE(HYPRE_SINGLE, 1) + AC_DEFINE(HYPRE_SINGLE, 1, [Define to 1 if using single precision values for HYPRE_Real]) fi AC_ARG_ENABLE(longdouble, @@ -272,7 +297,7 @@ AS_HELP_STRING([--enable-longdouble], ) if test "$hypre_using_longdouble" = "yes" then - AC_DEFINE(HYPRE_LONG_DOUBLE, 1) + AC_DEFINE(HYPRE_LONG_DOUBLE, 1, [Define to 1 if using quad precision values for HYPRE_Real]) fi AC_ARG_ENABLE(complex, @@ -288,7 +313,7 @@ AS_HELP_STRING([--enable-complex], ) if test "$hypre_using_complex" = "yes" then - AC_DEFINE(HYPRE_COMPLEX, 1) + AC_DEFINE(HYPRE_COMPLEX, 1, [Define to 1 if using complex values]) fi AC_ARG_ENABLE(maxdim, @@ -298,7 +323,7 @@ AS_HELP_STRING([--enable-maxdim=MAXDIM], [hypre_maxdim=${enableval}], [hypre_maxdim=3] ) -AC_DEFINE_UNQUOTED(HYPRE_MAXDIM, [$hypre_maxdim], [Max dimension]) +AC_DEFINE_UNQUOTED(HYPRE_MAXDIM, [$hypre_maxdim], [Define to be the max dimension size (must be at least 3)]) AC_ARG_ENABLE(persistent, AS_HELP_STRING([--enable-persistent], @@ -312,13 +337,13 @@ AS_HELP_STRING([--enable-persistent], ) if test "$hypre_using_persistent" = "yes" then - 
AC_DEFINE(HYPRE_USING_PERSISTENT_COMM, 1) + AC_DEFINE(HYPRE_USING_PERSISTENT_COMM, 1, [Define to 1 if using persistent communication]) fi AC_ARG_ENABLE(hopscotch, AS_HELP_STRING([--enable-hopscotch], [Uses hopscotch hashing if configured with OpenMP and - atomic capability available(default is NO).]), + atomic capability available(default is NO).]), [case "${enableval}" in yes) hypre_using_hopscotch=yes ;; no) hypre_using_hopscotch=no ;; @@ -328,30 +353,9 @@ AS_HELP_STRING([--enable-hopscotch], ) if test "$hypre_using_hopscotch" = "yes" then - AC_DEFINE(HYPRE_HOPSCOTCH, 1) + AC_DEFINE(HYPRE_HOPSCOTCH, 1, [Define to 1 if hopscotch hashing]) fi -dnl * The --with-no-global-partition option is retained here for -dnl * backward compatibility (no help string is printed). -dnl * The new --enable-global-partition option takes precedence. -hypre_using_global_partition=no -AC_ARG_WITH(no-global-partition,, -[case "${withval}" in - yes) hypre_using_global_partition=no ;; - no) hypre_using_global_partition=yes ;; - *) hypre_using_global_partition=no ;; - esac] -) -AC_ARG_ENABLE(global-partition, -AS_HELP_STRING([--enable-global-partition], - [Use global partitioning (default is NO).]), -[case "${enableval}" in - yes) hypre_using_global_partition=yes ;; - no) hypre_using_global_partition=no ;; - *) hypre_using_global_partition=yes ;; - esac] -) - AC_ARG_ENABLE(fortran, AS_HELP_STRING([--enable-fortran], [Require a working Fortran compiler (default is YES).]), @@ -385,17 +389,6 @@ AS_HELP_STRING([--enable-cuda-streams], [hypre_using_cuda_streams=yes] ) -AC_ARG_ENABLE(nvtx, -AS_HELP_STRING([--enable-nvtx], - [Use NVTX (default is NO).]), -[case "${enableval}" in - yes) hypre_using_nvtx=yes ;; - no) hypre_using_nvtx=no ;; - *) hypre_using_nvtx=no ;; - esac], -[hypre_using_nvtx=no] -) - AC_ARG_ENABLE(cusparse, AS_HELP_STRING([--enable-cusparse], [Use cuSPARSE (default is YES).]), @@ -407,6 +400,17 @@ AS_HELP_STRING([--enable-cusparse], [hypre_using_cusparse=yes] ) 
+AC_ARG_ENABLE(device-memory-pool, +AS_HELP_STRING([--enable-device-memory-pool], + [Use device pooling allocator (default is NO).]), +[case "${enableval}" in + yes) hypre_using_device_pool=yes ;; + no) hypre_using_device_pool=no ;; + *) hypre_using_device_pool=no ;; + esac], +[hypre_using_device_pool=no] +) + AC_ARG_ENABLE(cublas, AS_HELP_STRING([--enable-cublas], [Use cuBLAS (default is NO).]), @@ -429,6 +433,52 @@ AS_HELP_STRING([--enable-curand], [hypre_using_curand=yes] ) + + +AC_ARG_ENABLE(rocsparse, +AS_HELP_STRING([--enable-rocsparse], + [Use rocSPARSE (default is YES).]), +[case "${enableval}" in + yes) hypre_using_rocsparse=yes ;; + no) hypre_using_rocsparse=no ;; + *) hypre_using_rocsparse=yes ;; + esac], +[hypre_using_rocsparse=yes] +) + +AC_ARG_ENABLE(rocblas, +AS_HELP_STRING([--enable-rocblas], + [Use rocBLAS (default is NO).]), +[case "${enableval}" in + yes) hypre_using_rocblas=yes ;; + no) hypre_using_rocblas=no ;; + *) hypre_using_rocblas=no ;; + esac], +[hypre_using_rocblas=no] +) + +AC_ARG_ENABLE(rocrand, +AS_HELP_STRING([--enable-rocrand], + [Use rocRAND (default is YES).]), +[case "${enableval}" in + yes) hypre_using_rocrand=yes ;; + no) hypre_using_rocrand=no ;; + *) hypre_using_rocrand=yes ;; + esac], +[hypre_using_rocrand=yes] +) + +AC_ARG_ENABLE(gpu-profiling, +AS_HELP_STRING([--enable-gpu-profiling], + [Use NVTX on CUDA, rocTX on HIP (default is NO).]), +[case "${enableval}" in + yes) hypre_using_gpu_profiling=yes ;; + no) hypre_using_gpu_profiling=no ;; + *) hypre_using_gpu_profiling=no ;; + esac], +[hypre_using_gpu_profiling=no] +) + AC_ARG_ENABLE(gpu-aware-mpi, AS_HELP_STRING([--enable-gpu-aware-mpi], [Use GPU memory aware MPI]), @@ -477,6 +527,28 @@ else hypre_user_chose_cxxflags=yes fi +dnl ********************************************************************* +dnl * Determine if user provided CUDA compiler or flags +dnl ********************************************************************* +AC_ARG_VAR([CUDA_HOME], [CUDA home 
directory]) +AC_ARG_VAR([HYPRE_CUDA_SM], [CUDA architecture]) +AC_ARG_VAR([CUCC], [CUDA compiler command]) +AC_ARG_VAR([CUFLAGS], [CUDA compiler flags]) + +if test "x$CUCC" = "x" +then + hypre_user_chose_cudacompilers=no +else + hypre_user_chose_cudacompilers=yes +fi + +if test "x$CUFLAGS" = "x" +then + hypre_user_chose_cuflags=no +else + hypre_user_chose_cuflags=yes +fi + dnl ********************************************************************* dnl * Determine if user provided fortran compiler or flags dnl ********************************************************************* @@ -590,7 +662,8 @@ AS_HELP_STRING([--with-strict-checking], [Compiles without MPI ('--without-MPI') and tries to find a compiler option that warns of as many non-ISO features as possible.]), -[ +[case "${withval}" in + yes) hypre_user_chose_ccompilers=yes hypre_user_chose_cflags=yes hypre_user_chose_cxxcompilers=yes @@ -647,7 +720,9 @@ AS_HELP_STRING([--with-strict-checking], fi fi - AC_DEFINE(HYPRE_SEQUENTIAL,1,[No MPI being used])] + AC_DEFINE(HYPRE_SEQUENTIAL,1,[No MPI being used]) + ;; + esac] ) dnl ***** MPI @@ -726,6 +801,18 @@ AS_HELP_STRING([--with-node-aware-mpi-include=DIR], hypre_using_node_aware_mpi=yes] ) +dnl ***** memory tracker +AC_ARG_WITH(memory_tracker, +AS_HELP_STRING([--with-memory-tracker], + [Use memory tracker in hypre (default is NO).]), +[case "$withval" in + yes) hypre_using_memory_tracker=yes;; + no) hypre_using_memory_tracker=no ;; + *) hypre_using_memory_tracker=no ;; + esac], +[hypre_using_memory_tracker=no] +) + dnl ***** BLAS AC_ARG_WITH(blas-lib, @@ -832,11 +919,19 @@ AS_HELP_STRING([--with-fmangle=FMANGLE], [FMANGLE contains a string indicating the type of name mangling to use when calling hypre from Fortran. 
It can be set to: "no-underscores", "one-underscore", "two-underscores", - "caps-no-underscores, and "one-before-after".]), + "caps-no-underscores", and "one-before-after".]), [hypre_fmangle=0; AC_HYPRE_SET_FMANGLE], [hypre_fmangle=0] ) -AC_DEFINE_UNQUOTED(HYPRE_FMANGLE, [$hypre_fmangle], [Fortran mangling]) +AC_DEFINE_UNQUOTED(HYPRE_FMANGLE, + [$hypre_fmangle], +[Define as follows to set the Fortran name mangling scheme: + 0 = unspecified; + 1 = no underscores; + 2 = one underscore; + 3 = two underscores; + 4 = caps, no underscores; + 5 = one underscore before and after]) dnl * Define a generic macro to set hypre_fmangle_blaslapack based on withval AC_DEFUN([AC_HYPRE_SET_FMANGLE_BLAS], @@ -932,7 +1027,7 @@ AS_HELP_STRING([--with-superlu], ) AS_IF([test "x$with_superlu" = "xyes"], - [AC_DEFINE(HAVE_SUPERLU, 1, [Have external SuperLU library.])], + [AC_DEFINE(HAVE_SUPERLU, 1, [Define to 1 if using SuperLU])], []) AC_ARG_WITH(superlu-include, @@ -964,7 +1059,7 @@ AS_HELP_STRING([--with-dsuperlu], ) AS_IF([test "x$with_dsuperlu" = "xyes"], - [AC_DEFINE(HYPRE_USING_DSUPERLU, 1, [Have external DSuperLU library.])], + [AC_DEFINE(HYPRE_USING_DSUPERLU, 1, [Define to 1 if using DSuperLU])], []) AC_ARG_WITH(dsuperlu-include, @@ -1022,39 +1117,63 @@ AC_ARG_WITH(cuda, AS_HELP_STRING([--with-cuda], [Use CUDA. Require cuda-8.0 or higher (default is NO).]), [case "$withval" in - yes) hypre_user_chose_cuda=yes - hypre_using_cuda=yes ;; + yes) hypre_using_cuda=yes ;; no) hypre_using_cuda=no ;; *) hypre_using_cuda=no ;; esac], [hypre_using_cuda=no] ) + +dnl ***** HIP +AC_ARG_WITH(hip, +AS_HELP_STRING([--with-hip], + [Use HIP for AMD GPUs. 
(default is NO).]), +[case "$withval" in + yes) hypre_using_hip=yes ;; + no) hypre_using_hip=no ;; + *) hypre_using_hip=no ;; + esac], +[hypre_using_hip=no] +) + +AC_ARG_WITH(cuda-home, +AS_HELP_STRING([--with-cuda-home=DIR], + [User specifies CUDA_HOME in DIR.]), +[for cuda_dir in $withval; do + CUDA_HOME="$cuda_dir" + done; + hypre_using_cuda=yes] +) + +AC_ARG_WITH(gpu-arch, +AS_HELP_STRING([--with-gpu-arch=ARG], + [User specifies NVIDIA GPU architecture that the CUDA files will be compiled for in ARG, where ARG is a space-separated + list (enclosed in quotes) of numbers.]), +[ + if test "x${withval}" != "x" + then + if test "x${HYPRE_CUDA_SM}" = "x" + then + HYPRE_CUDA_SM="${withval}" + fi + fi +] +) + dnl ***** RAJA AC_ARG_WITH(raja, AS_HELP_STRING([--with-raja], [Use RAJA. Require RAJA package to be compiled properly (default is NO).]), [case "$withval" in - yes) hypre_user_chose_raja=yes;; - no) hypre_user_chose_raja=no ;; - *) hypre_user_chose_raja=no ;; + yes) hypre_using_raja=yes;; + no) hypre_using_raja=no ;; + *) hypre_using_raja=no ;; esac], [hypre_using_raja=no] ) -dnl ***** Kokkos - -AC_ARG_WITH(kokkos, -AS_HELP_STRING([--with-kokkos], - [Use Kokkos. Require kokkos package to be compiled properly(default is NO).]), -[case "$withval" in - yes) hypre_user_chose_kokkos=yes ;; - no) hypre_user_chose_kokkos=no ;; - *) hypre_user_chose_kokkos=no ;; - esac] -) - AC_ARG_WITH(raja-include, AS_HELP_STRING([--with-raja-include=DIR], [User specifies that RAJA/*.h is in DIR. The options @@ -1100,17 +1219,17 @@ AS_HELP_STRING([--with-raja-lib-dirs=DIRS], hypre_user_chose_raja=yes] ) -AC_ARG_WITH(kokkos-lib-dirs, -AS_HELP_STRING([--with-kokkos-lib-dirs=DIRS], - [DIRS is space-separated list (enclosed in quotes) of - directories containing the libraries and - Makefile.kokkos is assumed to be in DIRS/../ . 
- The options --with-kokkos-libs and --with-kokkos-dirs - must be used together.]), -[for kokkos_lib_dir in $withval; do - HYPRE_KOKKOS_LIB_DIR="-L$kokkos_lib_dir $HYPRE_KOKKOS_LIB_DIR" - done; -hypre_user_chose_kokkos=yes] +dnl ***** Kokkos + +AC_ARG_WITH(kokkos, +AS_HELP_STRING([--with-kokkos], + [Use Kokkos. Require kokkos package to be compiled properly(default is NO).]), +[case "$withval" in + yes) hypre_using_kokkos=yes ;; + no) hypre_using_kokkos=no ;; + *) hypre_using_kokkos=no ;; + esac], +[hypre_using_kokkos=no] ) AC_ARG_WITH(kokkos-include, @@ -1145,16 +1264,136 @@ AS_HELP_STRING([--with-kokkos-libs=LIBS], hypre_user_chose_kokkos=yes] ) +AC_ARG_WITH(kokkos-lib-dirs, +AS_HELP_STRING([--with-kokkos-lib-dirs=DIRS], + [DIRS is space-separated list (enclosed in quotes) of + directories containing the libraries and + Makefile.kokkos is assumed to be in DIRS/../ . + The options --with-kokkos-libs and --with-kokkos-dirs + must be used together.]), +[for kokkos_lib_dir in $withval; do + HYPRE_KOKKOS_LIB_DIR="-L$kokkos_lib_dir $HYPRE_KOKKOS_LIB_DIR" + done; +hypre_user_chose_kokkos=yes] +) + +dnl **** Umpire + +AC_ARG_WITH(umpire-host, +AS_HELP_STRING([--with-umpire-host], + [Use Umpire Allocator for host memory (default is NO).]), +[case "${withval}" in + yes) hypre_using_umpire_host=yes ;; + no) hypre_using_umpire_host=no ;; + *) hypre_using_umpire_host=no ;; + esac], +[hypre_using_umpire_host=no] +) + +AC_ARG_WITH(umpire-device, +AS_HELP_STRING([--with-umpire-device], + [Use Umpire Allocator for device memory (default is NO).]), +[case "${withval}" in + yes) hypre_using_umpire_device=yes ;; + no) hypre_using_umpire_device=no ;; + *) hypre_using_umpire_device=no ;; + esac], +[hypre_using_umpire_device=no] +) + +AC_ARG_WITH(umpire-um, +AS_HELP_STRING([--with-umpire-um], + [Use Umpire Allocator for unified memory (default is NO).]), +[case "${withval}" in + yes) hypre_using_umpire_um=yes ;; + no) hypre_using_umpire_um=no ;; + *) hypre_using_umpire_um=no ;; + 
esac], +[hypre_using_umpire_um=no] +) + +AC_ARG_WITH(umpire-pinned, +AS_HELP_STRING([--with-umpire-pinned], + [Use Umpire Allocator for pinned memory (default is NO).]), +[case "${withval}" in + yes) hypre_using_umpire_pinned=yes ;; + no) hypre_using_umpire_pinned=no ;; + *) hypre_using_umpire_pinned=no ;; + esac], +[hypre_using_umpire_pinned=no] +) + +dnl the default setting with Umpire, for device and um +AC_ARG_WITH(umpire, +AS_HELP_STRING([--with-umpire], + [Use Umpire Allocator for device and unified memory (default is NO).]), +[case "${withval}" in + yes) hypre_using_umpire_device=yes + hypre_using_umpire_um=yes ;; + no) ;; + *) ;; + esac], +[] +) + +AC_ARG_WITH(umpire-include, +AS_HELP_STRING([--with-umpire-include=DIR], + [User specifies that UMPIRE headers is in DIR. The options + --with-umpire-include --with-umpire-libs and + --with-umpire-dirs must be used together.]), +[for umpire_dir in $withval; do +HYPRE_UMPIRE_INCLUDE="-I$umpire_dir $HYPRE_UMPIRE_INCLUDE" +done; +] +) + +AC_ARG_WITH(umpire-lib, +AS_HELP_STRING([--with-umpire-lib=LIBS], + [LIBS is space-separated linkable list (enclosed in quotes) of libraries + needed for UMPIRE. OK to use -L and -l flags in the list]), +[for umpire_lib in $withval; do + HYPRE_UMPIRE_LIB="$umpire_lib $HYPRE_UMPIRE_LIB" + done; +] +) + +AC_ARG_WITH(umpire-libs, +AS_HELP_STRING([--with-umpire-libs=LIBS], + [LIBS is space-separated list (enclosed in quotes) of libraries + needed for UMPIRE (base name only). The options --with-umpire-libs and + --with-umpire-dirs must be used together.]), +[for umpire_lib in $withval; do + HYPRE_UMPIRE_LIB="-l$umpire_lib $HYPRE_UMPIRE_LIB" + done; +] +) + +AC_ARG_WITH(umpire-lib-dirs, +AS_HELP_STRING([--with-umpire-lib-dirs=DIRS], + [DIRS is space-separated list (enclosed in quotes) of + directories containing the libraries specified by + --with-umpire-libs, e.g "usr/lib /usr/local/lib". 
+ The options --with-umpire-libs and --with-umpire-dirs + must be used together.]), +[for umpire_lib_dir in $withval; do + HYPRE_UMPIRE_LIB_DIR="-L$umpire_lib_dir $HYPRE_UMPIRE_LIB_DIR" + done; +] +) + dnl ***** Caliper AC_ARG_WITH(caliper, AS_HELP_STRING([--with-caliper], [Use Caliper instrumentation (default is NO).]), - [hypre_using_caliper=yes], - [hypre_using_caliper=no]) +[case "$withval" in + yes) hypre_using_caliper=yes;; + *) hypre_using_caliper=no ;; +esac], +[hypre_using_caliper=no]) AS_IF([test "x$with_caliper" = "xyes"], - [AC_DEFINE(HYPRE_USING_CALIPER, 1, [Using Caliper instrumentation])], + [AC_DEFINE(HYPRE_USING_CALIPER, 1, [Define to 1 if Caliper instrumentation is enabled])], []) AC_ARG_WITH(caliper-include, @@ -1183,11 +1422,6 @@ if test "$hypre_user_chose_ccompilers" = "no" then if test "$hypre_using_mpi" = "no" then - if test "$hypre_using_device_openmp" = "yes" - then - AC_CHECK_PROGS(CC, [xlc-gpu clang-gpu]) - fi - if test "$hypre_using_openmp" = "yes" then AC_CHECK_PROGS(CC, [xlc_r xlC_r xlc xlC icc icpc gcc g++ pgcc pgCC cc CC kcc KCC]) @@ -1195,16 +1429,11 @@ then AC_CHECK_PROGS(CC, [xlc xlC icc icpc gcc g++ pgcc pgCC cc CC kcc KCC]) fi else - if test "$hypre_using_device_openmp" = "yes" - then - AC_CHECK_PROGS(CC, [mpixlc-gpu mpiclang-gpu]) - fi - if test "$hypre_using_openmp" = "yes" then - AC_CHECK_PROGS(CC, [mpxlc mpixlc_r mpixlc mpiicc mpigcc mpicc mpipgcc]) + AC_CHECK_PROGS(CC, [mpxlc mpixlc_r mpixlc mpiicc mpigcc mpicc mpipgcc mpipgicc]) else - AC_CHECK_PROGS(CC, [mpxlc mpixlc mpiicc mpigcc mpicc mpipgcc]) + AC_CHECK_PROGS(CC, [mpxlc mpixlc mpiicc mpigcc mpicc mpipgcc mpipgicc]) fi fi @@ -1218,28 +1447,18 @@ if test "$hypre_user_chose_cxxcompilers" = "no" then if test "$hypre_using_mpi" = "no" then - if test "$hypre_using_device_openmp" = "yes" - then - AC_CHECK_PROGS(CXX, [xlC-gpu clang++-gpu]) - fi - if test "$hypre_using_openmp" = "yes" then - AC_CHECK_PROGS(CXX, [xlC_r xlc_r xlC xlc icpc icc g++ gcc pgCC pgcc CC cc KCC kcc]) + 
AC_CHECK_PROGS(CXX, [xlC_r xlc_r xlC xlc icpc icc g++ gcc pgCC pgcc pgc++ CC cc KCC kcc]) else - AC_CHECK_PROGS(CXX, [xlC xlc icpc icc g++ gcc pgCC pgcc CC cc KCC kcc]) + AC_CHECK_PROGS(CXX, [xlC xlc icpc icc g++ gcc pgCC pgcc pgc++ CC cc KCC kcc]) fi else - if test "$hypre_using_device_openmp" = "yes" - then - AC_CHECK_PROGS(CXX, [mpixlC-gpu mpiclang++-gpu]) - fi - if test "$hypre_using_openmp" = "yes" then - AC_CHECK_PROGS(CXX, [mpxlC mpixlcxx_r mpixlcxx mpixlC mpiicpc mpig++ mpic++ mpicxx mpiCC mpipgCC]) + AC_CHECK_PROGS(CXX, [mpxlC mpixlcxx_r mpixlcxx mpixlC mpiicpc mpig++ mpic++ mpicxx mpiCC mpipgCC mpipgic++]) else - AC_CHECK_PROGS(CXX, [mpxlC mpixlcxx mpixlC mpiicpc mpig++ mpic++ mpicxx mpiCC mpipgCC]) + AC_CHECK_PROGS(CXX, [mpxlC mpixlcxx mpixlC mpiicpc mpig++ mpic++ mpicxx mpiCC mpipgCC mpipgic++]) fi fi @@ -1255,16 +1474,16 @@ then then if test "$hypre_using_openmp" = "yes" then - AC_CHECK_PROGS(FC, [xlf_r ifort gfortran g77 g95 pgf77 f77]) + AC_CHECK_PROGS(FC, [xlf_r ifort gfortran g77 g95 pgf77 pgfortran f77]) else - AC_CHECK_PROGS(FC, [xlf ifort gfortran g77 g95 pgf77 f77]) + AC_CHECK_PROGS(FC, [xlf ifort gfortran g77 g95 pgf77 pgfortran f77]) fi else if test "$hypre_using_openmp" = "yes" then - AC_CHECK_PROGS(FC, [mpxlf mpixlf77_r mpiifort mpif77 mpipgf77]) + AC_CHECK_PROGS(FC, [mpxlf mpixlf77_r mpiifort mpif77 mpipgf77 mpipgifort]) else - AC_CHECK_PROGS(FC, [mpxlf mpixlf77 mpiifort mpif77 mpipgf77]) + AC_CHECK_PROGS(FC, [mpxlf mpixlf77 mpiifort mpif77 mpipgf77 mpipgifort]) fi fi @@ -1274,6 +1493,46 @@ then fi fi +if [test "x$hypre_using_cuda" = "xyes" && test "x$hypre_using_device_openmp" = "xyes"] +then + AC_MSG_ERROR([--with-cuda and --with-device-openmp are mutually exclusive]) +fi + +if [test "x$hypre_using_cuda" = "xyes" && test "x$hypre_using_hip" = "xyes"] +then + AC_MSG_ERROR([--with-cuda and --with-hip are mutually exclusive]) +fi + +if [test "x$hypre_using_hip" = "xyes" && test "x$hypre_using_device_openmp" = "xyes"] +then + 
AC_MSG_ERROR([--with-hip and --with-device-openmp are mutually exclusive]) +fi + + +if test "$hypre_user_chose_cudacompilers" = "no" +then + if test "$hypre_using_device_openmp" = "yes" + then + if test "$hypre_using_mpi" = "no" + then + AC_CHECK_PROGS(CUCC, [xlc-gpu clang-gpu]) + else + AC_CHECK_PROGS(CUCC, [mpixlc-gpu mpiclang-gpu]) + fi + fi + + if test "$hypre_using_cuda" = "yes" + then + AC_CHECK_PROGS(CUCC, nvcc, [""], ["${CUDA_HOME}/bin"]) + CUCC="\${HYPRE_CUDA_PATH}/bin/${CUCC} -ccbin=\${CXX}" + fi + + if test "$hypre_using_hip" = "yes" + then + AC_CHECK_PROGS(CUCC, hipcc) + fi +fi + dnl ********************************************************************* dnl * Check for general programs dnl ********************************************************************* @@ -1323,23 +1582,18 @@ else hypre_cv_func_MPI_Comm_f2c_macro=no)]) if test $ac_cv_func_MPI_Comm_f2c = yes \ || test $hypre_cv_func_MPI_Comm_f2c_macro = yes; then - AC_DEFINE(HYPRE_HAVE_MPI_COMM_F2C) + AC_DEFINE(HYPRE_HAVE_MPI_COMM_F2C,1,[Define to 1 if the routine MPI_Comm_f2c is found]) fi fi -dnl * It would be better if this could be defined above, but much work is needed -dnl * to make that happen. 
-if test "$hypre_using_global_partition" = "no" +if test "$hypre_using_node_aware_mpi" = "yes" then -dnl if test "$hypre_using_mpi" != "no" -dnl then - AC_DEFINE(HYPRE_NO_GLOBAL_PARTITION, 1) -dnl fi + AC_DEFINE(HYPRE_USING_NODE_AWARE_MPI, 1, [Define to 1 if Node Aware MPI library is used]) fi -if test "$hypre_using_node_aware_mpi" = "yes" +if test "$hypre_using_memory_tracker" = "yes" then - AC_DEFINE(HYPRE_USING_NODE_AWARE_MPI, 1) + AC_DEFINE(HYPRE_USING_MEMORY_TRACKER, 1, [Define to 1 if want to track memory operations in hypre]) fi dnl ********************************************************************* @@ -1449,7 +1703,7 @@ then then HYPRE_FEI_SUBDIRS="femli $HYPRE_FEI_SUBDIRS" HYPRE_FEI_FEMLI_FILES="$HYPRE_SRCDIR/FEI_mv/femli/*.o" - AC_DEFINE(HAVE_MLI, 1, [Using MLI.]) + AC_DEFINE(HAVE_MLI, 1, [Define to 1 if using MLI]) fi fi AC_CHECK_LIB(stdc++, __gxx_personality_v0, LIBS="$LIBS -lstdc++") @@ -1482,6 +1736,7 @@ dnl ********************************************************************* if test "$hypre_using_debug" = "yes" then AC_HYPRE_DEBUG_FLAGS + AC_DEFINE([HYPRE_DEBUG], 1, [Define to 1 if in debug mode]) else AC_HYPRE_OPTIMIZATION_FLAGS fi @@ -1504,9 +1759,9 @@ dnl ********************************************************************* dnl * Set default link commands and suffix values dnl ********************************************************************* dnl LINK_F77="${F77}" -LINK_FC="${FC}" -LINK_CC="${CC}" -LINK_CXX="${CXX}" +LINK_FC='${FC}' +LINK_CC='${CC}' +LINK_CXX='${CXX}' HYPRE_LIBSUFFIX=".a" @@ -1602,6 +1857,56 @@ dnl AC_HYPRE_CHECK_USER_LAPACKLIBS fi +dnl ********************************************************************* +dnl * Set flags if needed to enable shared libraries and Python, Java +dnl ********************************************************************* +if test "$hypre_using_shared" = "yes" +then + HYPRE_LIBSUFFIX=".so" + if test "$hypre_using_cuda" = "yes" + then + SHARED_SET_SONAME="-Xlinker=-soname," + 
SHARED_OPTIONS="-Xlinker=-z,defs" + else + SHARED_SET_SONAME="-Wl,-soname," + SHARED_OPTIONS="-Wl,-z,defs" + fi + SHARED_COMPILE_FLAG="-fPIC" + case $hypre_platform in + AIX* | aix* | Aix*) SHARED_COMPILE_FLAG="-qmkshrobj" + SHARED_BUILD_FLAG="-G" +dnl LINK_F77="${F77} -brtl" + LINK_FC='${FC} -brtl' + LINK_CC='${CC} -brtl' + LINK_CXX='${CXX} -brtl' ;; + DARWIN* | darwin* | Darwin*) SHARED_BUILD_FLAG="-dynamiclib -undefined dynamic_lookup" + HYPRE_LIBSUFFIX=".dylib" + SHARED_SET_SONAME="-install_name @rpath/" + SHARED_OPTIONS="-undefined error" ;; + *) SHARED_BUILD_FLAG="-shared" ;; + esac + SHARED_BUILD_FLAG="${SHARED_BUILD_FLAG} ${EXTRA_BUILDFLAGS}" + FFLAGS="${FFLAGS} ${SHARED_COMPILE_FLAG}" + CFLAGS="${CFLAGS} ${SHARED_COMPILE_FLAG}" + CXXFLAGS="${CXXFLAGS} ${SHARED_COMPILE_FLAG}" + +dnl BUILD_F77_SHARED="${F77} ${SHARED_BUILD_FLAG}" + BUILD_FC_SHARED="\${FC} ${SHARED_BUILD_FLAG}" + if test "$hypre_using_fei" = "yes" + then + BUILD_CC_SHARED="\${CXX} ${SHARED_BUILD_FLAG}" + else + BUILD_CC_SHARED="\${CC} ${SHARED_BUILD_FLAG}" + fi + BUILD_CXX_SHARED="\${CXX} ${SHARED_BUILD_FLAG}" + if test "$hypre_using_cuda" = "yes" + then + BUILD_CC_SHARED="\${CUCC} ${SHARED_BUILD_FLAG}" + fi + dnl TODO HIP +fi + + dnl ********************************************************************* dnl * Warn if caliper options are incomplete dnl ********************************************************************* @@ -1627,8 +1932,16 @@ then fi fi +if [test "x$hypre_using_um" = "xyes"] +then + if [test "x$hypre_using_cuda" != "xyes" && test "x$hypre_using_device_openmp" != "xyes" && test "x$hypre_using_hip" != "xyes"] + then + AC_MSG_ERROR([Asked for unified memory, but not using CUDA, HIP, or device OpenMP!]) + fi +fi dnl hypre_using_um -if [test "$hypre_using_cuda" = "yes" || test "$hypre_using_device_openmp" = "yes" || test "$hypre_using_um" = "yes"] +dnl CUDA +if [test "$hypre_using_cuda" = "yes" || test "$hypre_using_device_openmp" = "yes"] then 
AC_CHECK_HEADERS(["${CUDA_HOME}/include/cuda.h"], [hypre_found_cuda=yes; HYPRE_CUDA_PATH=${CUDA_HOME}]) @@ -1643,13 +1956,35 @@ then fi fi +dnl ********************************************************************* +dnl * Check for HIP header +dnl ********************************************************************* + +dnl If the user has requested to use HIP, we first check the environment +dnl for ROCM_PATH to point at the ROCm installation. If that is not found, +dnl then we default to `/opt/rocm`. +dnl +dnl TODO: Add an ARG_WITH for rocm so the user can control the ROCm path +dnl through the configure line +AS_IF([ test x"$hypre_using_hip" = x"yes" ], + [ AS_IF([ test -n "$ROCM_PATH"], + [ HYPRE_ROCM_PREFIX=$ROCM_PATH ], + [ HYPRE_ROCM_PREFIX=/opt/rocm ]) + + AC_SUBST(HYPRE_ROCM_PREFIX) + AC_CHECK_HEADERS( ["${HYPRE_ROCM_PREFIX}/include/hip/hip_common.h"], + [hypre_found_hip=yes], + [AC_MSG_ERROR([unable to find ${HYPRE_ROCM_PREFIX}/include/hip/hip_common.h ... Ensure ROCm is installed and set ROCM_PATH environment variable to ROCm installation path.])] )], + []) + + + dnl ********************************************************************* dnl * Set raja options dnl ********************************************************************* -if test "x$hypre_user_chose_raja" = "xyes" +if test "x$hypre_using_raja" = "xyes" then - RAJA_LIBS=" $HYPRE_RAJA_LIB_DIR $HYPRE_RAJA_LIB " - AC_DEFINE(HYPRE_USING_RAJA, 1, [RAJA being used]) + AC_DEFINE(HYPRE_USING_RAJA, 1, [Define to 1 if executing on host/device with RAJA]) if test "$hypre_using_cuda" != "yes" then @@ -1668,10 +2003,9 @@ fi dnl ********************************************************************* dnl * Set kokkos options dnl ********************************************************************* -if test "x$hypre_user_chose_kokkos" = "xyes" +if test "x$hypre_using_kokkos" = "xyes" then - KOKKOS_LIBS="$HYPRE_KOKKOS_LIB_DIR $HYPRE_KOKKOS_LIB" - AC_DEFINE(HYPRE_USING_KOKKOS, 1, [KOKKOS being used]) +
AC_DEFINE(HYPRE_USING_KOKKOS, 1, [Define to 1 if executing on host/device with KOKKOS]) if test "$hypre_using_cuda" != "yes" then @@ -1687,62 +2021,114 @@ then CFLAGS=${CXXFLAGS} fi +dnl ********************************************************************* +dnl * Set umpire options +dnl ********************************************************************* +if test "x$hypre_using_umpire_host" = "xyes" +then + hypre_using_umpire=yes + AC_DEFINE(HYPRE_USING_UMPIRE_HOST, 1, [Define to 1 if using UMPIRE for host memory]) +fi + +if test "x$hypre_using_umpire_device" = "xyes" +then + hypre_using_umpire=yes + AC_DEFINE(HYPRE_USING_UMPIRE_DEVICE, 1, [Define to 1 if using UMPIRE for device memory]) +fi + +if test "x$hypre_using_umpire_um" = "xyes" +then + hypre_using_umpire=yes + AC_DEFINE(HYPRE_USING_UMPIRE_UM, 1, [Define to 1 if using UMPIRE for unified memory]) +fi + +if test "x$hypre_using_umpire_pinned" = "xyes" +then + hypre_using_umpire=yes + AC_DEFINE(HYPRE_USING_UMPIRE_PINNED, 1, [Define to 1 if using UMPIRE for pinned memory]) +fi + +if test "x$hypre_using_umpire" = "xyes" +then + AC_DEFINE(HYPRE_USING_UMPIRE, 1, [Define to 1 if using UMPIRE]) +fi + dnl ********************************************************************* dnl * Set cuda options dnl ********************************************************************* -if test "$hypre_user_chose_cuda" = "yes" +if test "$hypre_using_cuda" = "yes" then - AC_DEFINE(HYPRE_USING_CUDA, 1, [CUDA being used]) + AC_DEFINE(HYPRE_USING_GPU, 1, [Define to 1 if executing on GPU device]) + + AC_DEFINE(HYPRE_USING_CUDA, 1, [Define to 1 if executing on device with CUDA]) - AC_DEFINE(HYPRE_USING_CUSPARSE, 1, [cuSPARSE being used]) + AC_DEFINE(HYPRE_USING_CUSPARSE, 1, [Define to 1 if using cuSPARSE]) - if test "$hypre_using_nvtx" = "yes" + if test "$hypre_using_gpu_profiling" = "yes" then - AC_DEFINE(HYPRE_USING_NVTX, 1, [NVTX being used]) + AC_DEFINE(HYPRE_USING_NVTX, 1, [Define to 1 if using NVIDIA Tools Extension (NVTX)]) fi if 
test "$hypre_using_cusparse" = "yes" then - AC_DEFINE(HYPRE_USING_CUSPARSE, 1, [CUSPARSE being used]) + AC_DEFINE(HYPRE_USING_CUSPARSE, 1, [Define to 1 if using cuSPARSE]) + fi + + if test "$hypre_using_device_pool" = "yes" + then + AC_DEFINE(HYPRE_USING_DEVICE_POOL, 1, [Define to 1 if using device pooling allocator]) fi if test "$hypre_using_cublas" = "yes" then - AC_DEFINE(HYPRE_USING_CUBLAS, 1, [CUBLAS being used]) + AC_DEFINE(HYPRE_USING_CUBLAS, 1, [Define to 1 if using cuBLAS]) fi if test "$hypre_using_curand" = "yes" then - AC_DEFINE(HYPRE_USING_CURAND, 1, [CURAND being used]) + AC_DEFINE(HYPRE_USING_CURAND, 1, [Define to 1 if using cuRAND]) fi - dnl let CC/CXX and LINK be CUCC and let host compiler be CXX - AC_CHECK_PROGS(CUCC, nvcc) - NVCCBIN=${CXX} - CC=${CUCC} - CXX=${CUCC} - LINK_CC=${CXX} - LINK_CXX=${CXX} + LINK_CC='${CUCC}' + LINK_CXX='${CUCC}' dnl CUDA SM if test "x$HYPRE_CUDA_SM" = "x" then - HYPRE_CUDA_SM=60 + HYPRE_CUDA_SM=70 fi - HYPRE_CUDA_GENCODE="-gencode arch=compute_${HYPRE_CUDA_SM},\"code=sm_${HYPRE_CUDA_SM}\"" + HYPRE_CUDA_GENCODE="" + for sm in ${HYPRE_CUDA_SM}; do + HYPRE_CUDA_GENCODE="${HYPRE_CUDA_GENCODE}-gencode arch=compute_${sm},code=sm_${sm} " + done - CUFLAGS+="-O2 -ccbin=$NVCCBIN ${HYPRE_CUDA_GENCODE} -expt-extended-lambda -dc -std=c++11 -Xcompiler -Wno-deprecated-register --x cu" - if test "$hypre_using_debug" = "yes" + if test "$hypre_user_chose_cuflags" = "no" then - CUFLAGS="-g ${CUFLAGS}" + CUFLAGS="-lineinfo -expt-extended-lambda -dc -std=c++11 --x cu" + if test "$hypre_using_debug" = "yes" + then + CUFLAGS="-g -O0 ${CUFLAGS}" + else + CUFLAGS="-O2 ${CUFLAGS}" + fi fi - CXXFLAGS="${CUFLAGS} -Xcompiler \"${CXXFLAGS}\"" - CFLAGS=${CXXFLAGS} - LDFLAGS="-ccbin=$NVCCBIN ${HYPRE_CUDA_GENCODE} -Xcompiler \"${LDFLAGS}\"" - HYPRE_CUDA_INCL="-I${HYPRE_CUDA_PATH}/include" - HYPRE_CUDA_LIBS="-L${HYPRE_CUDA_PATH}/lib64 -lcudart" - if test "$hypre_using_nvtx" = "yes" + + if test "$hypre_user_chose_cxxflags" = "no" + then + if [test
"${CXX}" = "mpixlC" || test "${CXX}" = "xlC_r" || test "${CXX}" = "xlC"] + then + CXXFLAGS="${CXXFLAGS} -Wno-deprecated-register -Wenum-compare" + fi + fi + + CUFLAGS="${CUFLAGS} -Xcompiler \"${CXXFLAGS}\"" + + dnl CFLAGS=${CXXFLAGS} + LDFLAGS="-Xcompiler \"${LDFLAGS}\"" + HYPRE_CUDA_INCLUDE='-I${HYPRE_CUDA_PATH}/include' + HYPRE_CUDA_LIBS='-L${HYPRE_CUDA_PATH}/lib64 -lcudart' + if test "$hypre_using_gpu_profiling" = "yes" then HYPRE_CUDA_LIBS+=" -lnvToolsExt" fi @@ -1763,27 +2149,129 @@ then fi fi +dnl ********************************************************************* +dnl * Set HIP options +dnl ********************************************************************* +AS_IF([test x"$hypre_using_hip" = x"yes"], + [ + AC_DEFINE(HYPRE_USING_GPU, 1, [Define to 1 if executing on GPU device]) + AC_DEFINE(HYPRE_USING_HIP, 1, [HIP being used]) + + dnl hipcc is just a perl script that wraps things like detection + dnl of the AMD GPU and the actual invocation of the clang compiler + dnl from ROCm that supports HIP and all the command line foo needed + dnl by the compiler. You can force hipcc to emit what it actually does + dnl by setting HIPCC_VERBOSE=7 in your environment. + dnl AC_CHECK_PROGS(HIPCC, hipcc) + + dnl (Ab)Using CUCC when compiling HIP + dnl At this time, we need the linker to be hipcc in order to link + dnl in device code. + LINK_CC='${CUCC}' + LINK_CXX='${CUCC}' + + if test "x${HYPRE_CUDA_SM}" != "x" + then + HYPRE_CUDA_GENCODE="--amdgpu-target=" + for sm in ${HYPRE_CUDA_SM}; do + HYPRE_CUDA_GENCODE="${HYPRE_CUDA_GENCODE}${sm}," + done + HYPRE_CUDA_GENCODE="`echo ${HYPRE_CUDA_GENCODE}|sed 's/,$//'`" + fi + + dnl The "-x hip" is necessary to override the detection of .c files which clang + dnl interprets as C and therefore invokes the C compiler rather than the HIP part + dnl of clang. Put HIPCXXFLAGS at the end so the user can override + dnl from the configure line.
+ HIPCXXFLAGS="-x hip -std=c++14 ${HIPCXXFLAGS}" + + dnl If not in debug mode, at least -O2, but the user can override with + dnl with HIPCXXFLAGS on the configure line. If in debug mode, -O0 -Wall + dnl plus flags for debugging symbols + AS_IF([test x"$hypre_using_debug" == x"yes"], + [HIPCXXFLAGS="-O0 -Wall -g -ggdb ${HIPCXXFLAGS}"], + [HIPCXXFLAGS="-O2 ${HIPCXXFLAGS}"],) + + dnl (Ab)Use CUFLAGS to capture HIP compilation flags + dnl Put HIPCXXFLAGS at the end so the user can override the optimization level. + if test "$hypre_user_chose_cuflags" = "no" + then + CUFLAGS="${HIPCPPFLAGS} ${HIPCXXFLAGS}" + fi + + CUFLAGS="${CUFLAGS} ${CXXFLAGS}" + + dnl rocThrust depends on rocPrim so we need both for Thrust on AMD GPUs. + dnl These are header-only so no linking needed. + HYPRE_HIP_INCL="-I${HYPRE_ROCM_PREFIX}/rocthrust/include" + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/rocprim/include" + + dnl HIP library + HYPRE_HIP_LIBS="-L${HYPRE_ROCM_PREFIX}/lib -lamdhip64" + + dnl rocSPARSE, for things like dcsrmv on AMD GPUs + AS_IF([test x"$hypre_using_rocsparse" == x"yes"], + [AC_DEFINE(HYPRE_USING_ROCSPARSE, 1, [rocSPARSE being used]) + HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocsparse" + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/rocsparse/include" + ]) + + dnl Note rocSPARSE requires rocBLAS, so this is only controlling + dnl whether HYPRE explicitly uses rocBLAS in other places or not. + dnl So we don't need to add any extra libs or anything. 
+ AS_IF([test x"$hypre_using_rocblas" == x"yes"], + [AC_DEFINE(HYPRE_USING_ROCBLAS, 1, [rocBLAS being used]) + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/rocblas/include" + ]) + + dnl rocRAND: random number generation on AMD GPUs + AS_IF([test x"$hypre_using_rocrand" == x"yes"], + [AC_DEFINE(HYPRE_USING_ROCRAND, 1, [rocRAND being used]) + HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocrand" + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/rocrand/include" + ]) + + dnl rocTX tracing API + AS_IF([test x"$hypre_using_gpu_profiling" == x"yes"], + [AC_DEFINE(HYPRE_USING_ROCTX, 1, [Define to 1 if using AMD rocTX profiling]) + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/roctracer/include" + HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lroctx64" + ]) + + ]) dnl AS_IF([test x"$hypre_using_hip" == x"yes"] + + + + dnl ********************************************************************* dnl * Set unified memory options dnl ********************************************************************* if test "$hypre_using_um" != "yes" then dnl Without UM - if test "$hypre_user_chose_cuda" = "yes" + if test "$hypre_using_cuda" = "yes" then - AC_MSG_NOTICE([***********************************************************]) + AC_MSG_NOTICE([***********************************************************************]) AC_MSG_NOTICE([Configuring with --with-cuda=yes without unified memory.]) - AC_MSG_NOTICE([It only works for struct interface.]) + AC_MSG_NOTICE([It only works for structured solvers and selected unstructured solvers]) AC_MSG_NOTICE([Use --enable-unified-memory to compile with unified memory.]) - AC_MSG_NOTICE([***********************************************************]) + AC_MSG_NOTICE([***********************************************************************]) + fi + if test "$hypre_using_hip" = "yes" + then + AC_MSG_NOTICE([***********************************************************************]) + AC_MSG_NOTICE([Configuring with --with-hip=yes without unified 
memory.]) + AC_MSG_NOTICE([It only works for structured solvers and selected unstructured solvers]) + AC_MSG_NOTICE([Use --enable-unified-memory to compile with unified memory.]) + AC_MSG_NOTICE([***********************************************************************]) fi if test "$hypre_using_device_openmp" = "yes" then - AC_MSG_NOTICE([***********************************************************]) + AC_MSG_NOTICE([***********************************************************************]) AC_MSG_NOTICE([Configuring with --with-device-openmp=yes without unified memory.]) - AC_MSG_NOTICE([It only works for struct interface.]) + AC_MSG_NOTICE([It only works for structured solvers and selected unstructured solvers]) AC_MSG_NOTICE([Use --enable-unified-memory to compile with unified memory.]) - AC_MSG_NOTICE([***********************************************************]) + AC_MSG_NOTICE([***********************************************************************]) fi fi @@ -1795,102 +2283,66 @@ then AC_DEFINE([HYPRE_USING_OPENMP],1,[Enable OpenMP support]) fi +dnl ********************************************************************* +dnl * Set Device OpenMP options +dnl ********************************************************************* if test "$hypre_using_device_openmp" = "yes" then - AC_DEFINE(HYPRE_USING_CUSPARSE, 1, [cuSPARSE being used]) + AC_DEFINE(HYPRE_USING_CUSPARSE, 1, [Define to 1 if using cuSPARSE]) - if test "$hypre_using_nvtx" = "yes" + if test "$hypre_using_gpu_profiling" = "yes" then AC_DEFINE(HYPRE_USING_NVTX, 1, [NVTX being used]) fi - AC_DEFINE(HYPRE_USING_DEVICE_OPENMP, 1, [Enable OpenMP (>=4.5) device directives]) - AC_DEFINE(HYPRE_DEVICE_OPENMP_ALLOC, 1, [Enable device OpenMP target alloc version]) - dnl AC_DEFINE(HYPRE_DEVICE_OPENMP_MAPPED, 1, [Enable device OpenMP target mapped version]) + AC_DEFINE(HYPRE_USING_DEVICE_OPENMP, 1, [Define to 1 if executing on device with OpenMP]) - AC_CHECK_PROGS(CUCC, nvcc) + AC_DEFINE(HYPRE_USING_GPU, 1, [Define to 
1 if executing on GPU device]) - CFLAGS="${CFLAGS}" - CXXFLAGS="${CXXFLAGS}" + AC_DEFINE(HYPRE_DEVICE_OPENMP_ALLOC, 1, [Define to 1 if using OpenMP on device [target alloc version]]) - if [test "$CC" = "clang-gpu" || test "$CC" = "mpiclang-gpu"] - then - CFLAGS+=" -fopenmp-nonaliased-maps" - fi - if [test "$CXX" = "clang++-gpu" || test "$CXX" = "mpiclang++-gpu"] + dnl AC_DEFINE(HYPRE_DEVICE_OPENMP_MAPPED, 1, [Define to 1 if using OpenMP on device [target mapped version]]) + + CUFLAGS=${CFLAGS} + + if test "$hypre_user_chose_cuflags" = "no" then - CXXFLAGS+=" -fopenmp-nonaliased-maps" + if [test "$CUCC" = "clang-gpu" || test "$CUCC" = "mpiclang-gpu"] + then + CUFLAGS+=" -fopenmp-nonaliased-maps" + fi + if [test "$CUCC" = "clang++-gpu" || test "$CUCC" = "mpiclang++-gpu"] + then + CUFLAGS+=" -fopenmp-nonaliased-maps" + fi fi + if test "$hypre_using_debug" = "yes" then - AC_DEFINE(HYPRE_DEVICE_OPENMP_CHECK, 1, [Strictly checking OpenMP offload directives]) + AC_DEFINE(HYPRE_DEVICE_OPENMP_CHECK, 1, [Define to 1 if strictly checking OpenMP offload directives]) fi - HYPRE_CUDA_INCL="-I${HYPRE_CUDA_PATH}/include" - HYPRE_CUDA_LIBS="-L${HYPRE_CUDA_PATH}/lib64 -lcusparse -lcudart -lcurand" - if test "$hypre_using_nvtx" = "yes" + HYPRE_CUDA_INCLUDE='-I${HYPRE_CUDA_PATH}/include' + HYPRE_CUDA_LIBS='-L${HYPRE_CUDA_PATH}/lib64 -lcudart -lcusparse -lcurand' + if test "$hypre_using_gpu_profiling" = "yes" then HYPRE_CUDA_LIBS+=" -lnvToolsExt" fi dnl let CC be CXX - CC=${CXX} - LINK_CC=${LINK_CXX} - CXXFLAGS="-x c++ ${CXXFLAGS}" - CFLAGS=${CXXFLAGS} + dnl CC=${CXX} + LINK_CC='${CUCC}' + LINK_CXX='${CUCC}' + dnl CXXFLAGS="-x c++ ${CXXFLAGS}" + dnl CFLAGS=${CXXFLAGS} fi -dnl ********************************************************************* -dnl * Set flags if needed to enable shared libraries and Python, Java -dnl ********************************************************************* -if test "$hypre_using_shared" = "yes" +if [test "x$hypre_using_cuda" = "xyes" || test 
"x$hypre_using_device_openmp" = "xyes"] then - HYPRE_LIBSUFFIX=".so" - if [test "x$CXX" = "xnvcc" || test "x$CC" = "xnvcc"] - then - SHARED_SET_SONAME="-Xlinker=-soname," - SHARED_OPTIONS="-Xlinker=-z,defs" - SHARED_COMPILE_FLAG="-Xcompiler \"-fPIC\"" - else - SHARED_SET_SONAME="-Wl,-soname," - SHARED_OPTIONS="-Wl,-z,defs" - SHARED_COMPILE_FLAG="-fPIC" - fi - case $hypre_platform in - AIX* | aix* | Aix*) SHARED_COMPILE_FLAG="-qmkshrobj" - SHARED_BUILD_FLAG="-G" -dnl LINK_F77="${F77} -brtl" - LINK_FC="${FC} -brtl" - LINK_CC="${CC} -brtl" - LINK_CXX="${CXX} -brtl" ;; - DARWIN* | darwin* | Darwin*) SHARED_BUILD_FLAG="-dynamiclib -undefined dynamic_lookup" - HYPRE_LIBSUFFIX=".dylib" - SHARED_SET_SONAME="-install_name @rpath/" - SHARED_OPTIONS="-undefined error" ;; - *) SHARED_BUILD_FLAG="-shared" ;; - esac - if [test "x$CXX" = "xnvcc" || test "x$CC" = "xnvcc"] + if test "x$hypre_using_cuda_streams" = "xyes"] then - SHARED_BUILD_FLAG="${SHARED_BUILD_FLAG} ${HYPRE_CUDA_GENCODE}" + AC_DEFINE([HYPRE_USING_CUDA_STREAMS],1,[Define to 1 if using CUDA streams]) fi - SHARED_BUILD_FLAG="${SHARED_BUILD_FLAG} ${EXTRA_BUILDFLAGS}" - FFLAGS="${FFLAGS} ${SHARED_COMPILE_FLAG}" - CFLAGS="${CFLAGS} ${SHARED_COMPILE_FLAG}" - CXXFLAGS="${CXXFLAGS} ${SHARED_COMPILE_FLAG}" -dnl BUILD_F77_SHARED="${F77} ${SHARED_BUILD_FLAG}" - BUILD_FC_SHARED="${FC} ${SHARED_BUILD_FLAG}" - if test "$hypre_using_fei" = "yes" - then - BUILD_CC_SHARED="${CXX} ${SHARED_BUILD_FLAG}" - else - BUILD_CC_SHARED="${CC} ${SHARED_BUILD_FLAG}" - fi - BUILD_CXX_SHARED="${CXX} ${SHARED_BUILD_FLAG}" -fi - -if test "x$hypre_using_cuda_streams" = "xyes" -then - AC_DEFINE([HYPRE_USING_CUDA_STREAMS],1,[HYPRE WITH CUDA STREAMS]) fi dnl ********************************************************************* @@ -1898,19 +2350,19 @@ dnl * Set memory env dnl ********************************************************************* if test "x$hypre_using_um" = "xyes" then - AC_DEFINE([HYPRE_USING_UNIFIED_MEMORY],1,[HYPRE WITH UNIFIED 
MEMORY]) + AC_DEFINE([HYPRE_USING_UNIFIED_MEMORY],1,[Define to 1 if using unified memory]) else - if [test "x$hypre_user_chose_cuda" = "xyes" || test "x$hypre_using_device_openmp" = "xyes"] + if [test "x$hypre_using_cuda" = "xyes" || test "x$hypre_using_device_openmp" = "xyes" || test "x$hypre_using_hip" = "xyes"] then - AC_DEFINE([HYPRE_USING_DEVICE_MEMORY],1,[HYPRE WITH DEVICE MEMORY]) + AC_DEFINE([HYPRE_USING_DEVICE_MEMORY],1,[Define to 1 if using device memory without UM]) else - AC_DEFINE([HYPRE_USING_HOST_MEMORY],1,[HYPRE WITH HOST MEMORY]) + AC_DEFINE([HYPRE_USING_HOST_MEMORY],1,[Define to 1 if using host memory only]) fi fi if test "$hypre_gpu_mpi" = "yes" then - AC_DEFINE([HYPRE_WITH_GPU_AWARE_MPI],1,[HYPRE WITH GPU AWARE MPI]) + AC_DEFINE([HYPRE_WITH_GPU_AWARE_MPI],1,[Define to 1 if using GPU aware MPI]) fi dnl ********************************************************************* @@ -2017,7 +2469,6 @@ AC_SUBST(HYPRE_FEI_FEMLI_FILES) dnl ********************************************************************* dnl * BLAS & LAPACK related information dnl ********************************************************************* -AC_SUBST(HYPRE_KOKKOS_PATH) AC_SUBST(BLASLIBDIRS) AC_SUBST(BLASLIBS) AC_SUBST(LAPACKLIBDIRS) @@ -2029,24 +2480,38 @@ dnl ********************************************************************* AC_SUBST(HYPRE_RAJA_LIB_DIR) AC_SUBST(HYPRE_RAJA_INCLUDE) AC_SUBST(HYPRE_RAJA_LIB) -AC_SUBST(RAJA_LIBS) dnl ********************************************************************* dnl * KOKKOS information dnl ********************************************************************* +AC_SUBST(HYPRE_KOKKOS_SRC_DIR) AC_SUBST(HYPRE_KOKKOS_LIB_DIR) AC_SUBST(HYPRE_KOKKOS_INCLUDE) AC_SUBST(HYPRE_KOKKOS_LIB) -AC_SUBST(KOKKOS_LIBS) -AC_SUBST(HYPRE_KOKKOS_INC_FILE) + +dnl ********************************************************************* +dnl * UMPIRE information +dnl ********************************************************************* 
+AC_SUBST(HYPRE_UMPIRE_LIB_DIR) +AC_SUBST(HYPRE_UMPIRE_INCLUDE) +AC_SUBST(HYPRE_UMPIRE_LIB) dnl ********************************************************************* dnl * CUDA stuff dnl ********************************************************************* AC_SUBST(CUFLAGS) -AC_SUBST(HYPRE_CUDA_INCL) +AC_SUBST(CUCC) +AC_SUBST(HYPRE_CUDA_GENCODE) +AC_SUBST(HYPRE_CUDA_PATH) +AC_SUBST(HYPRE_CUDA_INCLUDE) AC_SUBST(HYPRE_CUDA_LIBS) +dnl ********************************************************************* +dnl * HIP stuff +dnl ********************************************************************* +AC_SUBST(HYPRE_HIP_INCL) +AC_SUBST(HYPRE_HIP_LIBS) + dnl ********************************************************************* dnl * Caliper instrumentation dnl ********************************************************************* @@ -2076,4 +2541,4 @@ dnl * Define the files to be configured and made dnl ********************************************************************* AC_CONFIG_FILES([config/Makefile.config]) -AC_OUTPUT +AC_OUTPUT() diff --git a/src/config/depcomp b/src/config/depcomp deleted file mode 100755 index 9e5522d04..000000000 --- a/src/config/depcomp +++ /dev/null @@ -1,520 +0,0 @@ -#! /bin/sh -# depcomp - compile a program generating dependencies as side-effects - -scriptversion=2003-11-08.23 - -# Copyright (C) 1999, 2000, 2003 Free Software Foundation, Inc. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
- -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Originally written by Alexandre Oliva . - -case $1 in - '') - echo "$0: No command. Try \`$0 --help' for more information." 1>&2 - exit 1; - ;; - -h | --h*) - cat <<\EOF -Usage: depcomp [--help] [--version] PROGRAM [ARGS] - -Run PROGRAMS ARGS to compile a file, generating dependencies -as side-effects. - -Environment variables: - depmode Dependency tracking mode. - source Source file read by `PROGRAMS ARGS'. - object Object file output by `PROGRAMS ARGS'. - depfile Dependency file to output. - tmpdepfile Temporary file to use when outputing dependencies. - libtool Whether libtool is used (yes/no). - -Report bugs to . -EOF - exit 0 - ;; - -v | --v*) - echo "depcomp $scriptversion" - exit 0 - ;; -esac - -if test -z "$depmode" || test -z "$source" || test -z "$object"; then - echo "depcomp: Variables source, object and depmode must be set" 1>&2 - exit 1 -fi -# `libtool' can also be set to `yes' or `no'. - -if test -z "$depfile"; then - base=`echo "$object" | sed -e 's,^.*/,,' -e 's,\.\([^.]*\)$,.P\1,'` - dir=`echo "$object" | sed 's,/.*$,/,'` - if test "$dir" = "$object"; then - dir= - fi - # FIXME: should be _deps on DOS. - depfile="$dir.deps/$base" -fi - -tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} - -rm -f "$tmpdepfile" - -# Some modes work just like other modes, but use different flags. We -# parameterize here, but still list the modes in the big case below, -# to make depend.m4 easier to write. 
Note that we *cannot* use a case -# here, because this file can only contain one case statement. -if test "$depmode" = hp; then - # HP compiler uses -M and no extra arg. - gccflag=-M - depmode=gcc -fi - -if test "$depmode" = dashXmstdout; then - # This is just like dashmstdout with a different argument. - dashmflag=-xM - depmode=dashmstdout -fi - -case "$depmode" in -gcc3) -## gcc 3 implements dependency tracking that does exactly what -## we want. Yay! Note: for some reason libtool 1.4 doesn't like -## it if -MD -MP comes after the -MF stuff. Hmm. - "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - mv "$tmpdepfile" "$depfile" - ;; - -gcc) -## There are various ways to get dependency output from gcc. Here's -## why we pick this rather obscure method: -## - Don't want to use -MD because we'd like the dependencies to end -## up in a subdir. Having to rename by hand is ugly. -## (We might end up doing this anyway to support other compilers.) -## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like -## -MM, not -M (despite what the docs say). -## - Using -M directly means running the compiler twice (even worse -## than renaming). - if test -z "$gccflag"; then - gccflag=-MD, - fi - "$@" -Wp,"$gccflag$tmpdepfile" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - echo "$object : \\" > "$depfile" - alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz -## The second -e expression handles DOS-style file names with drive letters. - sed -e 's/^[^:]*: / /' \ - -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" -## This next piece of magic avoids the `deleted header file' problem. -## The problem is that when a header file which appears in a .P file -## is deleted, the dependency causes make to die (because there is -## typically no way to rebuild the header). 
We avoid this by adding -## dummy dependencies for each header file. Too bad gcc doesn't do -## this for us directly. - tr ' ' ' -' < "$tmpdepfile" | -## Some versions of gcc put a space before the `:'. On the theory -## that the space means something, we add a space to the output as -## well. -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -hp) - # This case exists only to let depend.m4 do its work. It works by - # looking at the text of this script. This case will never be run, - # since it is checked for above. - exit 1 - ;; - -sgi) - if test "$libtool" = yes; then - "$@" "-Wp,-MDupdate,$tmpdepfile" - else - "$@" -MDupdate "$tmpdepfile" - fi - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - - if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files - echo "$object : \\" > "$depfile" - - # Clip off the initial element (the dependent). Don't try to be - # clever and replace this with sed code, as IRIX sed won't handle - # lines with more than a fixed number of characters (4096 in - # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; - # the IRIX cc adds comments like `#:fec' to the end of the - # dependency line. - tr ' ' ' -' < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ - tr ' -' ' ' >> $depfile - echo >> $depfile - - # The second pass generates a dummy entry for each header file. - tr ' ' ' -' < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ - >> $depfile - else - # The sourcefile does not contain any dependencies, so just - # store a dummy comment line, to avoid errors with the Makefile - # "include basename.Plo" scheme. 
- echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -aix) - # The C for AIX Compiler uses -M and outputs the dependencies - # in a .u file. In older versions, this file always lives in the - # current directory. Also, the AIX compiler puts `$object:' at the - # start of each line; $object doesn't have directory information. - # Version 6 uses the directory in both cases. - stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` - tmpdepfile="$stripped.u" - if test "$libtool" = yes; then - "$@" -Wc,-M - else - "$@" -M - fi - stat=$? - - if test -f "$tmpdepfile"; then : - else - stripped=`echo "$stripped" | sed 's,^.*/,,'` - tmpdepfile="$stripped.u" - fi - - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - - if test -f "$tmpdepfile"; then - outname="$stripped.o" - # Each line is of the form `foo.o: dependent.h'. - # Do two passes, one to just change these to - # `$object: dependent.h' and one to simply `dependent.h:'. - sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" - sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" - else - # The sourcefile does not contain any dependencies, so just - # store a dummy comment line, to avoid errors with the Makefile - # "include basename.Plo" scheme. - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -icc) - # Intel's C compiler understands `-MD -MF file'. However on - # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c - # ICC 7.0 will fill foo.d with something like - # foo.o: sub/foo.c - # foo.o: sub/foo.h - # which is wrong. We want: - # sub/foo.o: sub/foo.c - # sub/foo.o: sub/foo.h - # sub/foo.c: - # sub/foo.h: - # ICC 7.1 will output - # foo.o: sub/foo.c sub/foo.h - # and will wrap long lines using \ : - # foo.o: sub/foo.c ... \ - # sub/foo.h ... \ - # ... - - "$@" -MD -MF "$tmpdepfile" - stat=$? 
- if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - # Each line is of the form `foo.o: dependent.h', - # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. - # Do two passes, one to just change these to - # `$object: dependent.h' and one to simply `dependent.h:'. - sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" - # Some versions of the HPUX 10.20 sed can't process this invocation - # correctly. Breaking it into two sed invocations is a workaround. - sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | - sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -tru64) - # The Tru64 compiler uses -MD to generate dependencies as a side - # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. - # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put - # dependencies in `foo.d' instead, so we check for that too. - # Subdirectories are respected. - dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` - test "x$dir" = "x$object" && dir= - base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` - - if test "$libtool" = yes; then - tmpdepfile1="$dir.libs/$base.lo.d" - tmpdepfile2="$dir.libs/$base.d" - "$@" -Wc,-MD - else - tmpdepfile1="$dir$base.o.d" - tmpdepfile2="$dir$base.d" - "$@" -MD - fi - - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile1" "$tmpdepfile2" - exit $stat - fi - - if test -f "$tmpdepfile1"; then - tmpdepfile="$tmpdepfile1" - else - tmpdepfile="$tmpdepfile2" - fi - if test -f "$tmpdepfile"; then - sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" - # That's a tab and a space in the []. - sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" - else - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -#nosideeffect) - # This comment above is used by automake to tell side-effect - # dependency tracking mechanisms from slower ones. 
- -dashmstdout) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout, regardless of -o. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - - # Remove `-o $object'. - IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - test -z "$dashmflag" && dashmflag=-M - # Require at least two characters before searching for `:' - # in the target name. This is to cope with DOS-style filenames: - # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. - "$@" $dashmflag | - sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" - rm -f "$depfile" - cat < "$tmpdepfile" > "$depfile" - tr ' ' ' -' < "$tmpdepfile" | \ -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -dashXmstdout) - # This case only exists to satisfy depend.m4. It is never actually - # run, as this mode is specially recognized in the preamble. - exit 1 - ;; - -makedepend) - "$@" || exit $? - # Remove any Libtool call - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - # X makedepend - shift - cleared=no - for arg in "$@"; do - case $cleared in - no) - set ""; shift - cleared=yes ;; - esac - case "$arg" in - -D*|-I*) - set fnord "$@" "$arg"; shift ;; - # Strip any option that makedepend may not understand. Remove - # the object too, otherwise makedepend will parse it as a source file. 
- -*|$object) - ;; - *) - set fnord "$@" "$arg"; shift ;; - esac - done - obj_suffix="`echo $object | sed 's/^.*\././'`" - touch "$tmpdepfile" - ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" - rm -f "$depfile" - cat < "$tmpdepfile" > "$depfile" - sed '1,2d' "$tmpdepfile" | tr ' ' ' -' | \ -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" "$tmpdepfile".bak - ;; - -cpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - - # Remove `-o $object'. - IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - "$@" -E | - sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | - sed '$ s: \\$::' > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - cat < "$tmpdepfile" >> "$depfile" - sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -msvisualcpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout, regardless of -o, - # because we must use -o when running libtool. - "$@" || exit $? - IFS=" " - for arg - do - case "$arg" in - "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") - set fnord "$@" - shift - shift - ;; - *) - set fnord "$@" "$arg" - shift - shift - ;; - esac - done - "$@" -E | - sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - . 
"$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" - echo " " >> "$depfile" - . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -none) - exec "$@" - ;; - -*) - echo "Unknown depmode $depmode" 1>&2 - exit 1 - ;; -esac - -exit 0 - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/src/config/hypre_blas_macros.m4 b/src/config/hypre_blas_macros.m4 index ac691e69a..8335f21e8 100644 --- a/src/config/hypre_blas_macros.m4 +++ b/src/config/hypre_blas_macros.m4 @@ -297,7 +297,7 @@ dnl ************************************************************** else hypre_fmangle_blas=4 fi - AC_DEFINE_UNQUOTED(HYPRE_FMANGLE_BLAS, [$hypre_fmangle_blas], [BLAS mangling]) + AC_DEFINE_UNQUOTED(HYPRE_FMANGLE_BLAS, [$hypre_fmangle_blas], [Define as in HYPRE_FMANGLE to set the BLAS name mangling scheme]) fi dnl ************************************************************** dnl Restore LIBS and LDFLAGS diff --git a/src/config/hypre_lapack_macros.m4 b/src/config/hypre_lapack_macros.m4 index 03d64fed2..501cb0c2a 100644 --- a/src/config/hypre_lapack_macros.m4 +++ b/src/config/hypre_lapack_macros.m4 @@ -272,7 +272,7 @@ dnl ************************************************************** else hypre_fmangle_lapack=4 fi - AC_DEFINE_UNQUOTED(HYPRE_FMANGLE_LAPACK, [$hypre_fmangle_lapack], [LAPACK mangling]) + AC_DEFINE_UNQUOTED(HYPRE_FMANGLE_LAPACK, [$hypre_fmangle_lapack], [Define as in HYPRE_FMANGLE to set the LAPACK name mangling scheme]) fi dnl ************************************************************** diff --git a/src/config/hypre_macros_misc.m4 b/src/config/hypre_macros_misc.m4 index 4bc8f0c6c..14319b509 100644 --- a/src/config/hypre_macros_misc.m4 +++ b/src/config/hypre_macros_misc.m4 @@ -8,7 +8,7 @@ dnl * AC_HYPRE_CHECK_MPI 
dnl * dnl try to determine what the MPI flags should be dnl AC_HYPRE_CHECK_MPI([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) -dnl ACTION-IF-FOUND is a list of shell commands to run +dnl ACTION-IF-FOUND is a list of shell commands to run dnl if an MPI library is found, and dnl ACTION-IF-NOT-FOUND is a list of commands to run it dnl if it is not found. If ACTION-IF-FOUND is not specified, @@ -54,7 +54,7 @@ if test x = x"$MPILIBS"; then $2 : else - AC_DEFINE(HYPRE_HAVE_MPI,1,[Found the MPI library.]) + AC_DEFINE(HYPRE_HAVE_MPI,1,[Define to 1 if an MPI library is found]) $1 : fi @@ -98,7 +98,7 @@ AC_DEFUN([AC_HYPRE_OPTIMIZATION_FLAGS], if test "x${hypre_user_chose_cflags}" = "xno" then - case "${CC}" in + case `basename "${CC}"` in gcc|mpigcc|mpicc) CFLAGS="-O2" if test "$hypre_using_openmp" = "yes" ; then @@ -113,7 +113,7 @@ then LDFLAGS="$LDFLAGS -qopenmp" fi ;; - pgcc|mpipgcc) + pgcc|mpipgcc|mpipgicc) CFLAGS="-fast" if test "$hypre_using_openmp" = "yes" ; then CFLAGS="$CFLAGS -mp" @@ -138,7 +138,7 @@ fi if test "x${hypre_user_chose_cxxflags}" = "xno" then - case "${CXX}" in + case `basename "${CXX}"` in g++|gCC|mpig++|mpicxx|mpic++|mpiCC) CXXFLAGS="-O2" if test "$hypre_using_openmp" = "yes" ; then @@ -151,7 +151,7 @@ then CXXFLAGS="$CXXFLAGS -qopenmp" fi ;; - pgCC|mpipgCC) + pgCC|mpipgCC|pgc++|mpipgic++) CXXFLAGS="-fast" if test "$hypre_using_openmp" = "yes" ; then CXXFLAGS="$CXXFLAGS -mp" @@ -174,7 +174,7 @@ fi if test "x${hypre_user_chose_fflags}" = "xno" then - case "${FC}" in + case `basename "${FC}"` in g77|gfortran|mpigfortran|mpif77) FFLAGS="-O2" if test "$hypre_using_openmp" = "yes" ; then @@ -187,7 +187,7 @@ then FFLAGS="$FFLAGS -qopenmp" fi ;; - pgf77|mpipgf77) + pgf77|mpipgf77|pgfortran|mpipgifort) FFLAGS="-fast" if test "$hypre_using_openmp" = "yes" ; then FFLAGS="$FFLAGS -mp" @@ -218,7 +218,7 @@ AC_DEFUN([AC_HYPRE_DEBUG_FLAGS], if test "x${hypre_user_chose_cflags}" = "xno" then - case "${CC}" in + case `basename "${CC}"` in gcc|mpigcc|mpicc) CFLAGS="-g 
-Wall" if test "$hypre_using_openmp" = "yes" ; then @@ -233,7 +233,7 @@ then LDFLAGS="$LDFLAGS -qopenmp" fi ;; - pgcc|mpipgcc) + pgcc|mpipgcc|mpipgicc) CFLAGS="-g" if test "$hypre_using_openmp" = "yes" ; then CFLAGS="$CFLAGS -mp" @@ -258,7 +258,7 @@ fi if test "x${hypre_user_chose_cxxflags}" = "xno" then - case "${CXX}" in + case `basename "${CXX}"` in g++|gCC|mpig++|mpicxx|mpic++|mpiCC) CXXFLAGS="-g -Wall" if test "$hypre_using_openmp" = "yes" ; then @@ -271,7 +271,7 @@ then CXXFLAGS="$CXXFLAGS -qopenmp" fi ;; - pgCC|mpipgCC) + pgCC|mpipgCC|pgc++|mpipgic++) CXXFLAGS="-g" if test "$hypre_using_openmp" = "yes" ; then CXXFLAGS="$CXXFLAGS -mp" @@ -294,7 +294,7 @@ fi if test "x${hypre_user_chose_fflags}" = "xno" then - case "${FC}" in + case `basename "${FC}"` in g77|gfortran|mpigfortran|mpif77) FFLAGS="-g -Wall" if test "$hypre_using_openmp" = "yes" ; then @@ -307,7 +307,7 @@ then FFLAGS="$FFLAGS -qopenmp" fi ;; - pgf77|mpipgf77) + pgf77|mpipgf77|pgfortran|mpipgifort) FFLAGS="-g" if test "$hypre_using_openmp" = "yes" ; then FFLAGS="$FFLAGS -mp" @@ -340,7 +340,7 @@ dnl * Find the hostname and assign it to an exported macro $HOSTNAME. dnl * Guesses a one-word name for the current architecture, unless ARCH dnl * has been preset. This is an alternative to the built-in macro dnl * AC_CANONICAL_HOST, which gives a three-word name. Uses the utility -dnl * 'tarch', which is a Bourne shell script that should be in the same +dnl * 'tarch', which is a Bourne shell script that should be in the same dnl * directory as the configure script. If tarch is not present or if it dnl * fails, ARCH is set to the value, if any, of shell variable HOSTTYPE, dnl * otherwise ARCH is set to "unknown". @@ -356,7 +356,7 @@ AC_DEFUN([AC_HYPRE_SET_ARCH], HOSTNAME="`$hypre_hostname`" dnl * if $HOSTNAME is still empty, give it the value "unknown". 
- if test -z "$HOSTNAME" + if test -z "$HOSTNAME" then HOSTNAME=unknown AC_MSG_WARN(hostname is unknown) @@ -415,38 +415,38 @@ dnl * dnl * define type of architecture case $HYPRE_ARCH in alpha) - AC_DEFINE(HYPRE_ALPHA) + AC_DEFINE(HYPRE_ALPHA,1,[Define to 1 for Alpha platforms]) ;; sun* | solaris*) - AC_DEFINE(HYPRE_SOLARIS) + AC_DEFINE(HYPRE_SOLARIS,1,[Define to 1 for Solaris.]) ;; hp* | HP*) - AC_DEFINE(HYPRE_HPPA) + AC_DEFINE(HYPRE_HPPA,1,[Define to 1 for HP platforms]) ;; rs6000 | RS6000 | *bgl* | *BGL* | ppc64*) - AC_DEFINE(HYPRE_RS6000) + AC_DEFINE(HYPRE_RS6000,1,[Define to 1 for RS6000 platforms]) ;; IRIX64) - AC_DEFINE(HYPRE_IRIX64) + AC_DEFINE(HYPRE_IRIX64,1,[Define to 1 for IRIX64 platforms]) ;; Linux | linux | LINUX) if test -r /etc/home.config then systemtype=`grep ^SYS_TYPE /etc/home.config | cut -d" " -f2` - case $systemtype in + case $systemtype in chaos*) - AC_DEFINE(HYPRE_LINUX_CHAOS) + AC_DEFINE(HYPRE_LINUX_CHAOS,1,[Define to 1 for Linux on platforms running any version of CHAOS]) ;; *) - AC_DEFINE(HYPRE_LINUX) + AC_DEFINE(HYPRE_LINUX,1,[Define to 1 for Linux platform]) ;; esac else - AC_DEFINE(HYPRE_LINUX) + AC_DEFINE(HYPRE_LINUX,1,[Define to 1 for Linux platform]) fi ;; esac - + dnl * dnl * return architecture and host name values AC_SUBST(HYPRE_ARCH) diff --git a/src/config/missing b/src/config/missing deleted file mode 100755 index e7ef83a1c..000000000 --- a/src/config/missing +++ /dev/null @@ -1,360 +0,0 @@ -#! /bin/sh -# Common stub for a few missing GNU programs while installing. - -scriptversion=2003-09-02.23 - -# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003 -# Free Software Foundation, Inc. -# Originally by Fran,cois Pinard , 1996. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. 
- -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -if test $# -eq 0; then - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 -fi - -run=: - -# In the cases where this matters, `missing' is being run in the -# srcdir already. -if test -f configure.ac; then - configure_ac=configure.ac -else - configure_ac=configure.in -fi - -msg="missing on your system" - -case "$1" in ---run) - # Try to run requested program, and just exit if it succeeds. - run= - shift - "$@" && exit 0 - # Exit code 63 means version mismatch. This often happens - # when the user try to use an ancient version of a tool on - # a file that requires a minimum version. In this case we - # we should proceed has if the program had been absent, or - # if --run hadn't been passed. - if test $? = 63; then - run=: - msg="probably too old" - fi - ;; -esac - -# If it does not exist, or fails to run (possibly an outdated version), -# try to emulate it. -case "$1" in - - -h|--h|--he|--hel|--help) - echo "\ -$0 [OPTION]... PROGRAM [ARGUMENT]... - -Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an -error status if there is no known handling for PROGRAM. 
- -Options: - -h, --help display this help and exit - -v, --version output version information and exit - --run try to run the given command, and emulate it if it fails - -Supported PROGRAM values: - aclocal touch file \`aclocal.m4' - autoconf touch file \`configure' - autoheader touch file \`config.h.in' - automake touch all \`Makefile.in' files - bison create \`y.tab.[ch]', if possible, from existing .[ch] - flex create \`lex.yy.c', if possible, from existing .c - help2man touch the output file - lex create \`lex.yy.c', if possible, from existing .c - makeinfo touch the output file - tar try tar, gnutar, gtar, then tar without non-portable flags - yacc create \`y.tab.[ch]', if possible, from existing .[ch] - -Send bug reports to ." - ;; - - -v|--v|--ve|--ver|--vers|--versi|--versio|--version) - echo "missing $scriptversion (GNU Automake)" - ;; - - -*) - echo 1>&2 "$0: Unknown \`$1' option" - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 - ;; - - aclocal*) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`acinclude.m4' or \`${configure_ac}'. You might want - to install the \`Automake' and \`Perl' packages. Grab them from - any GNU archive site." - touch aclocal.m4 - ;; - - autoconf) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`${configure_ac}'. You might want to install the - \`Autoconf' and \`GNU m4' packages. Grab them from any GNU - archive site." - touch configure - ;; - - autoheader) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`acconfig.h' or \`${configure_ac}'. 
You might want - to install the \`Autoconf' and \`GNU m4' packages. Grab them - from any GNU archive site." - files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` - test -z "$files" && files="config.h" - touch_files= - for f in $files; do - case "$f" in - *:*) touch_files="$touch_files "`echo "$f" | - sed -e 's/^[^:]*://' -e 's/:.*//'`;; - *) touch_files="$touch_files $f.in";; - esac - done - touch $touch_files - ;; - - automake*) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. - You might want to install the \`Automake' and \`Perl' packages. - Grab them from any GNU archive site." - find . -type f -name Makefile.am -print | - sed 's/\.am$/.in/' | - while read f; do touch "$f"; done - ;; - - autom4te) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is needed, but is $msg. - You might have modified some files without having the - proper tools for further handling them. - You can get \`$1' as part of \`Autoconf' from any GNU - archive site." - - file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'` - test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'` - if test -f "$file"; then - touch $file - else - test -z "$file" || exec >$file - echo "#! /bin/sh" - echo "# Created by GNU Automake missing as a replacement of" - echo "# $ $@" - echo "exit 0" - chmod +x $file - exit 1 - fi - ;; - - bison|yacc) - echo 1>&2 "\ -WARNING: \`$1' $msg. You should only need it if - you modified a \`.y' file. You may need the \`Bison' package - in order for those modifications to take effect. You can get - \`Bison' from any GNU archive site." 
- rm -f y.tab.c y.tab.h - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.y) - SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.c - fi - SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.h - fi - ;; - esac - fi - if [ ! -f y.tab.h ]; then - echo >y.tab.h - fi - if [ ! -f y.tab.c ]; then - echo 'main() { return 0; }' >y.tab.c - fi - ;; - - lex|flex) - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a \`.l' file. You may need the \`Flex' package - in order for those modifications to take effect. You can get - \`Flex' from any GNU archive site." - rm -f lex.yy.c - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.l) - SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" lex.yy.c - fi - ;; - esac - fi - if [ ! -f lex.yy.c ]; then - echo 'main() { return 0; }' >lex.yy.c - fi - ;; - - help2man) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a dependency of a manual page. You may need the - \`Help2man' package in order for those modifications to take - effect. You can get \`Help2man' from any GNU archive site." - - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'` - fi - if [ -f "$file" ]; then - touch $file - else - test -z "$file" || exec >$file - echo ".ab help2man is required to generate this page" - exit 1 - fi - ;; - - makeinfo) - if test -z "$run" && (makeinfo --version) > /dev/null 2>&1; then - # We have makeinfo, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is $msg. You should only need it if - you modified a \`.texi' or \`.texinfo' file, or any other file - indirectly affecting the aspect of the manual. 
The spurious - call might also be the consequence of using a buggy \`make' (AIX, - DU, IRIX). You might want to install the \`Texinfo' package or - the \`GNU make' package. Grab either from any GNU archive site." - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` - file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file` - fi - touch $file - ;; - - tar) - shift - if test -n "$run"; then - echo 1>&2 "ERROR: \`tar' requires --run" - exit 1 - fi - - # We have already tried tar in the generic part. - # Look for gnutar/gtar before invocation to avoid ugly error - # messages. - if (gnutar --version > /dev/null 2>&1); then - gnutar "$@" && exit 0 - fi - if (gtar --version > /dev/null 2>&1); then - gtar "$@" && exit 0 - fi - firstarg="$1" - if shift; then - case "$firstarg" in - *o*) - firstarg=`echo "$firstarg" | sed s/o//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - case "$firstarg" in - *h*) - firstarg=`echo "$firstarg" | sed s/h//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - fi - - echo 1>&2 "\ -WARNING: I can't seem to be able to run \`tar' with the given arguments. - You may want to install GNU tar or Free paxutils, or check the - command line arguments." - exit 1 - ;; - - *) - echo 1>&2 "\ -WARNING: \`$1' is needed, and is $msg. - You might have modified some files without having the - proper tools for further handling them. Check the \`README' file, - it often tells you about the needed prerequisites for installing - this package. You may also peek at any GNU archive site, in case - some other package would contain this missing \`$1' program." 
- exit 1 - ;; -esac - -exit 0 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-end: "$" -# End: diff --git a/src/config/update.sh b/src/config/update.sh new file mode 100755 index 000000000..08d93d047 --- /dev/null +++ b/src/config/update.sh @@ -0,0 +1,64 @@ +#!/bin/sh +# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +currentdir=`pwd` +currentdir=`basename $currentdir` +if [ "$currentdir" != "src" ]; then + echo "ERROR: Run this script from the 'src' directory (i.e., 'config/update.sh')." + exit +fi + +source config/version.sh + +# Check that the version number is not smaller than before +currentnum=`grep "HYPRE_NUMBER=" configure | cut -d= -f 2` +if [ $hypre_number -lt $currentnum ]; then + echo "ERROR: HYPRE version number is smaller than the current version!" + exit +elif [ $hypre_number -gt $currentnum ]; then + echo "HYPRE version number is greater than the current version" +else + echo "HYPRE version number is the same as the current version" +fi + +##### Update release information and configure script for Linux build system + +# NOTE: Using '#' as delimiter in sed to allow for '/' in reldate +cat config/configure.in | +sed -e 's#m4_define.*HYPRE_VERS[^)]*#m4_define([M4_HYPRE_VERSION], ['$hypre_version']#' | +sed -e 's#m4_define.*HYPRE_NUMB[^)]*#m4_define([M4_HYPRE_NUMBER], ['$hypre_number']#' | +sed -e 's#m4_define.*HYPRE_DATE[^)]*#m4_define([M4_HYPRE_DATE], ['$hypre_reldate']#' \ +> config/configure.in.tmp +mv config/configure.in.tmp config/configure.in + +ln -s config/configure.in . 
+rm -rf aclocal.m4 configure autom4te.cache +autoconf --include=config +autoheader configure.in +rm configure.in + +cat >> configure < HYPRE_config.h +rm -f HYPRE_config.h.tmp + +EOF + +##### Update release information for CMake build system + +# NOTE: Using '#' as delimiter in sed to allow for '/' in reldate +cat CMakeLists.txt | +sed -e 's#set(HYPRE_VERS[^)]*#set(HYPRE_VERSION '$hypre_version'#' | +sed -e 's#set(HYPRE_NUMB[^)]*#set(HYPRE_NUMBER '$hypre_number'#' | +sed -e 's#set(HYPRE_DATE[^)]*#set(HYPRE_DATE '$hypre_reldate'#' \ +> CMakeLists.txt.tmp +mv CMakeLists.txt.tmp CMakeLists.txt + +##### Update release information in documentation + +(cd docs; ./update-release.sh) + diff --git a/src/config/version.sh b/src/config/version.sh new file mode 100755 index 000000000..1aff1aa4d --- /dev/null +++ b/src/config/version.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other +# HYPRE Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +hypre_version="2.21.0" +hypre_reldate="2021/05/25" + +hypre_major=`echo $hypre_version | cut -d. -f 1` +hypre_minor=`echo $hypre_version | cut -d. -f 2` +hypre_patch=`echo $hypre_version | cut -d. -f 3` + +let hypre_number="$hypre_major*10000 + $hypre_minor*100 + $hypre_patch" + diff --git a/src/config/windows.sh b/src/config/windows.sh deleted file mode 100755 index 565ca10ca..000000000 --- a/src/config/windows.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh -# Copyright 1998-2019 Lawrence Livermore National Security, LLC and other -# HYPRE Project Developers. See the top-level COPYRIGHT file for details. -# -# SPDX-License-Identifier: (Apache-2.0 OR MIT) - - -# Small modifications to several hypre Makefiles needed to allow the use of the -# Visual Studio CL.exe compiler on Windows. Note that this script should be run -# after ../configure, and should not be called more than once! 
- -# Move *.obj to *.o after compiling an object file -sed -e s,' -c $<',' -c $<; mv -f $*.obj $*.o',g \ - config/Makefile.config > /tmp/Makefile.config -mv -f /tmp/Makefile.config config/Makefile.config - -# Take care of the special compilation of lapack/dlamch.c -sed -e s,'-c dlamch.c','-c dlamch.c ; mv -f dlamch.obj dlamch.o',g \ - lapack/Makefile > /tmp/Makefile.lapack -mv -f /tmp/Makefile.lapack lapack/Makefile - -# Take care of the special compilation of SuperLU/superlu_timer.c -sed -e s,' $<',' $<; mv -f $*.obj $*.o',g \ - FEI_mv/SuperLU/SRC/Makefile > /tmp/Makefile.SuperLU -mv -f /tmp/Makefile.SuperLU FEI_mv/SuperLU/SRC/Makefile diff --git a/src/configure b/src/configure index 92b867945..62b3f3fb2 100755 --- a/src/configure +++ b/src/configure @@ -1,7 +1,7 @@ #! /bin/sh # From configure.in Id. # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for hypre 2.18.2. +# Generated by GNU Autoconf 2.69 for hypre 2.21.0. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -581,8 +581,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='hypre' PACKAGE_TARNAME='hypre' -PACKAGE_VERSION='2.18.2' -PACKAGE_STRING='hypre 2.18.2' +PACKAGE_VERSION='2.21.0' +PACKAGE_STRING='hypre 2.21.0' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -633,15 +633,19 @@ SUPERLU_LIBS SUPERLU_INCLUDE CALIPER_LIBS CALIPER_INCLUDE +HYPRE_HIP_LIBS +HYPRE_HIP_INCL HYPRE_CUDA_LIBS -HYPRE_CUDA_INCL -CUFLAGS -HYPRE_KOKKOS_INC_FILE -KOKKOS_LIBS +HYPRE_CUDA_INCLUDE +HYPRE_CUDA_PATH +HYPRE_CUDA_GENCODE +HYPRE_UMPIRE_LIB +HYPRE_UMPIRE_INCLUDE +HYPRE_UMPIRE_LIB_DIR HYPRE_KOKKOS_LIB HYPRE_KOKKOS_INCLUDE HYPRE_KOKKOS_LIB_DIR -RAJA_LIBS +HYPRE_KOKKOS_SRC_DIR HYPRE_RAJA_LIB HYPRE_RAJA_INCLUDE HYPRE_RAJA_LIB_DIR @@ -649,7 +653,6 @@ LAPACKLIBS LAPACKLIBDIRS BLASLIBS BLASLIBDIRS -HYPRE_KOKKOS_PATH HYPRE_FEI_FEMLI_FILES HYPRE_FEI_HYPRE_FILES HYPRE_FEI_SUBDIRS @@ -684,7 +687,7 @@ LINK_FC FFLAGS HOSTNAME HYPRE_ARCH -CUCC +HYPRE_ROCM_PREFIX EGREP GREP CPP @@ -705,6 +708,10 @@ SET_MAKE FC CXX CC +CUFLAGS +CUCC +HYPRE_CUDA_SM +CUDA_HOME host_os host_vendor host_cpu @@ -717,6 +724,7 @@ HYPRE_SRCDIR HYPRE_BUGS HYPRE_TIME HYPRE_DATE +HYPRE_NUMBER HYPRE_VERSION HYPRE_NAME target_alias @@ -770,15 +778,17 @@ enable_complex enable_maxdim enable_persistent enable_hopscotch -with_no_global_partition -enable_global_partition enable_fortran enable_unified_memory enable_cuda_streams -enable_nvtx enable_cusparse +enable_device_memory_pool enable_cublas enable_curand +enable_rocsparse +enable_rocblas +enable_rocrand +enable_gpu_profiling enable_gpu_aware_mpi with_LD with_LDFLAGS @@ -795,6 +805,7 @@ with_MPI_lib_dirs with_MPI_flags with_node_aware_mpi with_node_aware_mpi_include +with_memory_tracker with_blas_lib with_blas_libs with_blas_lib_dirs @@ -818,16 +829,28 @@ with_fei_inc_dir with_mli with_MPI with_cuda +with_hip +with_cuda_home +with_gpu_arch with_raja -with_kokkos with_raja_include with_raja_lib with_raja_libs with_raja_lib_dirs -with_kokkos_lib_dirs +with_kokkos with_kokkos_include with_kokkos_lib with_kokkos_libs +with_kokkos_lib_dirs 
+with_umpire_host +with_umpire_device +with_umpire_um +with_umpire_pinned +with_umpire +with_umpire_include +with_umpire_lib +with_umpire_libs +with_umpire_lib_dirs with_caliper with_caliper_include with_caliper_lib @@ -837,6 +860,10 @@ with_lapack ac_precious_vars='build_alias host_alias target_alias +CUDA_HOME +HYPRE_CUDA_SM +CUCC +CUFLAGS CC CFLAGS LDFLAGS @@ -1388,7 +1415,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures hypre 2.18.2 to adapt to many kinds of systems. +\`configure' configures hypre 2.21.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1453,7 +1480,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of hypre 2.18.2:";; + short | recursive ) echo "Configuration of hypre 2.21.0:";; esac cat <<\_ACEOF @@ -1476,22 +1503,24 @@ Optional Features: --enable-persistent Uses persistent communication (default is NO). --enable-hopscotch Uses hopscotch hashing if configured with OpenMP and atomic capability available(default is NO). - --enable-global-partition - Use global partitioning (default is NO). --enable-fortran Require a working Fortran compiler (default is YES). --enable-unified-memory Use unified memory for allocating the memory (default is NO). --enable-cuda-streams Use CUDA streams (default is YES). - --enable-nvtx Use NVTX (default is NO). --enable-cusparse Use cuSPARSE (default is YES). + --enable-device-memory-pool + Use device pooling allocator (default is NO). --enable-cublas Use cuBLAS (default is NO). --enable-curand Use cuRAND (default is YES). + --enable-rocsparse Use rocSPARSE (default is YES). + --enable-rocblas Use rocBLAS (default is NO). + --enable-rocrand Use rocRAND (default is YES). + --enable-gpu-profiling Use NVTX on CUDA, rocTX on HIP (default is NO). 
--enable-gpu-aware-mpi Use GPU memory aware MPI Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --with-LD=ARG Set linker to ARG. The environment variable 'LD' will be overridden. --with-LDFLAGS=ARG User can manually set linker flags. The 'LDFLAGS' @@ -1542,6 +1571,7 @@ Optional Packages: --with-node-aware-mpi Use Node Aware MPI (default is NO). --with-node-aware-mpi-include=DIR User specifies that nap_comm.hpp is in DIR. + --with-memory-tracker Use memory tracker in hypre (default is NO). --with-blas-lib=LIBS LIBS is space-separated linkable list (enclosed in quotes) of libraries needed for BLAS. OK to use -L and -l flags in the list @@ -1573,7 +1603,7 @@ Optional Packages: name mangling to use when calling hypre from Fortran. It can be set to: "no-underscores", "one-underscore", "two-underscores", - "caps-no-underscores, and "one-before-after". + "caps-no-underscores", and "one-before-after". --with-fmangle-blas=FMANGLE Name mangling for BLAS. See --with-fmangle. --with-fmangle-lapack=FMANGLE @@ -1604,10 +1634,14 @@ Optional Packages: may affect which compiler is chosen. --with-cuda Use CUDA. Require cuda-8.0 or higher (default is NO). + --with-hip Use HIP for AMD GPUs. (default is NO). + --with-cuda-home=DIR User specifies CUDA_HOME in DIR. + --with-gpu-arch=ARG User specifies NVIDIA GPU architecture that the CUDA + files will be compiled for in ARG, where ARG is a + space-separated list (enclosed in quotes) of + numbers. --with-raja Use RAJA. Require RAJA package to be compiled properly (default is NO). - --with-kokkos Use Kokkos. Require kokkos package to be compiled - properly(default is NO). --with-raja-include=DIR User specifies that RAJA/*.h is in DIR. The options --with-raja-include --with-raja-libs and --with-raja-lib-dirs must be used together. @@ -1624,12 +1658,8 @@ Optional Packages: --with-raja-libs, e.g "usr/lib /usr/local/lib". 
The options --with-raja-libs and --raja-blas-lib-dirs must be used together. - --with-kokkos-lib-dirs=DIRS - DIRS is space-separated list (enclosed in quotes) of - directories containing the libraries and - Makefile.kokkos is assumed to be in DIRS/../ . The - options --with-kokkos-libs and --with-kokkos-dirs - must be used together. + --with-kokkos Use Kokkos. Require kokkos package to be compiled + properly(default is NO). --with-kokkos-include=DIR User specifies that KOKKOS headers is in DIR. The options --with-kokkos-include --with-kokkos-libs and @@ -1641,6 +1671,39 @@ Optional Packages: libraries needed for KOKKOS (base name only). The options --with-kokkos-libs and --with-kokkos-dirs must be used together. + --with-kokkos-lib-dirs=DIRS + DIRS is space-separated list (enclosed in quotes) of + directories containing the libraries and + Makefile.kokkos is assumed to be in DIRS/../ . The + options --with-kokkos-libs and --with-kokkos-dirs + must be used together. + --with-umpire-host Use Umpire Allocator for host memory (default is + NO). + --with-umpire-device Use Umpire Allocator for device memory (default is + NO). + --with-umpire-um Use Umpire Allocator for unified memory (default is + NO). + --with-umpire-pinned Use Umpire Allocator for pinned memory (default is + NO). + --with-umpire Use Umpire Allocator for device and unified memory + (default is NO). + --with-umpire-include=DIR + User specifies that UMPIRE headers is in DIR. The + options --with-umpire-include --with-umpire-libs and + --with-umpire-dirs must be used together. + --with-umpire-lib=LIBS LIBS is space-separated linkable list (enclosed in + quotes) of libraries needed for UMPIRE. OK to use -L + and -l flags in the list + --with-umpire-libs=LIBS LIBS is space-separated list (enclosed in quotes) of + libraries needed for UMPIRE (base name only). The + options --with-umpire-libs and --with-umpire-dirs + must be used together. 
+ --with-umpire-lib-dirs=DIRS + DIRS is space-separated list (enclosed in quotes) of + directories containing the libraries specified by + --with-umpire-libs, e.g "usr/lib /usr/local/lib". + The options --with-umpire-libs and + --with-umpire-dirs must be used together. --with-caliper Use Caliper instrumentation (default is NO). --with-caliper-include=DIR Directory where Caliper is installed. @@ -1651,6 +1714,11 @@ Optional Packages: --with-lapack Find a system-provided LAPACK library Some influential environment variables: + CUDA_HOME CUDA home directory + HYPRE_CUDA_SM + CUDA architecture + CUCC CUDA compiler command + CUFLAGS CUDA compiler flags CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a @@ -1730,7 +1798,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -hypre configure 2.18.2 +hypre configure 2.21.0 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2182,7 +2250,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by hypre $as_me 2.18.2, which was +It was created by hypre $as_me 2.21.0, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2568,10 +2636,11 @@ ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. 
HYPRE_NAME="hypre" -HYPRE_VERSION="2.18.2" -HYPRE_DATE="2019/10/28" +HYPRE_VERSION="2.21.0" +HYPRE_NUMBER=22100 +HYPRE_DATE="2021/05/25" HYPRE_TIME="00:00:00" -HYPRE_BUGS="hypre-support@llnl.gov" +HYPRE_BUGS="https://github.com/hypre-space/hypre/issues" HYPRE_SRCDIR="`pwd`" @@ -2585,6 +2654,11 @@ cat >>confdefs.h <<_ACEOF _ACEOF +cat >>confdefs.h <<_ACEOF +#define HYPRE_RELEASE_NUMBER $HYPRE_NUMBER +_ACEOF + + cat >>confdefs.h <<_ACEOF #define HYPRE_RELEASE_DATE "$HYPRE_DATE" _ACEOF @@ -2608,6 +2682,7 @@ _ACEOF + PACKAGE_DATE= PACKAGE_TIME= PACKAGE_DATETIME= @@ -2620,9 +2695,10 @@ PACKAGE_BUGREPORT= hypre_user_chose_mpi=no hypre_user_chose_blas=no hypre_user_chose_lapack=no -hypre_user_chose_cuda=no hypre_user_chose_raja=no +hypre_using_raja=no hypre_user_chose_kokkos=no +hypre_using_kokkos=no hypre_using_c=yes hypre_using_cxx=yes @@ -2644,11 +2720,17 @@ hypre_using_cuda=no hypre_using_gpu=no hypre_using_um=no hypre_gpu_mpi=no +hypre_using_gpu_profiling=no hypre_using_cuda_streams=no -hypre_using_nvtx=no hypre_using_cusparse=yes hypre_using_cublas=no hypre_using_curand=yes +hypre_using_device_pool=no +hypre_using_umpire=no +hypre_using_umpire_host=no +hypre_using_umpire_device=no +hypre_using_umpire_um=no +hypre_using_umpire_pinned=no hypre_using_caliper=no hypre_user_gave_caliper_lib=no @@ -2657,6 +2739,17 @@ hypre_user_gave_caliper_inc=no hypre_found_cuda=no hypre_using_node_aware_mpi=no +hypre_using_memory_tracker=no + + +hypre_using_hip=no +hypre_using_rocsparse=no +hypre_using_rocblas=no +hypre_using_rocrand=no + +hypre_found_hip=no + + hypre_blas_lib_old_style=no hypre_blas_lib_dir_old_style=no @@ -2790,7 +2883,8 @@ fi if test "$hypre_using_mixedint" = "yes" then - $as_echo "#define HYPRE_MIXEDINT 1" >>confdefs.h + +$as_echo "#define HYPRE_MIXEDINT 1" >>confdefs.h fi @@ -2809,7 +2903,8 @@ fi if test "$hypre_using_bigint" = "yes" then - $as_echo "#define HYPRE_BIGINT 1" >>confdefs.h + +$as_echo "#define HYPRE_BIGINT 1" >>confdefs.h fi @@ -2828,7 +2923,8 @@ fi 
if test "$hypre_using_single" = "yes" then - $as_echo "#define HYPRE_SINGLE 1" >>confdefs.h + +$as_echo "#define HYPRE_SINGLE 1" >>confdefs.h fi @@ -2847,7 +2943,8 @@ fi if test "$hypre_using_longdouble" = "yes" then - $as_echo "#define HYPRE_LONG_DOUBLE 1" >>confdefs.h + +$as_echo "#define HYPRE_LONG_DOUBLE 1" >>confdefs.h fi @@ -2866,7 +2963,8 @@ fi if test "$hypre_using_complex" = "yes" then - $as_echo "#define HYPRE_COMPLEX 1" >>confdefs.h + +$as_echo "#define HYPRE_COMPLEX 1" >>confdefs.h fi @@ -2898,7 +2996,8 @@ fi if test "$hypre_using_persistent" = "yes" then - $as_echo "#define HYPRE_USING_PERSISTENT_COMM 1" >>confdefs.h + +$as_echo "#define HYPRE_USING_PERSISTENT_COMM 1" >>confdefs.h fi @@ -2916,33 +3015,11 @@ fi if test "$hypre_using_hopscotch" = "yes" then - $as_echo "#define HYPRE_HOPSCOTCH 1" >>confdefs.h - -fi - -hypre_using_global_partition=no -# Check whether --with-no-global-partition was given. -if test "${with_no_global_partition+set}" = set; then : - withval=$with_no_global_partition; case "${withval}" in - yes) hypre_using_global_partition=no ;; - no) hypre_using_global_partition=yes ;; - *) hypre_using_global_partition=no ;; - esac - -fi - -# Check whether --enable-global-partition was given. -if test "${enable_global_partition+set}" = set; then : - enableval=$enable_global_partition; case "${enableval}" in - yes) hypre_using_global_partition=yes ;; - no) hypre_using_global_partition=no ;; - *) hypre_using_global_partition=yes ;; - esac +$as_echo "#define HYPRE_HOPSCOTCH 1" >>confdefs.h fi - # Check whether --enable-fortran was given. if test "${enable_fortran+set}" = set; then : enableval=$enable_fortran; case "${enableval}" in @@ -2982,19 +3059,6 @@ else fi -# Check whether --enable-nvtx was given. 
-if test "${enable_nvtx+set}" = set; then : - enableval=$enable_nvtx; case "${enableval}" in - yes) hypre_using_nvtx=yes ;; - no) hypre_using_nvtx=no ;; - *) hypre_using_nvtx=no ;; - esac -else - hypre_using_nvtx=no - -fi - - # Check whether --enable-cusparse was given. if test "${enable_cusparse+set}" = set; then : enableval=$enable_cusparse; case "${enableval}" in @@ -3008,6 +3072,19 @@ else fi +# Check whether --enable-device-memory-pool was given. +if test "${enable_device_memory_pool+set}" = set; then : + enableval=$enable_device_memory_pool; case "${enableval}" in + yes) hypre_using_device_pool=yes ;; + no) hypre_using_device_pool=no ;; + *) hypre_using_device_pool=no ;; + esac +else + hypre_using_device_pool=no + +fi + + # Check whether --enable-cublas was given. if test "${enable_cublas+set}" = set; then : enableval=$enable_cublas; case "${enableval}" in @@ -3034,6 +3111,60 @@ else fi + + +# Check whether --enable-rocsparse was given. +if test "${enable_rocsparse+set}" = set; then : + enableval=$enable_rocsparse; case "${enableval}" in + yes) hypre_using_rocsparse=yes ;; + no) hypre_using_rocsparse=no ;; + *) hypre_using_rocsparse=yes ;; + esac +else + hypre_using_rocsparse=yes + +fi + + +# Check whether --enable-rocblas was given. +if test "${enable_rocblas+set}" = set; then : + enableval=$enable_rocblas; case "${enableval}" in + yes) hypre_using_rocblas=yes ;; + no) hypre_using_rocblas=no ;; + *) hypre_using_rocblas=no ;; + esac +else + hypre_using_rocblas=no + +fi + + +# Check whether --enable-rocrand was given. +if test "${enable_rocrand+set}" = set; then : + enableval=$enable_rocrand; case "${enableval}" in + yes) hypre_using_rocrand=yes ;; + no) hypre_using_rocrand=no ;; + *) hypre_using_rocrand=yes ;; + esac +else + hypre_using_rocrand=yes + +fi + + +# Check whether --enable-gpu-profiling was given. 
+if test "${enable_gpu_profiling+set}" = set; then : + enableval=$enable_gpu_profiling; case "${enableval}" in + yes) hypre_using_gpu_profiling=yes ;; + no) hypre_using_gpu_profiling=no ;; + *) hypre_using_gpu_profiling=no ;; + esac +else + hypre_using_gpu_profiling=no + +fi + + # Check whether --enable-gpu-aware-mpi was given. if test "${enable_gpu_aware_mpi+set}" = set; then : enableval=$enable_gpu_aware_mpi; case "${enableval}" in @@ -3076,6 +3207,25 @@ else hypre_user_chose_cxxflags=yes fi + + + + + +if test "x$CUCC" = "x" +then + hypre_user_chose_cudacompilers=no +else + hypre_user_chose_cudacompilers=yes +fi + +if test "x$CUFLAGS" = "x" +then + hypre_user_chose_cuflags=no +else + hypre_user_chose_cuflags=yes +fi + if test "x$F77" != "x" && test "x$FC" = "x" then FC="$F77" @@ -3181,7 +3331,8 @@ fi # Check whether --with-strict-checking was given. if test "${with_strict_checking+set}" = set; then : - withval=$with_strict_checking; + withval=$with_strict_checking; case "${withval}" in + yes) hypre_user_chose_ccompilers=yes hypre_user_chose_cflags=yes hypre_user_chose_cxxcompilers=yes @@ -3367,6 +3518,8 @@ test -n "$FC" || FC="""" $as_echo "#define HYPRE_SEQUENTIAL 1" >>confdefs.h + ;; + esac fi @@ -3447,6 +3600,20 @@ fi +# Check whether --with-memory_tracker was given. +if test "${with_memory_tracker+set}" = set; then : + withval=$with_memory_tracker; case "$withval" in + yes) hypre_using_memory_tracker=yes;; + no) hypre_using_memory_tracker=no ;; + *) hypre_using_memory_tracker=no ;; + esac +else + hypre_using_memory_tracker=no + +fi + + + # Check whether --with-blas-lib was given. if test "${with_blas_lib+set}" = set; then : @@ -3760,8 +3927,7 @@ fi # Check whether --with-cuda was given. 
if test "${with_cuda+set}" = set; then : withval=$with_cuda; case "$withval" in - yes) hypre_user_chose_cuda=yes - hypre_using_cuda=yes ;; + yes) hypre_using_cuda=yes ;; no) hypre_using_cuda=no ;; *) hypre_using_cuda=no ;; esac @@ -3773,28 +3939,57 @@ fi -# Check whether --with-raja was given. -if test "${with_raja+set}" = set; then : - withval=$with_raja; case "$withval" in - yes) hypre_user_chose_raja=yes;; - no) hypre_user_chose_raja=no ;; - *) hypre_user_chose_raja=no ;; +# Check whether --with-hip was given. +if test "${with_hip+set}" = set; then : + withval=$with_hip; case "$withval" in + yes) hypre_using_hip=yes ;; + no) hypre_using_hip=no ;; + *) hypre_using_hip=no ;; esac else - hypre_using_raja=no + hypre_using_hip=no fi +# Check whether --with-cuda-home was given. +if test "${with_cuda_home+set}" = set; then : + withval=$with_cuda_home; for cuda_dir in $withval; do + CUDA_HOME="$cuda_dir" + done; + hypre_using_cuda=yes -# Check whether --with-kokkos was given. -if test "${with_kokkos+set}" = set; then : - withval=$with_kokkos; case "$withval" in - yes) hypre_user_chose_kokkos=yes ;; - no) hypre_user_chose_kokkos=no ;; - *) hypre_user_chose_kokkos=no ;; +fi + + + +# Check whether --with-gpu-arch was given. +if test "${with_gpu_arch+set}" = set; then : + withval=$with_gpu_arch; + if test "x${withval}" != "x" + then + if test "x${HYPRE_CUDA_SM}" = "x" + then + HYPRE_CUDA_SM="${withval}" + fi + fi + + +fi + + + + +# Check whether --with-raja was given. +if test "${with_raja+set}" = set; then : + withval=$with_raja; case "$withval" in + yes) hypre_using_raja=yes;; + no) hypre_using_raja=no ;; + *) hypre_using_raja=no ;; esac +else + hypre_using_raja=no fi @@ -3844,12 +4039,16 @@ fi -# Check whether --with-kokkos-lib-dirs was given. 
-if test "${with_kokkos_lib_dirs+set}" = set; then : - withval=$with_kokkos_lib_dirs; for kokkos_lib_dir in $withval; do - HYPRE_KOKKOS_LIB_DIR="-L$kokkos_lib_dir $HYPRE_KOKKOS_LIB_DIR" - done; -hypre_user_chose_kokkos=yes + +# Check whether --with-kokkos was given. +if test "${with_kokkos+set}" = set; then : + withval=$with_kokkos; case "$withval" in + yes) hypre_using_kokkos=yes ;; + no) hypre_using_kokkos=no ;; + *) hypre_using_kokkos=no ;; + esac +else + hypre_using_kokkos=no fi @@ -3888,10 +4087,137 @@ fi +# Check whether --with-kokkos-lib-dirs was given. +if test "${with_kokkos_lib_dirs+set}" = set; then : + withval=$with_kokkos_lib_dirs; for kokkos_lib_dir in $withval; do + HYPRE_KOKKOS_LIB_DIR="-L$kokkos_lib_dir $HYPRE_KOKKOS_LIB_DIR" + done; +hypre_user_chose_kokkos=yes + +fi + + + + +# Check whether --with-umpire-host was given. +if test "${with_umpire_host+set}" = set; then : + withval=$with_umpire_host; case "${withval}" in + yes) hypre_using_umpire_host=yes ;; + no) hypre_using_umpire_host=no ;; + *) hypre_using_umpire_host=no ;; + esac +else + hypre_using_umpire_host=no + +fi + + + +# Check whether --with-umpire-device was given. +if test "${with_umpire_device+set}" = set; then : + withval=$with_umpire_device; case "${withval}" in + yes) hypre_using_umpire_device=yes ;; + no) hypre_using_umpire_device=no ;; + *) hypre_using_umpire_device=no ;; + esac +else + hypre_using_umpire_device=no + +fi + + + +# Check whether --with-umpire-um was given. +if test "${with_umpire_um+set}" = set; then : + withval=$with_umpire_um; case "${withval}" in + yes) hypre_using_umpire_um=yes ;; + no) hypre_using_umpire_um=no ;; + *) hypre_using_umpire_um=no ;; + esac +else + hypre_using_umpire_um=no + +fi + + + +# Check whether --with-umpire-pinned was given. 
+if test "${with_umpire_pinned+set}" = set; then : + withval=$with_umpire_pinned; case "${withval}" in + yes) hypre_using_umpire_pinned=yes ;; + no) hypre_using_umpire_pinned=no ;; + *) hypre_using_umpire_pinned=no ;; + esac +else + hypre_using_umpire_pinned=no + +fi + + + +# Check whether --with-umpire was given. +if test "${with_umpire+set}" = set; then : + withval=$with_umpire; case "${withval}" in + yes) hypre_using_umpire_device=yes + hypre_using_umpire_um=yes ;; + no) ;; + *) ;; + esac +fi + + + +# Check whether --with-umpire-include was given. +if test "${with_umpire_include+set}" = set; then : + withval=$with_umpire_include; for umpire_dir in $withval; do +HYPRE_UMPIRE_INCLUDE="-I$umpire_dir $HYPRE_UMPIRE_INCLUDE" +done; + + +fi + + + +# Check whether --with-umpire-lib was given. +if test "${with_umpire_lib+set}" = set; then : + withval=$with_umpire_lib; for umpire_lib in $withval; do + HYPRE_UMPIRE_LIB="$umpire_lib $HYPRE_UMPIRE_LIB" + done; + + +fi + + + +# Check whether --with-umpire-libs was given. +if test "${with_umpire_libs+set}" = set; then : + withval=$with_umpire_libs; for umpire_lib in $withval; do + HYPRE_UMPIRE_LIB="-l$umpire_lib $HYPRE_UMPIRE_LIB" + done; + + +fi + + + +# Check whether --with-umpire-lib-dirs was given. +if test "${with_umpire_lib_dirs+set}" = set; then : + withval=$with_umpire_lib_dirs; for umpire_lib_dir in $withval; do + HYPRE_UMPIRE_LIB_DIR="-L$umpire_lib_dir $HYPRE_UMPIRE_LIB_DIR" + done; + + +fi + + + # Check whether --with-caliper was given. 
if test "${with_caliper+set}" = set; then : - withval=$with_caliper; hypre_using_caliper=yes + withval=$with_caliper; case "$withval" in + yes) hypre_using_caliper=yes;; + *) hypre_using_caliper=no ;; +esac else hypre_using_caliper=no fi @@ -3929,9 +4255,9 @@ if test "$hypre_user_chose_ccompilers" = "no" then if test "$hypre_using_mpi" = "no" then - if test "$hypre_using_device_openmp" = "yes" + if test "$hypre_using_openmp" = "yes" then - for ac_prog in xlc-gpu clang-gpu + for ac_prog in xlc_r xlC_r xlc xlC icc icpc gcc g++ pgcc pgCC cc CC kcc KCC do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -3973,11 +4299,8 @@ fi test -n "$CC" && break done - fi - - if test "$hypre_using_openmp" = "yes" - then - for ac_prog in xlc_r xlC_r xlc xlC icc icpc gcc g++ pgcc pgCC cc CC kcc KCC + else + for ac_prog in xlc xlC icc icpc gcc g++ pgcc pgCC cc CC kcc KCC do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -4019,8 +4342,11 @@ fi test -n "$CC" && break done - else - for ac_prog in xlc xlC icc icpc gcc g++ pgcc pgCC cc CC kcc KCC + fi + else + if test "$hypre_using_openmp" = "yes" + then + for ac_prog in mpxlc mpixlc_r mpixlc mpiicc mpigcc mpicc mpipgcc mpipgicc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -4062,11 +4388,8 @@ fi test -n "$CC" && break done - fi - else - if test "$hypre_using_device_openmp" = "yes" - then - for ac_prog in mpixlc-gpu mpiclang-gpu + else + for ac_prog in mpxlc mpixlc mpiicc mpigcc mpicc mpipgcc mpipgicc do # Extract the first word of "$ac_prog", so it can be a program name with args. 
set dummy $ac_prog; ac_word=$2 @@ -4109,20 +4432,31 @@ fi done fi + fi + + if test "x$CC" = "x" + then + hypre_using_c=no + fi +fi +if test "$hypre_user_chose_cxxcompilers" = "no" +then + if test "$hypre_using_mpi" = "no" + then if test "$hypre_using_openmp" = "yes" then - for ac_prog in mpxlc mpixlc_r mpixlc mpiicc mpigcc mpicc mpipgcc + for ac_prog in xlC_r xlc_r xlC xlc icpc icc g++ gcc pgCC pgcc pgc++ CC cc KCC kcc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : +if ${ac_cv_prog_CXX+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -4131,7 +4465,7 @@ do test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="$ac_prog" + ac_cv_prog_CXX="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4141,31 +4475,31 @@ IFS=$as_save_IFS fi fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +$as_echo "$CXX" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$CC" && break + test -n "$CXX" && break done else - for ac_prog in mpxlc mpixlc mpiicc mpigcc mpicc mpipgcc + for ac_prog in xlC xlc icpc icc g++ gcc pgCC pgcc pgc++ CC cc KCC kcc do # Extract the first word of "$ac_prog", so it can be a program name with args. 
set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : +if ${ac_cv_prog_CXX+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -4174,7 +4508,7 @@ do test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="$ac_prog" + ac_cv_prog_CXX="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4184,35 +4518,24 @@ IFS=$as_save_IFS fi fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +$as_echo "$CXX" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$CC" && break + test -n "$CXX" && break done fi - fi - - if test "x$CC" = "x" - then - hypre_using_c=no - fi -fi - -if test "$hypre_user_chose_cxxcompilers" = "no" -then - if test "$hypre_using_mpi" = "no" - then - if test "$hypre_using_device_openmp" = "yes" + else + if test "$hypre_using_openmp" = "yes" then - for ac_prog in xlC-gpu clang++-gpu + for ac_prog in mpxlC mpixlcxx_r mpixlcxx mpixlC mpiicpc mpig++ mpic++ mpicxx mpiCC mpipgCC mpipgic++ do # Extract the first word of "$ac_prog", so it can be a program name with args. 
set dummy $ac_prog; ac_word=$2 @@ -4254,11 +4577,8 @@ fi test -n "$CXX" && break done - fi - - if test "$hypre_using_openmp" = "yes" - then - for ac_prog in xlC_r xlc_r xlC xlc icpc icc g++ gcc pgCC pgcc CC cc KCC kcc + else + for ac_prog in mpxlC mpixlcxx mpixlC mpiicpc mpig++ mpic++ mpicxx mpiCC mpipgCC mpipgic++ do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -4300,18 +4620,32 @@ fi test -n "$CXX" && break done - else - for ac_prog in xlC xlc icpc icc g++ gcc pgCC pgcc CC cc KCC kcc + fi + fi + + if test "x$CXX" = "x" + then + hypre_using_cxx=no + fi +fi + +if test "$hypre_using_fortran" = "yes" -a "$hypre_user_chose_fcompilers" = "no" +then + if test "$hypre_using_mpi" = "no" + then + if test "$hypre_using_openmp" = "yes" + then + for ac_prog in xlf_r ifort gfortran g77 g95 pgf77 pgfortran f77 do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CXX+:} false; then : +if ${ac_cv_prog_FC+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$CXX"; then - ac_cv_prog_CXX="$CXX" # Let the user override the test. + if test -n "$FC"; then + ac_cv_prog_FC="$FC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -4320,7 +4654,7 @@ do test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CXX="$ac_prog" + ac_cv_prog_FC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4330,34 +4664,31 @@ IFS=$as_save_IFS fi fi -CXX=$ac_cv_prog_CXX -if test -n "$CXX"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 -$as_echo "$CXX" >&6; } +FC=$ac_cv_prog_FC +if test -n "$FC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 +$as_echo "$FC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$CXX" && break + test -n "$FC" && break done - fi - else - if test "$hypre_using_device_openmp" = "yes" - then - for ac_prog in mpixlC-gpu mpiclang++-gpu + else + for ac_prog in xlf ifort gfortran g77 g95 pgf77 pgfortran f77 do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CXX+:} false; then : +if ${ac_cv_prog_FC+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$CXX"; then - ac_cv_prog_CXX="$CXX" # Let the user override the test. + if test -n "$FC"; then + ac_cv_prog_FC="$FC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -4366,7 +4697,7 @@ do test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CXX="$ac_prog" + ac_cv_prog_FC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4376,34 +4707,34 @@ IFS=$as_save_IFS fi fi -CXX=$ac_cv_prog_CXX -if test -n "$CXX"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 -$as_echo "$CXX" >&6; } +FC=$ac_cv_prog_FC +if test -n "$FC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 +$as_echo "$FC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$CXX" && break + test -n "$FC" && break done fi - + else if test "$hypre_using_openmp" = "yes" then - for ac_prog in mpxlC mpixlcxx_r mpixlcxx mpixlC mpiicpc mpig++ mpic++ mpicxx mpiCC mpipgCC + for ac_prog in mpxlf mpixlf77_r mpiifort mpif77 mpipgf77 mpipgifort do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CXX+:} false; then : +if ${ac_cv_prog_FC+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$CXX"; then - ac_cv_prog_CXX="$CXX" # Let the user override the test. + if test -n "$FC"; then + ac_cv_prog_FC="$FC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -4412,7 +4743,7 @@ do test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CXX="$ac_prog" + ac_cv_prog_FC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4422,31 +4753,31 @@ IFS=$as_save_IFS fi fi -CXX=$ac_cv_prog_CXX -if test -n "$CXX"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 -$as_echo "$CXX" >&6; } +FC=$ac_cv_prog_FC +if test -n "$FC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 +$as_echo "$FC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$CXX" && break + test -n "$FC" && break done else - for ac_prog in mpxlC mpixlcxx mpixlC mpiicpc mpig++ mpic++ mpicxx mpiCC mpipgCC + for ac_prog in mpxlf mpixlf77 mpiifort mpif77 mpipgf77 mpipgifort do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CXX+:} false; then : +if ${ac_cv_prog_FC+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$CXX"; then - ac_cv_prog_CXX="$CXX" # Let the user override the test. + if test -n "$FC"; then + ac_cv_prog_FC="$FC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -4455,7 +4786,7 @@ do test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CXX="$ac_prog" + ac_cv_prog_FC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4465,45 +4796,61 @@ IFS=$as_save_IFS fi fi -CXX=$ac_cv_prog_CXX -if test -n "$CXX"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 -$as_echo "$CXX" >&6; } +FC=$ac_cv_prog_FC +if test -n "$FC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 +$as_echo "$FC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$CXX" && break + test -n "$FC" && break done fi fi - if test "x$CXX" = "x" + if test "x$FC" = "x" then - hypre_using_cxx=no + hypre_using_fortran=no fi fi -if test "$hypre_using_fortran" = "yes" -a "$hypre_user_chose_fcompilers" = "no" +if test "x$hypre_using_cuda" = "xyes" && test "x$hypre_using_device_openmp" = "xyes" then - if test "$hypre_using_mpi" = "no" + as_fn_error $? "--with-cuda and --with-device-openmp are mutually exclusive" "$LINENO" 5 +fi + +if test "x$hypre_using_cuda" = "xyes" && test "x$hypre_using_hip" = "xyes" +then + as_fn_error $? "--with-cuda and --with-hip are mutually exclusive" "$LINENO" 5 +fi + +if test "x$hypre_using_hip" = "xyes" && test "x$hypre_using_device_openmp" = "xyes" +then + as_fn_error $? "--with-hip and --with-device-openmp are mutually exclusive" "$LINENO" 5 +fi + + +if test "$hypre_user_chose_cudacompilers" = "no" +then + if test "$hypre_using_device_openmp" = "yes" then - if test "$hypre_using_openmp" = "yes" + if test "$hypre_using_mpi" = "no" then - for ac_prog in xlf_r ifort gfortran g77 g95 pgf77 f77 + for ac_prog in xlc-gpu clang-gpu do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... 
" >&6; } -if ${ac_cv_prog_FC+:} false; then : +if ${ac_cv_prog_CUCC+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$FC"; then - ac_cv_prog_FC="$FC" # Let the user override the test. + if test -n "$CUCC"; then + ac_cv_prog_CUCC="$CUCC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -4512,7 +4859,7 @@ do test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_FC="$ac_prog" + ac_cv_prog_CUCC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4522,31 +4869,31 @@ IFS=$as_save_IFS fi fi -FC=$ac_cv_prog_FC -if test -n "$FC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 -$as_echo "$FC" >&6; } +CUCC=$ac_cv_prog_CUCC +if test -n "$CUCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CUCC" >&5 +$as_echo "$CUCC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$FC" && break + test -n "$CUCC" && break done else - for ac_prog in xlf ifort gfortran g77 g95 pgf77 f77 + for ac_prog in mpixlc-gpu mpiclang-gpu do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_FC+:} false; then : +if ${ac_cv_prog_CUCC+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$FC"; then - ac_cv_prog_FC="$FC" # Let the user override the test. + if test -n "$CUCC"; then + ac_cv_prog_CUCC="$CUCC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -4555,7 +4902,7 @@ do test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_FC="$ac_prog" + ac_cv_prog_CUCC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4565,43 +4912,44 @@ IFS=$as_save_IFS fi fi -FC=$ac_cv_prog_FC -if test -n "$FC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 -$as_echo "$FC" >&6; } +CUCC=$ac_cv_prog_CUCC +if test -n "$CUCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CUCC" >&5 +$as_echo "$CUCC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$FC" && break + test -n "$CUCC" && break done fi - else - if test "$hypre_using_openmp" = "yes" - then - for ac_prog in mpxlf mpixlf77_r mpiifort mpif77 mpipgf77 + fi + + if test "$hypre_using_cuda" = "yes" + then + for ac_prog in nvcc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_FC+:} false; then : +if ${ac_cv_prog_CUCC+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$FC"; then - ac_cv_prog_FC="$FC" # Let the user override the test. + if test -n "$CUCC"; then + ac_cv_prog_CUCC="$CUCC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH +for as_dir in "${CUDA_HOME}/bin" do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_FC="$ac_prog" + ac_cv_prog_CUCC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4611,31 +4959,36 @@ IFS=$as_save_IFS fi fi -FC=$ac_cv_prog_FC -if test -n "$FC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 -$as_echo "$FC" >&6; } +CUCC=$ac_cv_prog_CUCC +if test -n "$CUCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CUCC" >&5 +$as_echo "$CUCC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$FC" && break + test -n "$CUCC" && break done +test -n "$CUCC" || CUCC="""" - else - for ac_prog in mpxlf mpixlf77 mpiifort mpif77 mpipgf77 + CUCC="\${HYPRE_CUDA_PATH}/bin/${CUCC} -ccbin=\${CXX}" + fi + + if test "$hypre_using_hip" = "yes" + then + for ac_prog in hipcc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_FC+:} false; then : +if ${ac_cv_prog_CUCC+:} false; then : $as_echo_n "(cached) " >&6 else - if test -n "$FC"; then - ac_cv_prog_FC="$FC" # Let the user override the test. + if test -n "$CUCC"; then + ac_cv_prog_CUCC="$CUCC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH @@ -4644,7 +4997,7 @@ do test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_FC="$ac_prog" + ac_cv_prog_CUCC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi @@ -4654,25 +5007,19 @@ IFS=$as_save_IFS fi fi -FC=$ac_cv_prog_FC -if test -n "$FC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $FC" >&5 -$as_echo "$FC" >&6; } +CUCC=$ac_cv_prog_CUCC +if test -n "$CUCC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CUCC" >&5 +$as_echo "$CUCC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi - test -n "$FC" && break + test -n "$CUCC" && break done - fi - fi - - if test "x$FC" = "x" - then - hypre_using_fortran=no fi fi @@ -7224,20 +7571,23 @@ fi $as_echo "$hypre_cv_func_MPI_Comm_f2c_macro" >&6; } if test $ac_cv_func_MPI_Comm_f2c = yes \ || test $hypre_cv_func_MPI_Comm_f2c_macro = yes; then - $as_echo "#define HYPRE_HAVE_MPI_COMM_F2C 1" >>confdefs.h + +$as_echo "#define HYPRE_HAVE_MPI_COMM_F2C 1" >>confdefs.h fi fi -if test "$hypre_using_global_partition" = "no" +if test "$hypre_using_node_aware_mpi" = "yes" then - $as_echo "#define HYPRE_NO_GLOBAL_PARTITION 1" >>confdefs.h + +$as_echo "#define HYPRE_USING_NODE_AWARE_MPI 1" >>confdefs.h fi -if test "$hypre_using_node_aware_mpi" = "yes" +if test "$hypre_using_memory_tracker" = "yes" then - $as_echo "#define HYPRE_USING_NODE_AWARE_MPI 1" >>confdefs.h + +$as_echo "#define HYPRE_USING_MEMORY_TRACKER 1" >>confdefs.h fi @@ -7739,7 +8089,7 @@ then if test "x${hypre_user_chose_cflags}" = "xno" then - case "${CC}" in + case `basename "${CC}"` in gcc|mpigcc|mpicc) CFLAGS="-g -Wall" if test "$hypre_using_openmp" = "yes" ; then @@ -7754,7 +8104,7 @@ then LDFLAGS="$LDFLAGS -qopenmp" fi ;; - pgcc|mpipgcc) + pgcc|mpipgcc|mpipgicc) CFLAGS="-g" if test "$hypre_using_openmp" = "yes" ; then CFLAGS="$CFLAGS -mp" @@ -7779,7 +8129,7 @@ fi if test "x${hypre_user_chose_cxxflags}" = "xno" 
then - case "${CXX}" in + case `basename "${CXX}"` in g++|gCC|mpig++|mpicxx|mpic++|mpiCC) CXXFLAGS="-g -Wall" if test "$hypre_using_openmp" = "yes" ; then @@ -7792,7 +8142,7 @@ then CXXFLAGS="$CXXFLAGS -qopenmp" fi ;; - pgCC|mpipgCC) + pgCC|mpipgCC|pgc++|mpipgic++) CXXFLAGS="-g" if test "$hypre_using_openmp" = "yes" ; then CXXFLAGS="$CXXFLAGS -mp" @@ -7815,7 +8165,7 @@ fi if test "x${hypre_user_chose_fflags}" = "xno" then - case "${FC}" in + case `basename "${FC}"` in g77|gfortran|mpigfortran|mpif77) FFLAGS="-g -Wall" if test "$hypre_using_openmp" = "yes" ; then @@ -7828,7 +8178,7 @@ then FFLAGS="$FFLAGS -qopenmp" fi ;; - pgf77|mpipgf77) + pgf77|mpipgf77|pgfortran|mpipgifort) FFLAGS="-g" if test "$hypre_using_openmp" = "yes" ; then FFLAGS="$FFLAGS -mp" @@ -7848,11 +8198,14 @@ then ;; esac fi + +$as_echo "#define HYPRE_DEBUG 1" >>confdefs.h + else if test "x${hypre_user_chose_cflags}" = "xno" then - case "${CC}" in + case `basename "${CC}"` in gcc|mpigcc|mpicc) CFLAGS="-O2" if test "$hypre_using_openmp" = "yes" ; then @@ -7867,7 +8220,7 @@ then LDFLAGS="$LDFLAGS -qopenmp" fi ;; - pgcc|mpipgcc) + pgcc|mpipgcc|mpipgicc) CFLAGS="-fast" if test "$hypre_using_openmp" = "yes" ; then CFLAGS="$CFLAGS -mp" @@ -7892,7 +8245,7 @@ fi if test "x${hypre_user_chose_cxxflags}" = "xno" then - case "${CXX}" in + case `basename "${CXX}"` in g++|gCC|mpig++|mpicxx|mpic++|mpiCC) CXXFLAGS="-O2" if test "$hypre_using_openmp" = "yes" ; then @@ -7905,7 +8258,7 @@ then CXXFLAGS="$CXXFLAGS -qopenmp" fi ;; - pgCC|mpipgCC) + pgCC|mpipgCC|pgc++|mpipgic++) CXXFLAGS="-fast" if test "$hypre_using_openmp" = "yes" ; then CXXFLAGS="$CXXFLAGS -mp" @@ -7928,7 +8281,7 @@ fi if test "x${hypre_user_chose_fflags}" = "xno" then - case "${FC}" in + case `basename "${FC}"` in g77|gfortran|mpigfortran|mpif77) FFLAGS="-O2" if test "$hypre_using_openmp" = "yes" ; then @@ -7941,7 +8294,7 @@ then FFLAGS="$FFLAGS -qopenmp" fi ;; - pgf77|mpipgf77) + pgf77|mpipgf77|pgfortran|mpipgifort) FFLAGS="-fast" if test 
"$hypre_using_openmp" = "yes" ; then FFLAGS="$FFLAGS -mp" @@ -7974,9 +8327,9 @@ case $hypre_platform in FFLAGS="${FFLAGS} -blpdata" ;; esac -LINK_FC="${FC}" -LINK_CC="${CC}" -LINK_CXX="${CXX}" +LINK_FC='${FC}' +LINK_CC='${CC}' +LINK_CXX='${CXX}' HYPRE_LIBSUFFIX=".a" @@ -8103,6 +8456,50 @@ fi fi +if test "$hypre_using_shared" = "yes" +then + HYPRE_LIBSUFFIX=".so" + if test "$hypre_using_cuda" = "yes" + then + SHARED_SET_SONAME="-Xlinker=-soname," + SHARED_OPTIONS="-Xlinker=-z,defs" + else + SHARED_SET_SONAME="-Wl,-soname," + SHARED_OPTIONS="-Wl,-z,defs" + fi + SHARED_COMPILE_FLAG="-fPIC" + case $hypre_platform in + AIX* | aix* | Aix*) SHARED_COMPILE_FLAG="-qmkshrobj" + SHARED_BUILD_FLAG="-G" + LINK_FC='${FC} -brtl' + LINK_CC='${CC} -brtl' + LINK_CXX='${CXX} -brtl' ;; + DARWIN* | darwin* | Darwin*) SHARED_BUILD_FLAG="-dynamiclib -undefined dynamic_lookup" + HYPRE_LIBSUFFIX=".dylib" + SHARED_SET_SONAME="-install_name @rpath/" + SHARED_OPTIONS="-undefined error" ;; + *) SHARED_BUILD_FLAG="-shared" ;; + esac + SHARED_BUILD_FLAG="${SHARED_BUILD_FLAG} ${EXTRA_BUILDFLAGS}" + FFLAGS="${FFLAGS} ${SHARED_COMPILE_FLAG}" + CFLAGS="${CFLAGS} ${SHARED_COMPILE_FLAG}" + CXXFLAGS="${CXXFLAGS} ${SHARED_COMPILE_FLAG}" + + BUILD_FC_SHARED="\${FC} ${SHARED_BUILD_FLAG}" + if test "$hypre_using_fei" = "yes" + then + BUILD_CC_SHARED="\${CXX} ${SHARED_BUILD_FLAG}" + else + BUILD_CC_SHARED="\${CC} ${SHARED_BUILD_FLAG}" + fi + BUILD_CXX_SHARED="\${CXX} ${SHARED_BUILD_FLAG}" + if test "$hypre_using_cuda" = "yes" + then + BUILD_CC_SHARED="\${CUCC} ${SHARED_BUILD_FLAG}" + fi + fi + + if test "$hypre_using_caliper" = "yes" then if test "$hypre_user_gave_caliper_inc" != "yes" @@ -8137,8 +8534,14 @@ $as_echo "$as_me: WARNING: ***************************************************** fi fi - -if test "$hypre_using_cuda" = "yes" || test "$hypre_using_device_openmp" = "yes" || test "$hypre_using_um" = "yes" +if test "x$hypre_using_um" = "xyes" +then + if test "x$hypre_using_cuda" != "xyes" && test 
"x$hypre_using_device_openmp" != "xyes" && test "x$hypre_using_hip" != "xyes" + then + as_fn_error $? "Asked for unified memory, but not using CUDA, HIP, or device OpenMP!" "$LINENO" 5 + fi +fi +if test "$hypre_using_cuda" = "yes" || test "$hypre_using_device_openmp" = "yes" then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 $as_echo_n "checking for ANSI C header files... " >&6; } @@ -8314,9 +8717,36 @@ done fi fi -if test "x$hypre_user_chose_raja" = "xyes" + +if test x"$hypre_using_hip" == x"yes" ; then : + if test -n "$ROCM_PATH"; then : + HYPRE_ROCM_PREFIX=$ROCM_PATH +else + HYPRE_ROCM_PREFIX=/opt/rocm +fi + + + for ac_header in "${HYPRE_ROCM_PREFIX}/include/hip/hip_common.h" +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + hypre_found_hip=yes +else + as_fn_error $? "unable to find ${HYPRE_ROCM_PREFIX}/include/hip/hip_common.h ... Ensure ROCm is installed and set ROCM_PATH environment variable to ROCm installation path." 
"$LINENO" 5 +fi + +done + +fi + + + +if test "x$hypre_using_raja" = "xyes" then - RAJA_LIBS=" $HYPRE_RAJA_LIB_DIR $HYPRE_RAJA_LIB " $as_echo "#define HYPRE_USING_RAJA 1" >>confdefs.h @@ -8334,9 +8764,8 @@ $as_echo "#define HYPRE_USING_RAJA 1" >>confdefs.h CFLAGS=${CXXFLAGS} fi -if test "x$hypre_user_chose_kokkos" = "xyes" +if test "x$hypre_using_kokkos" = "xyes" then - KOKKOS_LIBS="$HYPRE_KOKKOS_LIB_DIR $HYPRE_KOKKOS_LIB" $as_echo "#define HYPRE_USING_KOKKOS 1" >>confdefs.h @@ -8354,9 +8783,52 @@ $as_echo "#define HYPRE_USING_KOKKOS 1" >>confdefs.h CFLAGS=${CXXFLAGS} fi -if test "$hypre_user_chose_cuda" = "yes" +if test "x$hypre_using_umpire_host" = "xyes" +then + hypre_using_umpire=yes + +$as_echo "#define HYPRE_USING_UMPIRE_HOST 1" >>confdefs.h + +fi + +if test "x$hypre_using_umpire_device" = "xyes" +then + hypre_using_umpire=yes + +$as_echo "#define HYPRE_USING_UMPIRE_DEVICE 1" >>confdefs.h + +fi + +if test "x$hypre_using_umpire_um" = "xyes" +then + hypre_using_umpire=yes + +$as_echo "#define HYPRE_USING_UMPIRE_UM 1" >>confdefs.h + +fi + +if test "x$hypre_using_umpire_pinned" = "xyes" +then + hypre_using_umpire=yes + +$as_echo "#define HYPRE_USING_UMPIRE_PINNED 1" >>confdefs.h + +fi + +if test "x$hypre_using_umpire" = "xyes" +then + +$as_echo "#define HYPRE_USING_UMPIRE 1" >>confdefs.h + +fi + +if test "$hypre_using_cuda" = "yes" then +$as_echo "#define HYPRE_USING_GPU 1" >>confdefs.h + + + $as_echo "#define HYPRE_USING_CUDA 1" >>confdefs.h @@ -8364,7 +8836,7 @@ $as_echo "#define HYPRE_USING_CUDA 1" >>confdefs.h $as_echo "#define HYPRE_USING_CUSPARSE 1" >>confdefs.h - if test "$hypre_using_nvtx" = "yes" + if test "$hypre_using_gpu_profiling" = "yes" then $as_echo "#define HYPRE_USING_NVTX 1" >>confdefs.h @@ -8378,6 +8850,13 @@ $as_echo "#define HYPRE_USING_CUSPARSE 1" >>confdefs.h fi + if test "$hypre_using_device_pool" = "yes" + then + +$as_echo "#define HYPRE_USING_DEVICE_POOL 1" >>confdefs.h + + fi + if test "$hypre_using_cublas" = "yes" then @@ -8392,72 
+8871,44 @@ $as_echo "#define HYPRE_USING_CURAND 1" >>confdefs.h fi - for ac_prog in nvcc -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CUCC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CUCC"; then - ac_cv_prog_CUCC="$CUCC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CUCC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CUCC=$ac_cv_prog_CUCC -if test -n "$CUCC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CUCC" >&5 -$as_echo "$CUCC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$CUCC" && break -done - - NVCCBIN=${CXX} - CC=${CUCC} - CXX=${CUCC} - LINK_CC=${CXX} - LINK_CXX=${CXX} + LINK_CC='${CUCC}' + LINK_CXX='${CUCC}' if test "x$HYPRE_CUDA_SM" = "x" then - HYPRE_CUDA_SM=60 + HYPRE_CUDA_SM=70 fi - HYPRE_CUDA_GENCODE="-gencode arch=compute_${HYPRE_CUDA_SM},\"code=sm_${HYPRE_CUDA_SM}\"" + HYPRE_CUDA_GENCODE="" + for sm in ${HYPRE_CUDA_SM}; do + HYPRE_CUDA_GENCODE="${HYPRE_CUDA_GENCODE}-gencode arch=compute_${sm},code=sm_${sm} " + done - CUFLAGS+="-O2 -ccbin=$NVCCBIN ${HYPRE_CUDA_GENCODE} -expt-extended-lambda -dc -std=c++11 -Xcompiler -Wno-deprecated-register --x cu" - if test "$hypre_using_debug" = "yes" + if test "$hypre_user_chose_cuflags" = "no" then - CUFLAGS="-g ${CUFLAGS}" + CUFLAGS="-lineinfo -expt-extended-lambda -dc -std=c++11 --x cu" + if test "$hypre_using_debug" = "yes" + then + CUFLAGS="-g -O0 ${CUFLAGS}" + else + 
CUFLAGS="-O2 ${CUFLAGS}" + fi fi - CXXFLAGS="${CUFLAGS} -Xcompiler \"${CXXFLAGS}\"" - CFLAGS=${CXXFLAGS} - LDFLAGS="-ccbin=$NVCCBIN ${HYPRE_CUDA_GENCODE} -Xcompiler \"${LDFLAGS}\"" - HYPRE_CUDA_INCL="-I${HYPRE_CUDA_PATH}/include" - HYPRE_CUDA_LIBS="-L${HYPRE_CUDA_PATH}/lib64 -lcudart" - if test "$hypre_using_nvtx" = "yes" + + if test "$hypre_user_chose_cxxflags" = "no" + then + if test "${CXX}" = "mpixlC" || test "${CXX}" = "xlC_r" || test "${CXX}" = "xlC" + then + CXXFLAGS+=" -Wno-deprecated-register -Wenum-compare" + fi + fi + + CUFLAGS="${CUFLAGS} -Xcompiler \"${CXXFLAGS}\"" + + LDFLAGS="-Xcompiler \"${LDFLAGS}\"" + HYPRE_CUDA_INCLUDE='-I${HYPRE_CUDA_PATH}/include' + HYPRE_CUDA_LIBS='-L${HYPRE_CUDA_PATH}/lib64 -lcudart' + if test "$hypre_using_gpu_profiling" = "yes" then HYPRE_CUDA_LIBS+=" -lnvToolsExt" fi @@ -8478,33 +8929,128 @@ done fi fi +if test x"$hypre_using_hip" == x"yes"; then : + + +$as_echo "#define HYPRE_USING_GPU 1" >>confdefs.h + + +$as_echo "#define HYPRE_USING_HIP 1" >>confdefs.h + + + + LINK_CC='${CUCC}' + LINK_CXX='${CUCC}' + + if test "x${HYPRE_CUDA_SM}" != "x" + then + HYPRE_CUDA_GENCODE="--amdgpu-target=" + for sm in ${HYPRE_CUDA_SM}; do + HYPRE_CUDA_GENCODE="${HYPRE_CUDA_GENCODE}${sm}," + done + HYPRE_CUDA_GENCODE="`echo ${HYPRE_CUDA_GENCODE}|sed 's/,$//'`" + fi + + HIPCXXFLAGS="-x hip -std=c++14 ${HIPCXXFLAGS}" + + if test x"$hypre_using_debug" == x"yes"; then : + HIPCXXFLAGS="-O0 -Wall -g -ggdb ${HIPCXXFLAGS}" +elif HIPCXXFLAGS="-O2 ${HIPCXXFLAGS}"; then : + +fi + + if test "$hypre_user_chose_cuflags" = "no" + then + CUFLAGS="${HIPCPPFLAGS} ${HIPCXXFLAGS}" + fi + + CUFLAGS="${CUFLAGS} ${CXXFLAGS}" + + HYPRE_HIP_INCL="-I${HYPRE_ROCM_PREFIX}/rocthrust/include" + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/rocprim/include" + + HYPRE_HIP_LIBS="-L${HYPRE_ROCM_PREFIX}/lib -lamdhip64" + + if test x"$hypre_using_rocsparse" == x"yes"; then : + +$as_echo "#define HYPRE_USING_ROCSPARSE 1" >>confdefs.h + + 
HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocsparse" + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/rocsparse/include" + +fi + + if test x"$hypre_using_rocblas" == x"yes"; then : + +$as_echo "#define HYPRE_USING_ROCBLAS 1" >>confdefs.h + + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/rocblas/include" + +fi + + if test x"$hypre_using_rocrand" == x"yes"; then : + +$as_echo "#define HYPRE_USING_ROCRAND 1" >>confdefs.h + + HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lrocrand" + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/rocrand/include" + +fi + + if test x"$hypre_using_gpu_profiling" == x"yes"; then : + +$as_echo "#define HYPRE_USING_ROCTX 1" >>confdefs.h + + HYPRE_HIP_INCL="${HYPRE_HIP_INCL} -I${HYPRE_ROCM_PREFIX}/roctracer/include" + HYPRE_HIP_LIBS="${HYPRE_HIP_LIBS} -lroctx64" + +fi + + +fi + + + if test "$hypre_using_um" != "yes" then - if test "$hypre_user_chose_cuda" = "yes" + if test "$hypre_using_cuda" = "yes" then - { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************" >&5 -$as_echo "$as_me: ***********************************************************" >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************************" >&5 +$as_echo "$as_me: ***********************************************************************" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Configuring with --with-cuda=yes without unified memory." >&5 $as_echo "$as_me: Configuring with --with-cuda=yes without unified memory." >&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: It only works for struct interface." >&5 -$as_echo "$as_me: It only works for struct interface." 
>&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: It only works for structured solvers and selected unstructured solvers" >&5 +$as_echo "$as_me: It only works for structured solvers and selected unstructured solvers" >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: Use --enable-unified-memory to compile with unified memory." >&5 +$as_echo "$as_me: Use --enable-unified-memory to compile with unified memory." >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************************" >&5 +$as_echo "$as_me: ***********************************************************************" >&6;} + fi + if test "$hypre_using_hip" = "yes" + then + { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************************" >&5 +$as_echo "$as_me: ***********************************************************************" >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: Configuring with --with-hip=yes without unified memory." >&5 +$as_echo "$as_me: Configuring with --with-hip=yes without unified memory." >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: It only works for structured solvers and selected unstructured solvers" >&5 +$as_echo "$as_me: It only works for structured solvers and selected unstructured solvers" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Use --enable-unified-memory to compile with unified memory." >&5 $as_echo "$as_me: Use --enable-unified-memory to compile with unified memory." 
>&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************" >&5 -$as_echo "$as_me: ***********************************************************" >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************************" >&5 +$as_echo "$as_me: ***********************************************************************" >&6;} fi if test "$hypre_using_device_openmp" = "yes" then - { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************" >&5 -$as_echo "$as_me: ***********************************************************" >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************************" >&5 +$as_echo "$as_me: ***********************************************************************" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Configuring with --with-device-openmp=yes without unified memory." >&5 $as_echo "$as_me: Configuring with --with-device-openmp=yes without unified memory." >&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: It only works for struct interface." >&5 -$as_echo "$as_me: It only works for struct interface." >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: It only works for structured solvers and selected unstructured solvers" >&5 +$as_echo "$as_me: It only works for structured solvers and selected unstructured solvers" >&6;} { $as_echo "$as_me:${as_lineno-$LINENO}: Use --enable-unified-memory to compile with unified memory." >&5 $as_echo "$as_me: Use --enable-unified-memory to compile with unified memory." 
>&6;} - { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************" >&5 -$as_echo "$as_me: ***********************************************************" >&6;} + { $as_echo "$as_me:${as_lineno-$LINENO}: ***********************************************************************" >&5 +$as_echo "$as_me: ***********************************************************************" >&6;} fi fi @@ -8521,74 +9067,40 @@ then $as_echo "#define HYPRE_USING_CUSPARSE 1" >>confdefs.h - if test "$hypre_using_nvtx" = "yes" + if test "$hypre_using_gpu_profiling" = "yes" then $as_echo "#define HYPRE_USING_NVTX 1" >>confdefs.h fi -$as_echo "#define HYPRE_USING_DEVICE_OPENMP 1" >>confdefs.h +$as_echo "#define HYPRE_USING_DEVICE_OPENMP 1" >>confdefs.h -$as_echo "#define HYPRE_DEVICE_OPENMP_ALLOC 1" >>confdefs.h +$as_echo "#define HYPRE_USING_GPU 1" >>confdefs.h - for ac_prog in nvcc -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CUCC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CUCC"; then - ac_cv_prog_CUCC="$CUCC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CUCC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS -fi -fi -CUCC=$ac_cv_prog_CUCC -if test -n "$CUCC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CUCC" >&5 -$as_echo "$CUCC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi +$as_echo "#define HYPRE_DEVICE_OPENMP_ALLOC 1" >>confdefs.h - test -n "$CUCC" && break -done - CFLAGS="${CFLAGS}" - CXXFLAGS="${CXXFLAGS}" + CUFLAGS=${CFLAGS} - if test "$CC" = "clang-gpu" || test "$CC" = "mpiclang-gpu" - then - CFLAGS+=" -fopenmp-nonaliased-maps" - fi - if test "$CXX" = "clang++-gpu" || test "$CXX" = "mpiclang++-gpu" + if test "$hypre_user_chose_cuflags" = "no" then - CXXFLAGS+=" -fopenmp-nonaliased-maps" + if test "$CUCC" = "clang-gpu" || test "$CUCC" = "mpiclang-gpu" + then + CUFLAGS+=" -fopenmp-nonaliased-maps" + fi + if test "$CUCC" = "clang++-gpu" || test "$CUCC" = "mpiclang++-gpu" + then + CUFLAGS+=" -fopenmp-nonaliased-maps" + fi fi + if test "$hypre_using_debug" = "yes" then @@ -8596,67 +9108,25 @@ $as_echo "#define HYPRE_DEVICE_OPENMP_CHECK 1" >>confdefs.h fi - HYPRE_CUDA_INCL="-I${HYPRE_CUDA_PATH}/include" - HYPRE_CUDA_LIBS="-L${HYPRE_CUDA_PATH}/lib64 -lcusparse -lcudart -lcurand" - if test "$hypre_using_nvtx" = "yes" + HYPRE_CUDA_INCLUDE='-I${HYPRE_CUDA_PATH}/include' + HYPRE_CUDA_LIBS='-L${HYPRE_CUDA_PATH}/lib64 -lcudart -lcusparse -lcurand' + if test "$hypre_using_gpu_profiling" = "yes" then HYPRE_CUDA_LIBS+=" -lnvToolsExt" fi - CC=${CXX} - LINK_CC=${LINK_CXX} - CXXFLAGS="-x c++ ${CXXFLAGS}" - CFLAGS=${CXXFLAGS} -fi + LINK_CC='${CUCC}' + LINK_CXX='${CUCC}' + fi -if test "$hypre_using_shared" = "yes" +if test "x$hypre_using_cuda" = "xyes" || test "x$hypre_using_device_openmp" = "xyes" then - HYPRE_LIBSUFFIX=".so" - if test "x$CXX" = 
"xnvcc" || test "x$CC" = "xnvcc" - then - SHARED_SET_SONAME="-Xlinker=-soname," - SHARED_OPTIONS="-Xlinker=-z,defs" - SHARED_COMPILE_FLAG="-Xcompiler \"-fPIC\"" - else - SHARED_SET_SONAME="-Wl,-soname," - SHARED_OPTIONS="-Wl,-z,defs" - SHARED_COMPILE_FLAG="-fPIC" - fi - case $hypre_platform in - AIX* | aix* | Aix*) SHARED_COMPILE_FLAG="-qmkshrobj" - SHARED_BUILD_FLAG="-G" - LINK_FC="${FC} -brtl" - LINK_CC="${CC} -brtl" - LINK_CXX="${CXX} -brtl" ;; - DARWIN* | darwin* | Darwin*) SHARED_BUILD_FLAG="-dynamiclib -undefined dynamic_lookup" - HYPRE_LIBSUFFIX=".dylib" - SHARED_SET_SONAME="-install_name @rpath/" - SHARED_OPTIONS="-undefined error" ;; - *) SHARED_BUILD_FLAG="-shared" ;; - esac - if test "x$CXX" = "xnvcc" || test "x$CC" = "xnvcc" - then - SHARED_BUILD_FLAG="${SHARED_BUILD_FLAG} ${HYPRE_CUDA_GENCODE}" - fi - SHARED_BUILD_FLAG="${SHARED_BUILD_FLAG} ${EXTRA_BUILDFLAGS}" - FFLAGS="${FFLAGS} ${SHARED_COMPILE_FLAG}" - CFLAGS="${CFLAGS} ${SHARED_COMPILE_FLAG}" - CXXFLAGS="${CXXFLAGS} ${SHARED_COMPILE_FLAG}" - BUILD_FC_SHARED="${FC} ${SHARED_BUILD_FLAG}" - if test "$hypre_using_fei" = "yes" + if test "x$hypre_using_cuda_streams" = "xyes"] then - BUILD_CC_SHARED="${CXX} ${SHARED_BUILD_FLAG}" - else - BUILD_CC_SHARED="${CC} ${SHARED_BUILD_FLAG}" - fi - BUILD_CXX_SHARED="${CXX} ${SHARED_BUILD_FLAG}" -fi - -if test "x$hypre_using_cuda_streams" = "xyes" -then $as_echo "#define HYPRE_USING_CUDA_STREAMS 1" >>confdefs.h + fi fi if test "x$hypre_using_um" = "xyes" @@ -8665,7 +9135,7 @@ then $as_echo "#define HYPRE_USING_UNIFIED_MEMORY 1" >>confdefs.h else - if test "x$hypre_user_chose_cuda" = "xyes" || test "x$hypre_using_device_openmp" = "xyes" + if test "x$hypre_using_cuda" = "xyes" || test "x$hypre_using_device_openmp" = "xyes" || test "x$hypre_using_hip" = "xyes" then $as_echo "#define HYPRE_USING_DEVICE_MEMORY 1" >>confdefs.h @@ -8753,23 +9223,28 @@ $as_echo "$HYPRE_ARCH" >&6; } fi case $HYPRE_ARCH in alpha) - $as_echo "#define HYPRE_ALPHA 1" >>confdefs.h + +$as_echo 
"#define HYPRE_ALPHA 1" >>confdefs.h ;; sun* | solaris*) - $as_echo "#define HYPRE_SOLARIS 1" >>confdefs.h + +$as_echo "#define HYPRE_SOLARIS 1" >>confdefs.h ;; hp* | HP*) - $as_echo "#define HYPRE_HPPA 1" >>confdefs.h + +$as_echo "#define HYPRE_HPPA 1" >>confdefs.h ;; rs6000 | RS6000 | *bgl* | *BGL* | ppc64*) - $as_echo "#define HYPRE_RS6000 1" >>confdefs.h + +$as_echo "#define HYPRE_RS6000 1" >>confdefs.h ;; IRIX64) - $as_echo "#define HYPRE_IRIX64 1" >>confdefs.h + +$as_echo "#define HYPRE_IRIX64 1" >>confdefs.h ;; Linux | linux | LINUX) @@ -8778,16 +9253,19 @@ $as_echo "$HYPRE_ARCH" >&6; } systemtype=`grep ^SYS_TYPE /etc/home.config | cut -d" " -f2` case $systemtype in chaos*) - $as_echo "#define HYPRE_LINUX_CHAOS 1" >>confdefs.h + +$as_echo "#define HYPRE_LINUX_CHAOS 1" >>confdefs.h ;; *) - $as_echo "#define HYPRE_LINUX 1" >>confdefs.h + +$as_echo "#define HYPRE_LINUX 1" >>confdefs.h ;; esac else - $as_echo "#define HYPRE_LINUX 1" >>confdefs.h + +$as_echo "#define HYPRE_LINUX 1" >>confdefs.h fi ;; @@ -8878,6 +9356,13 @@ $as_echo "$HYPRE_ARCH" >&6; } + + + + + + + @@ -9395,7 +9880,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by hypre $as_me 2.18.2, which was +This file was extended by hypre $as_me 2.21.0, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -9457,7 +9942,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -hypre config.status 2.18.2 +hypre config.status 2.21.0 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/src/distributed_ls/CMakeLists.txt b/src/distributed_ls/CMakeLists.txt index 0102467b2..511f366ce 100644 --- a/src/distributed_ls/CMakeLists.txt +++ b/src/distributed_ls/CMakeLists.txt @@ -4,13 +4,9 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) set(HDRS "") -set(SRCS "") add_subdirectory(Euclid) add_subdirectory(ParaSails) add_subdirectory(pilut) set(HYPRE_HEADERS ${HYPRE_HEADERS} ${HDRS} PARENT_SCOPE) -set(HYPRE_SOURCES ${HYPRE_SOURCES} ${SRCS} PARENT_SCOPE) - - diff --git a/src/distributed_ls/Euclid/CMakeLists.txt b/src/distributed_ls/Euclid/CMakeLists.txt index 82d16ae0e..2ae900c4b 100644 --- a/src/distributed_ls/Euclid/CMakeLists.txt +++ b/src/distributed_ls/Euclid/CMakeLists.txt @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -set(EUCLID_SRCS +set(SRCS blas_dh.c Euclid_apply.c Euclid_dh.c @@ -33,9 +33,7 @@ set(EUCLID_SRCS Timer_dh.c Vec_dh.c ) - -convert_filenames_to_full_paths(EUCLID_SRCS) - -set(SRCS ${SRCS} ${EUCLID_SRCS} PARENT_SCOPE) - +target_sources(${PROJECT_NAME} + PRIVATE ${SRCS} +) diff --git a/src/distributed_ls/Euclid/Euclid_dh.c b/src/distributed_ls/Euclid/Euclid_dh.c index 65498d097..05d04cd13 100644 --- a/src/distributed_ls/Euclid/Euclid_dh.c +++ b/src/distributed_ls/Euclid/Euclid_dh.c @@ -21,8 +21,8 @@ static void get_runtime_params_private(Euclid_dh ctx); static void invert_diagonals_private(Euclid_dh ctx); -static void compute_rho_private(Euclid_dh ctx); -static void factor_private(Euclid_dh ctx); +static void compute_rho_private(Euclid_dh ctx); +static void factor_private(Euclid_dh ctx); /* static void discard_indices_private(Euclid_dh 
ctx); */ static void reduce_timings_private(Euclid_dh ctx); @@ -70,7 +70,7 @@ void Euclid_dhCreate(Euclid_dh *ctxOUT) strcpy(ctx->krylovMethod, "bicgstab"); ctx->maxIts = 200; ctx->rtol = 1e-5; - ctx->atol = _ATOL_; + ctx->atol = HYPRE_REAL_MIN; ctx->its = 0; ctx->itsTotal = 0; ctx->setupCount = 0; @@ -112,7 +112,7 @@ void Euclid_dhDestroy(Euclid_dh ctx) if (ctx->work2 != NULL) { FREE_DH(ctx->work2); CHECK_V_ERROR; } if (ctx->slist != NULL) { SortedList_dhDestroy(ctx->slist); CHECK_V_ERROR; } if (ctx->extRows != NULL) { ExternalRows_dhDestroy(ctx->extRows); CHECK_V_ERROR; } - FREE_DH(ctx); CHECK_V_ERROR; + FREE_DH(ctx); CHECK_V_ERROR; --ref_counter; END_FUNC_DH @@ -166,7 +166,7 @@ void Euclid_dhSetup(Euclid_dh ctx) } EuclidGetDimensions(ctx->A, &beg_row, &m, &n); CHECK_V_ERROR; - + ctx->m = m; ctx->n = n; @@ -218,7 +218,7 @@ void Euclid_dhSetup(Euclid_dh ctx) } if (! strcmp(ctx->algo_par, "bj")) bj = false; - /*--------------------------------------------------------- + /*--------------------------------------------------------- * allocate and initialize storage for row-scaling * (ctx->isScaled is set in get_runtime_params_private(); ) *---------------------------------------------------------*/ @@ -227,13 +227,13 @@ void Euclid_dhSetup(Euclid_dh ctx) } { HYPRE_Int i; for (i=0; iscale[i] = 1.0; } - /*------------------------------------------------------------------ + /*------------------------------------------------------------------ * allocate work vectors; used in factorization and triangular solves; *------------------------------------------------------------------*/ - if ( ctx->work == NULL) { + if ( ctx->work == NULL) { ctx->work = (REAL_DH*)MALLOC_DH(m*sizeof(REAL_DH)); CHECK_V_ERROR; } - if ( ctx->work2 == NULL) { + if ( ctx->work2 == NULL) { ctx->work2 = (REAL_DH*)MALLOC_DH(m*sizeof(REAL_DH)); CHECK_V_ERROR; } @@ -241,40 +241,40 @@ void Euclid_dhSetup(Euclid_dh ctx) * perform the incomplete factorization (this should be, at least * for higher level ILUK, 
the most time-intensive portion of setup) *-----------------------------------------------------------------*/ - t1 = hypre_MPI_Wtime(); + t1 = hypre_MPI_Wtime(); factor_private(ctx); CHECK_V_ERROR; ctx->timing[FACTOR_T] += (hypre_MPI_Wtime() - t1); - /*-------------------------------------------------------------- + /*-------------------------------------------------------------- * invert diagonals, for faster triangular solves *--------------------------------------------------------------*/ if (strcmp(ctx->algo_par, "none")) { - invert_diagonals_private(ctx); CHECK_V_ERROR; + invert_diagonals_private(ctx); CHECK_V_ERROR; } - /*-------------------------------------------------------------- + /*-------------------------------------------------------------- * compute rho_final: global ratio of nzF/nzA - * also, if -sparseA > 0, compute ratio of nzA + * also, if -sparseA > 0, compute ratio of nzA * used in factorization *--------------------------------------------------------------*/ /* for some reason compute_rho_private() was expensive, so now it's an option, unless there's only one mpi task. */ if (Parser_dhHasSwitch(parser_dh, "-computeRho") || np_dh == 1) { - if (strcmp(ctx->algo_par, "none")) { + if (strcmp(ctx->algo_par, "none")) { t1 = hypre_MPI_Wtime(); - compute_rho_private(ctx); CHECK_V_ERROR; + compute_rho_private(ctx); CHECK_V_ERROR; ctx->timing[COMPUTE_RHO_T] += (hypre_MPI_Wtime() - t1); } } - /*-------------------------------------------------------------- + /*-------------------------------------------------------------- * if using PILU, set up persistent comms and global-to-local * number scheme, for efficient triangular solves. * (Thanks to Edmond Chow for these algorithmic ideas.) *--------------------------------------------------------------*/ - if (! strcmp(ctx->algo_par, "pilu") && np_dh > 1) { + if (! 
strcmp(ctx->algo_par, "pilu") && np_dh > 1) { t1 = hypre_MPI_Wtime(); Factor_dhSolveSetup(ctx->F, ctx->sg); CHECK_V_ERROR; ctx->timing[SOLVE_SETUP_T] += (hypre_MPI_Wtime() - t1); @@ -286,7 +286,7 @@ END_OF_FUNCTION: ; * internal timing *-------------------------------------------------------*/ ctx->timing[SETUP_T] += (hypre_MPI_Wtime() - ctx->timing[SOLVE_START_T]); - ctx->setupCount += 1; + ctx->setupCount += 1; ctx->isSetup = true; @@ -302,7 +302,7 @@ void get_runtime_params_private(Euclid_dh ctx) char *tmp; /* params for use of internal solvers */ - Parser_dhReadInt(parser_dh, "-maxIts",&(ctx->maxIts)); + Parser_dhReadInt(parser_dh, "-maxIts",&(ctx->maxIts)); Parser_dhReadDouble(parser_dh, "-rtol", &(ctx->rtol)); Parser_dhReadDouble(parser_dh, "-atol", &(ctx->atol)); @@ -318,7 +318,7 @@ void get_runtime_params_private(Euclid_dh ctx) /* factorization parameters */ - Parser_dhReadDouble(parser_dh, "-rho", &(ctx->rho_init)); + Parser_dhReadDouble(parser_dh, "-rho", &(ctx->rho_init)); /* inital storage allocation for factor */ Parser_dhReadInt(parser_dh, "-level", &ctx->level); Parser_dhReadInt(parser_dh, "-pc_ilu_levels", &ctx->level); @@ -332,21 +332,21 @@ void get_runtime_params_private(Euclid_dh ctx) /* make sure both algo_par and algo_ilu are set to "none," if at least one is. */ - if (! strcmp(ctx->algo_par, "none")) { + if (! strcmp(ctx->algo_par, "none")) { strcpy(ctx->algo_ilu, "none"); } - else if (! strcmp(ctx->algo_ilu, "none")) { + else if (! 
strcmp(ctx->algo_ilu, "none")) { strcpy(ctx->algo_par, "none"); } - Parser_dhReadDouble(parser_dh, "-sparseA",&(ctx->sparseTolA)); + Parser_dhReadDouble(parser_dh, "-sparseA",&(ctx->sparseTolA)); /* sparsify A before factoring */ - Parser_dhReadDouble(parser_dh, "-sparseF",&(ctx->sparseTolF)); + Parser_dhReadDouble(parser_dh, "-sparseF",&(ctx->sparseTolF)); /* sparsify after factoring */ - Parser_dhReadDouble(parser_dh, "-pivotMin", &(ctx->pivotMin)); + Parser_dhReadDouble(parser_dh, "-pivotMin", &(ctx->pivotMin)); /* adjust pivots if smaller than this */ - Parser_dhReadDouble(parser_dh, "-pivotFix", &(ctx->pivotFix)); + Parser_dhReadDouble(parser_dh, "-pivotFix", &(ctx->pivotFix)); /* how to adjust pivots */ /* set row scaling for mandatory cases */ @@ -380,7 +380,7 @@ void invert_diagonals_private(Euclid_dh ctx) } else { HYPRE_Int i, m = ctx->F->m; for (i=0; ialgo_par == bj. */ - } - } + } + } /* ILUT factorization */ else if (! strcmp(ctx->algo_ilu, "ilut")) { @@ -492,7 +492,7 @@ void factor_private(Euclid_dh ctx) /* all other factorization methods */ else { - hypre_sprintf(msgBuf_dh, "factorization method: %s is not implemented", + hypre_sprintf(msgBuf_dh, "factorization method: %s is not implemented", ctx->algo_ilu); SET_V_ERROR(msgBuf_dh); } @@ -527,13 +527,13 @@ void factor_private(Euclid_dh ctx) /* if (Parser_dhHasSwitch(parser_dh, "-test")) { hypre_printf("[%i] Euclid_dh :: TESTING ilu_seq\n", myid_dh); - iluk_seq(ctx); CHECK_V_ERROR; + iluk_seq(ctx); CHECK_V_ERROR; } else { - iluk_mpi_pilu(ctx); CHECK_V_ERROR; + iluk_mpi_pilu(ctx); CHECK_V_ERROR; } */ - iluk_seq(ctx); CHECK_V_ERROR; + iluk_seq(ctx); CHECK_V_ERROR; /* get external rows from lower ordered neighbors in the subdomain graph; these rows are needed for factoring @@ -565,11 +565,11 @@ if (Parser_dhHasSwitch(parser_dh, "-test")) { ctx->slist = NULL; ExternalRows_dhDestroy(ctx->extRows); CHECK_V_ERROR; ctx->extRows = NULL; - } + } /* all other factorization methods */ else { - 
hypre_sprintf(msgBuf_dh, "factorization method: %s is not implemented", + hypre_sprintf(msgBuf_dh, "factorization method: %s is not implemented", ctx->algo_ilu); SET_V_ERROR(msgBuf_dh); } @@ -615,7 +615,7 @@ void discard_indices_private(Euclid_dh ctx) } if (flag) { - cval[j] = -1; + cval[j] = -1; ++count; } } @@ -632,11 +632,11 @@ void discard_indices_private(Euclid_dh ctx) for (j=start_of_row; jtiming[COMPUTE_RHO_T]); - fprintf_dh(fp, " misc (should be small): %0.2f\n", - timing[SETUP_T] - + fprintf_dh(fp, " misc (should be small): %0.2f\n", + timing[SETUP_T] - (timing[SUB_GRAPH_T]+timing[FACTOR_T]+ timing[SOLVE_SETUP_T]+timing[COMPUTE_RHO_T])); @@ -822,12 +822,12 @@ void Euclid_dhPrintStatsShort(Euclid_dh ctx, HYPRE_Real setup, HYPRE_Real solve, /* special: for scalability studies */ fprintf_dh(fp, "\n%6s %6s %6s %6s %6s %6s WW\n", "method", "level", "subGph", "factor", "solveS", "perIt"); fprintf_dh(fp, "------ ----- ----- ----- ----- ----- WW\n"); - fprintf_dh(fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f WWW\n", + fprintf_dh(fp, "%6s %6i %6.2f %6.2f %6.2f %6.4f WWW\n", ctx->algo_par, ctx->level, - timing[SUB_GRAPH_T], - timing[FACTOR_T], - timing[SOLVE_SETUP_T], + timing[SUB_GRAPH_T], + timing[FACTOR_T], + timing[SOLVE_SETUP_T], apply_per_it); #endif END_FUNC_DH @@ -940,8 +940,8 @@ void Euclid_dhPrintHypreReport(Euclid_dh ctx, FILE *fp) fprintf_dh(fp, " factorization: %0.2f\n", timing[FACTOR_T]); fprintf_dh(fp, " solve setup: %0.2f\n", timing[SOLVE_SETUP_T]); fprintf_dh(fp, " rho: %0.2f\n", ctx->timing[COMPUTE_RHO_T]); - fprintf_dh(fp, " misc (should be small): %0.2f\n", - timing[SETUP_T] - + fprintf_dh(fp, " misc (should be small): %0.2f\n", + timing[SETUP_T] - (timing[SUB_GRAPH_T]+timing[FACTOR_T]+ timing[SOLVE_SETUP_T]+timing[COMPUTE_RHO_T])); @@ -962,7 +962,7 @@ void Euclid_dhPrintHypreReport(Euclid_dh ctx, FILE *fp) void Euclid_dhPrintTestData(Euclid_dh ctx, FILE *fp) { START_FUNC_DH - /* Print data that should remain that will hopefully + /* Print data that 
should remain that will hopefully remain the same for any platform. Possibly "tri solves" may change . . . */ diff --git a/src/distributed_ls/Euclid/Hash_dh.c b/src/distributed_ls/Euclid/Hash_dh.c index 00e7c2a2d..d8c6c8ba3 100644 --- a/src/distributed_ls/Euclid/Hash_dh.c +++ b/src/distributed_ls/Euclid/Hash_dh.c @@ -104,7 +104,8 @@ HashData * Hash_dhLookup(Hash_dh h, HYPRE_Int key) for (i=0; isize, + HYPRE_Int i, + old_size = h->size, new_size = old_size*2, oldCurMark = h->curMark; - Hash_i_Record *oldData = h->data, + Hash_i_Record *oldData = h->data, *newData; hypre_sprintf(msgBuf_dh, "rehashing; old_size= %i, new_size= %i", old_size, new_size); @@ -239,7 +241,7 @@ void rehash_private(Hash_i_dh h) h->count = 0; h->curMark = 0; - for (i=h->count; icount; i pair, or altering * the value of an existing pair from within user apps. */ -extern void Parser_dhUpdateFromFile(Parser_dh p, char *name); +extern void Parser_dhUpdateFromFile(Parser_dh p, const char *name); extern void Parser_dhInit(Parser_dh p, HYPRE_Int argc, char *argv[]); /* Init enters pairs in its internal database in the following order: - (1) $PCPACK_DIR/options_database + (1) $PCPACK_DIR/options_database (2) "database" in local directory, if the file exists (3) "pathname/foo" if argv[] contains a pair of entries: -db_filename pathname/foo (4) flag,value pairs from the command line (ie, argv) If a flag already exists, its value is updated if it is - encountered a second time. + encountered a second time. WARNING! 
to enter a negative value, you must use two dashes, e.g: -myvalue --0.1 @@ -67,4 +67,3 @@ extern void Parser_dhInit(Parser_dh p, HYPRE_Int argc, char *argv[]); */ #endif - diff --git a/src/distributed_ls/Euclid/Vec_dh.c b/src/distributed_ls/Euclid/Vec_dh.c index bf1fb686e..515e56962 100644 --- a/src/distributed_ls/Euclid/Vec_dh.c +++ b/src/distributed_ls/Euclid/Vec_dh.c @@ -63,7 +63,7 @@ void Vec_dhCopy(Vec_dh x, Vec_dh y) void Vec_dhDuplicate(Vec_dh v, Vec_dh *out) { START_FUNC_DH - Vec_dh tmp; + Vec_dh tmp; HYPRE_Int size = v->n; if (v->vals == NULL) SET_V_ERROR("v->vals is NULL"); Vec_dhCreate(out); CHECK_V_ERROR; @@ -102,7 +102,7 @@ void Vec_dhSetRand(Vec_dh v) * so all values are in [0.0,1.0] */ for (i=0; i= number of threads. @@ -108,7 +111,7 @@ extern "C" { /*--------------------------------------------------------------------- * Memory management. These macros work with functions in Mem_dh.c; - * Change if you want to use some memory management and reporting schemes + * Change if you want to use some memory management and reporting schemes * other than that supplied with Euclid. These depend on the global * object "Mem_dh mem_dh" which is defined in globalObjects.c (yuck!) ---------------------------------------------------------------------*/ @@ -125,11 +128,11 @@ extern "C" { #endif - /* The actual calls used by Mem_dh objects to allocate/free memory + /* The actual calls used by Mem_dh objects to allocate/free memory * from the heap. 
*/ -#define PRIVATE_MALLOC malloc -#define PRIVATE_FREE free +#define PRIVATE_MALLOC(size) ( hypre_TAlloc(char, size, HYPRE_MEMORY_HOST) ) +#define PRIVATE_FREE(ptr) ( hypre_TFree(ptr, HYPRE_MEMORY_HOST) ) /*------------------ Memory management end -----------------------------*/ @@ -146,6 +149,13 @@ you need to write EUCLID_GET_ROW() functions: see src/getRow.c */ #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef MACROS_DH #define MACROS_DH @@ -169,16 +179,14 @@ you need to write EUCLID_GET_ROW() functions: see src/getRow.c #define FABS(a) ((a) < 0 ? -(a) : a) #endif +/* used in Mat_SEQ_PrintTriples, so matlab won't discard zeros (yuck!) */ #ifdef HYPRE_SINGLE -#define _ATOL_ 1.0e-16 /* used to compute absolute tolerance for Euclid's internal Krylov solvers */ -#define _MATLAB_ZERO_ 1e-30 /* used in Mat_SEQ_PrintTriples, so matlab won't discard zeros (yuck!) */ +#define _MATLAB_ZERO_ 1e-30 #else // default -#define _ATOL_ 1.0e-50 #define _MATLAB_ZERO_ 1e-100 #endif - /*---------------------------------------------------------------------- * macros for error handling everyplace except in main. *---------------------------------------------------------------------- */ @@ -330,11 +338,19 @@ you need to write EUCLID_GET_ROW() functions: see src/getRow.c #endif #endif /* #ifndef MACROS_DH */ +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef COMMON_DH #define COMMON_DH #include #include +#include #include #include #include @@ -350,7 +366,7 @@ you need to write EUCLID_GET_ROW() functions: see src/getRow.c #if ( !defined(FAKE_MPI) && defined(USING_MPI) && \ !defined(HYPRE_MODE) && !defined(PETSC_MODE) ) -#include +#include #endif #if defined(FAKE_MPI) @@ -369,8 +385,8 @@ you need to write EUCLID_GET_ROW() functions: see src/getRow.c /* #include "macros_dh.h" */ /* macros for error checking, etc */ -/*----------------------------------------------------------- - * Euclid classes +/*----------------------------------------------------------- + * Euclid classes *-----------------------------------------------------------*/ typedef struct _matgenfd* MatGenFD; typedef struct _subdomain_dh* SubdomainGraph_dh; @@ -399,21 +415,9 @@ typedef struct _apply_dh* Apply_dh; typedef struct _externalRows_dh* ExternalRows_dh; */ -/*--------------------------------------------------------------------- - * misc. - *---------------------------------------------------------------------*/ - - -#if defined(__cplusplus) -#else -typedef HYPRE_Int bool; -#define true 1 -#define false 0 -#endif - /* ------------------------------------------------------------------ * Globally scoped variables, error handling functions, etc. 
- * These are all defined in /src/globalObjects.c + * These are all defined in /src/globalObjects.c * ------------------------------------------------------------------*/ extern Parser_dh parser_dh; /* for setting/getting runtime options */ extern TimeLog_dh tlog_dh; /* internal timing functionality */ @@ -421,7 +425,7 @@ extern Mem_dh mem_dh; /* memory management */ extern FILE *logFile; extern HYPRE_Int np_dh; /* number of processors and subdomains */ extern HYPRE_Int myid_dh; /* rank of this processor (and subdomain) */ -extern MPI_Comm comm_dh; +extern MPI_Comm comm_dh; extern bool ignoreMe; /* used to stop compiler complaints */ @@ -437,8 +441,8 @@ extern HYPRE_Int ref_counter; /* for internal use only! Reference counter * macros defined in "macros_dh.h" */ extern bool errFlag_dh; -extern void setInfo_dh(const char *msg,const char *function,const char *file, HYPRE_Int line); -extern void setError_dh(const char *msg,const char *function,const char *file, HYPRE_Int line); +extern void setInfo_dh(const char *msg, const char *function, const char *file, HYPRE_Int line); +extern void setError_dh(const char *msg, const char *function, const char *file, HYPRE_Int line); extern void printErrorMsg(FILE *fp); #ifndef hypre_MPI_MAX_ERROR_STRING @@ -446,15 +450,11 @@ extern void printErrorMsg(FILE *fp); #endif #define MSG_BUF_SIZE_DH MAX(1024, hypre_MPI_MAX_ERROR_STRING) -#if defined(HYPRE_USING_RAJA) || defined(HYPRE_USING_KOKKOS) || defined(HYPRE_USING_CUDA) -static char msgBuf_dh[MSG_BUF_SIZE_DH]; -#else extern char msgBuf_dh[MSG_BUF_SIZE_DH]; -#endif /* Each processor (may) open a logfile. - * The bools are switches for controlling the amount of informational - * output, and where it gets written to. Function trace logging is only + * The bools are switches for controlling the amount of informational + * output, and where it gets written to. Function trace logging is only * enabled when compiled with the debugging (-g) option. 
*/ extern void openLogfile_dh(HYPRE_Int argc, char *argv[]); @@ -465,15 +465,15 @@ extern bool logFuncsToStderr; extern bool logFuncsToFile; extern void Error_dhStartFunc(char *function, char *file, HYPRE_Int line); extern void Error_dhEndFunc(char *function); -extern void dh_StartFunc(const char *function,const char *file, HYPRE_Int line, HYPRE_Int priority); +extern void dh_StartFunc(const char *function, const char *file, HYPRE_Int line, HYPRE_Int priority); extern void dh_EndFunc(const char *function, HYPRE_Int priority); extern void printFunctionStack(FILE *fp); extern void EuclidInitialize(HYPRE_Int argc, char *argv[], char *help); /* instantiates global objects */ extern void EuclidFinalize(); /* deletes global objects */ -extern bool EuclidIsInitialized(); +extern bool EuclidIsInitialized(); extern void printf_dh(const char *fmt, ...); -extern void fprintf_dh(FILE *fp,const char *fmt, ...); +extern void fprintf_dh(FILE *fp, const char *fmt, ...); /* echo command line invocation to stdout. The "prefix" string is for grepping; it may be NULL. @@ -482,6 +482,13 @@ extern void echoInvocation_dh(MPI_Comm comm, char *prefix, HYPRE_Int argc, char #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + /* for internal use */ #ifndef EXTERNAL_ROWS_DH_H @@ -545,6 +552,13 @@ struct _extrows_dh { }; #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef FACTOR_DH #define FACTOR_DH @@ -632,6 +646,13 @@ extern void Factor_dhPrintRows(Factor_dh mat, FILE *fp); /* prints local matrix to logfile, if open */ #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef VEC_DH_H #define VEC_DH_H @@ -667,6 +688,13 @@ extern void Vec_dhReadBIN(Vec_dh *v, char *filename); extern void Vec_dhPrint(Vec_dh v, SubdomainGraph_dh sg, char *filename); extern void Vec_dhPrintBIN(Vec_dh v, SubdomainGraph_dh sg, char *filename); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef MATGENFD_DH_DH #define MATGENFD_DH_DH @@ -798,6 +826,13 @@ extern HYPRE_Real box_1(HYPRE_Real coeff, HYPRE_Real x, HYPRE_Real y, HYPRE_Real extern HYPRE_Real box_2(HYPRE_Real coeff, HYPRE_Real x, HYPRE_Real y, HYPRE_Real z); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef MAT_DH_DH #define MAT_DH_DH @@ -943,6 +978,13 @@ extern void dldperm(HYPRE_Int job, HYPRE_Int n, HYPRE_Int nnz, HYPRE_Int colptr[ #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef SUBDOMAIN_GRAPH_DH #define SUBDOMAIN_GRAPH_DH @@ -1054,6 +1096,13 @@ extern void SubdomainGraph_dhPrintRatios(SubdomainGraph_dh s, FILE *fp); extern void SubdomainGraph_dhPrintStats(SubdomainGraph_dh sg, FILE *fp); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + /* Euclid employs a global object: @@ -1076,6 +1125,13 @@ extern void TimeLog_dhMark(TimeLog_dh t, const char *description); extern void TimeLog_dhPrint(TimeLog_dh t, FILE *fp, bool allPrint); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef SORTED_SET_DH #define SORTED_SET_DH @@ -1094,6 +1150,13 @@ extern void SortedSet_dhGetList(SortedSet_dh ss, HYPRE_Int **list, HYPRE_Int *co #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef MEM_DH_DH #define MEM_DH_DH @@ -1114,6 +1177,13 @@ extern void Mem_dhPrint(Mem_dh m, FILE* fp, bool allPrint); */ #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef SUPPORT_DH #define SUPPORT_DH @@ -1129,6 +1199,13 @@ extern void shellSort_int_int_float(HYPRE_Int n, HYPRE_Int *x, HYPRE_Int *y, HYP */ #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef NUMBERING_DH_H #define NUMBERING_DH_H @@ -1172,6 +1249,13 @@ extern void Numbering_dhGlobalToLocal(Numbering_dh numb, HYPRE_Int len, HYPRE_Int *global_in, HYPRE_Int *local_out); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + /* This is similar to the Hash_i_dh class (woe, for a lack of templates); this this class is for hashing data consisting of single, non-negative integers. @@ -1208,6 +1292,13 @@ extern HYPRE_Int Hash_i_dhLookup(Hash_i_dh h, HYPRE_Int key); */ #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef TIMER_DH_H #define TIMER_DH_H @@ -1288,6 +1379,13 @@ extern HYPRE_Real Timer_dhReadUsage(Timer_dh t); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef PARSER_DH_DH #define PARSER_DH_DH @@ -1296,12 +1394,12 @@ extern HYPRE_Real Timer_dhReadUsage(Timer_dh t); extern void Parser_dhCreate(Parser_dh *p); extern void Parser_dhDestroy(Parser_dh p); -extern bool Parser_dhHasSwitch(Parser_dh p,const char *in); -extern bool Parser_dhReadString(Parser_dh p,const char *in, char **out); -extern bool Parser_dhReadInt(Parser_dh p,const char *in, HYPRE_Int *out); -extern bool Parser_dhReadDouble(Parser_dh p,const char *in, HYPRE_Real *out); - /* if the flag (char *in) is found, these four return - true and set "out" accordingly. If not found, they return +extern bool Parser_dhHasSwitch(Parser_dh p, const char *in); +extern bool Parser_dhReadString(Parser_dh p, const char *in, char **out); +extern bool Parser_dhReadInt(Parser_dh p, const char *in, HYPRE_Int *out); +extern bool Parser_dhReadDouble(Parser_dh p, const char *in, HYPRE_Real *out); + /* if the flag (char *in) is found, these four return + true and set "out" accordingly. If not found, they return false, and "out" is unaltered. */ @@ -1310,25 +1408,25 @@ extern void Parser_dhPrint(Parser_dh p, FILE *fp, bool allPrint); * only meaningful when Euclid is compiled in MPI mode */ -extern void Parser_dhInsert(Parser_dh p,const char *name,const char *value); +extern void Parser_dhInsert(Parser_dh p, const char *name, const char *value); /* For inserting a new pair, or altering * the value of an existing pair from within user apps. 
*/ -extern void Parser_dhUpdateFromFile(Parser_dh p,const char *name); +extern void Parser_dhUpdateFromFile(Parser_dh p, const char *name); extern void Parser_dhInit(Parser_dh p, HYPRE_Int argc, char *argv[]); /* Init enters pairs in its internal database in the following order: - (1) $PCPACK_DIR/options_database + (1) $PCPACK_DIR/options_database (2) "database" in local directory, if the file exists (3) "pathname/foo" if argv[] contains a pair of entries: -db_filename pathname/foo (4) flag,value pairs from the command line (ie, argv) If a flag already exists, its value is updated if it is - encountered a second time. + encountered a second time. WARNING! to enter a negative value, you must use two dashes, e.g: -myvalue --0.1 @@ -1350,6 +1448,12 @@ extern void Parser_dhInit(Parser_dh p, HYPRE_Int argc, char *argv[]); */ #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ #ifndef SORTEDLIST_DH_H #define SORTEDLIST_DH_H @@ -1429,6 +1533,13 @@ extern SRecord * SortedList_dhFind(SortedList_dh sList, SRecord *sr); extern void SortedList_dhUpdateVal(SortedList_dh sList, SRecord *sr); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef HASH_D_DH #define HASH_D_DH @@ -1484,6 +1595,13 @@ extern void Hash_dhPrint(Hash_dh h, FILE *fp); } #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef MAT_DH_PRIVATE #define MAT_DH_PRIVATE @@ -1601,6 +1719,13 @@ extern void make_symmetric_private(HYPRE_Int m, HYPRE_Int **rp, HYPRE_Int **cval extern void make_symmetric_private(HYPRE_Int m, HYPRE_Int **rp, HYPRE_Int **cval, HYPRE_Real **aval); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef GET_ROW_DH #define GET_ROW_DH @@ -1620,6 +1745,13 @@ extern void PrintMatUsingGetRow(void* A, HYPRE_Int beg_row, HYPRE_Int m, #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. 
+ * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef ILU_MPI_DH #define ILU_MPI_DH @@ -1654,6 +1786,13 @@ extern void ilut_seq(Euclid_dh ctx); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef EUCLID_MPI_INTERFACE_DH #define EUCLID_MPI_INTERFACE_DH @@ -1806,6 +1945,13 @@ struct _mpi_interface_dh { }; #endif /* #ifndef EUCLID_MPI_INTERFACE_DH */ +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef THREADED_KRYLOV_H #define THREADED_KRYLOV_H @@ -1818,6 +1964,13 @@ extern void cg_euclid(Mat_dh A, Euclid_dh ctx, HYPRE_Real *x, HYPRE_Real *b, HYPRE_Int *itsOUT); #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + /* Note: this module contains functionality for reading/writing Euclid's binary io format, and opening and closing files. 
@@ -1862,6 +2015,13 @@ extern void io_dh_read_ebin_vec_private(HYPRE_Int *n, HYPRE_Real **vals, char *f #endif +/****************************************************************************** + * Copyright 1998-2019 Lawrence Livermore National Security, LLC and other + * HYPRE Project Developers. See the top-level COPYRIGHT file for details. + * + * SPDX-License-Identifier: (Apache-2.0 OR MIT) + ******************************************************************************/ + #ifndef THREADED_BLAS_DH #define THREADED_BLAS_DH diff --git a/src/distributed_ls/Euclid/euclid_common.h b/src/distributed_ls/Euclid/euclid_common.h index 652af1da4..24dbaa4d6 100644 --- a/src/distributed_ls/Euclid/euclid_common.h +++ b/src/distributed_ls/Euclid/euclid_common.h @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -23,19 +24,9 @@ * files are included. *-----------------------------------------------------------------------*/ -#if defined(HYPRE_MODE) -#include "HYPRE_parcsr_mv.h" -#include "HYPRE_config.h" -#include "HYPRE_distributed_matrix_mv.h" -#include "_hypre_utilities.h" - -#elif defined(PETSC_MODE) -#include "petsc_config.h" -#endif - #if ( !defined(FAKE_MPI) && defined(USING_MPI) && \ !defined(HYPRE_MODE) && !defined(PETSC_MODE) ) -#include +#include #endif #if defined(FAKE_MPI) @@ -54,8 +45,8 @@ /* #include "macros_dh.h" */ /* macros for error checking, etc */ -/*----------------------------------------------------------- - * Euclid classes +/*----------------------------------------------------------- + * Euclid classes *-----------------------------------------------------------*/ typedef struct _matgenfd* MatGenFD; typedef struct _subdomain_dh* SubdomainGraph_dh; @@ -84,21 +75,9 @@ typedef struct _apply_dh* Apply_dh; typedef struct _externalRows_dh* ExternalRows_dh; */ -/*--------------------------------------------------------------------- - * misc. 
- *---------------------------------------------------------------------*/ - - -#if defined(__cplusplus) -#else -typedef HYPRE_Int bool; -#define true 1 -#define false 0 -#endif - /* ------------------------------------------------------------------ * Globally scoped variables, error handling functions, etc. - * These are all defined in /src/globalObjects.c + * These are all defined in /src/globalObjects.c * ------------------------------------------------------------------*/ extern Parser_dh parser_dh; /* for setting/getting runtime options */ extern TimeLog_dh tlog_dh; /* internal timing functionality */ @@ -106,7 +85,7 @@ extern Mem_dh mem_dh; /* memory management */ extern FILE *logFile; extern HYPRE_Int np_dh; /* number of processors and subdomains */ extern HYPRE_Int myid_dh; /* rank of this processor (and subdomain) */ -extern MPI_Comm comm_dh; +extern MPI_Comm comm_dh; extern bool ignoreMe; /* used to stop compiler complaints */ @@ -122,8 +101,8 @@ extern HYPRE_Int ref_counter; /* for internal use only! Reference counter * macros defined in "macros_dh.h" */ extern bool errFlag_dh; -extern void setInfo_dh(char *msg, char *function, char *file, HYPRE_Int line); -extern void setError_dh(char *msg, char *function, char *file, HYPRE_Int line); +extern void setInfo_dh(const char *msg, const char *function, const char *file, HYPRE_Int line); +extern void setError_dh(const char *msg, const char *function, const char *file, HYPRE_Int line); extern void printErrorMsg(FILE *fp); #ifndef hypre_MPI_MAX_ERROR_STRING @@ -134,8 +113,8 @@ extern void printErrorMsg(FILE *fp); extern char msgBuf_dh[MSG_BUF_SIZE_DH]; /* Each processor (may) open a logfile. - * The bools are switches for controlling the amount of informational - * output, and where it gets written to. Function trace logging is only + * The bools are switches for controlling the amount of informational + * output, and where it gets written to. 
Function trace logging is only * enabled when compiled with the debugging (-g) option. */ extern void openLogfile_dh(HYPRE_Int argc, char *argv[]); @@ -146,15 +125,15 @@ extern bool logFuncsToStderr; extern bool logFuncsToFile; extern void Error_dhStartFunc(char *function, char *file, HYPRE_Int line); extern void Error_dhEndFunc(char *function); -extern void dh_StartFunc(char *function, char *file, HYPRE_Int line, HYPRE_Int priority); -extern void dh_EndFunc(char *function, HYPRE_Int priority); +extern void dh_StartFunc(const char *function, const char *file, HYPRE_Int line, HYPRE_Int priority); +extern void dh_EndFunc(const char *function, HYPRE_Int priority); extern void printFunctionStack(FILE *fp); extern void EuclidInitialize(HYPRE_Int argc, char *argv[], char *help); /* instantiates global objects */ extern void EuclidFinalize(); /* deletes global objects */ -extern bool EuclidIsInitialized(); -extern void printf_dh(char *fmt, ...); -extern void fprintf_dh(FILE *fp, char *fmt, ...); +extern bool EuclidIsInitialized(); +extern void printf_dh(const char *fmt, ...); +extern void fprintf_dh(FILE *fp, const char *fmt, ...); /* echo command line invocation to stdout. The "prefix" string is for grepping; it may be NULL. 
diff --git a/src/distributed_ls/Euclid/euclid_config.h b/src/distributed_ls/Euclid/euclid_config.h index ab7468606..b184414b6 100644 --- a/src/distributed_ls/Euclid/euclid_config.h +++ b/src/distributed_ls/Euclid/euclid_config.h @@ -38,7 +38,7 @@ #define EXIT_NOW(msg) \ { setError_dh(msg, __FUNC__, __FILE__, __LINE__); \ ERRCHKA; \ - } + } #define ERRCHKA \ if (errFlag_dh) { \ @@ -52,7 +52,7 @@ Mem_dhPrint(mem_dh, stderr, false); \ } \ EUCLID_EXIT; \ - } + } /* let Euclid do its thing, before handing off to PETSc; @@ -70,10 +70,10 @@ printErrorMsg(stderr); \ hypre_fprintf(stderr, "\n[%i] ierr = %i, errFlag_dh = %i\n", myid_dh, ierr, errFlag_dh); \ CHKERRA(ierr); \ - } + } -#define MAX_SUBDOMAINS 20 +#define MAX_SUBDOMAINS 20 /* The maximum number of subdomains into which the matrix may be partitioned. Rule of thumb: MAX_SUBDOMAINS >= number of threads. @@ -86,7 +86,7 @@ /*--------------------------------------------------------------------- * Memory management. These macros work with functions in Mem_dh.c; - * Change if you want to use some memory management and reporting schemes + * Change if you want to use some memory management and reporting schemes * other than that supplied with Euclid. These depend on the global * object "Mem_dh mem_dh" which is defined in globalObjects.c (yuck!) ---------------------------------------------------------------------*/ @@ -103,11 +103,11 @@ #endif - /* The actual calls used by Mem_dh objects to allocate/free memory + /* The actual calls used by Mem_dh objects to allocate/free memory * from the heap. 
*/ -#define PRIVATE_MALLOC malloc -#define PRIVATE_FREE free +#define PRIVATE_MALLOC(size) ( hypre_TAlloc(char, size, HYPRE_MEMORY_HOST) ) +#define PRIVATE_FREE(ptr) ( hypre_TFree(ptr, HYPRE_MEMORY_HOST) ) /*------------------ Memory management end -----------------------------*/ diff --git a/src/distributed_ls/Euclid/globalObjects.c b/src/distributed_ls/Euclid/globalObjects.c index 43c6d546c..3740bf038 100644 --- a/src/distributed_ls/Euclid/globalObjects.c +++ b/src/distributed_ls/Euclid/globalObjects.c @@ -7,7 +7,7 @@ #include "_hypre_Euclid.h" -/* Contains definitions of globally scoped objects; +/* Contains definitions of globally scoped objects; * Also, functions for error handling and message logging. */ @@ -25,17 +25,14 @@ Parser_dh parser_dh = NULL; /* for setting/getting runtime options */ TimeLog_dh tlog_dh = NULL; /* internal timing functionality */ Mem_dh mem_dh = NULL; /* memory management */ FILE *logFile = NULL; -#if defined(HYPRE_USING_RAJA) || defined(HYPRE_USING_KOKKOS) || defined(HYPRE_USING_CUDA) -#else char msgBuf_dh[MSG_BUF_SIZE_DH]; /* for internal use */ -#endif HYPRE_Int np_dh = 1; /* number of processors and subdomains */ HYPRE_Int myid_dh = 0; /* rank of this processor (and subdomain) */ MPI_Comm comm_dh = 0; /* Each processor (may) open a logfile. - * The bools are switches for controlling the amount of informational + * The bools are switches for controlling the amount of informational * output, and where it gets written to. Function logging is only enabled * when compiled with the debugging (-g) option. */ @@ -52,7 +49,7 @@ HYPRE_Int ref_counter = 0; /*------------------------------------------------------------------------- - * End of global definitions. + * End of global definitions. * Error and info functions follow. *-------------------------------------------------------------------------*/ @@ -75,7 +72,7 @@ void openLogfile_dh(HYPRE_Int argc, char *argv[]) /* this doesn't really belong here, but it's gotta go someplace! 
*/ /* strcpy(errMsg, "error msg was never set -- ??"); */ - if (logFile != NULL) return; + if (logFile != NULL) return; /* set default logging filename */ hypre_sprintf(buf, "logFile"); @@ -84,7 +81,7 @@ void openLogfile_dh(HYPRE_Int argc, char *argv[]) if (argc && argv != NULL) { HYPRE_Int j; for (j=1; j%s< for writing; continuing anyway\n", buf); } - } + } } void closeLogfile_dh() @@ -118,12 +115,12 @@ void closeLogfile_dh() void setInfo_dh(const char *msg,const char *function,const char *file, HYPRE_Int line) { if (logInfoToFile && logFile != NULL) { - hypre_fprintf(logFile, "INFO: %s;\n function= %s file=%s line=%i\n", + hypre_fprintf(logFile, "INFO: %s;\n function= %s file=%s line=%i\n", msg, function, file, line); fflush(logFile); } if (logInfoToStderr) { - hypre_fprintf(stderr, "INFO: %s;\n function= %s file=%s line=%i\n", + hypre_fprintf(stderr, "INFO: %s;\n function= %s file=%s line=%i\n", msg, function, file, line); } } @@ -135,7 +132,7 @@ void setInfo_dh(const char *msg,const char *function,const char *file, HYPRE_In void dh_StartFunc(const char *function,const char *file, HYPRE_Int line, HYPRE_Int priority) { if (priority == 1) { - hypre_sprintf(calling_stack[calling_stack_count], + hypre_sprintf(calling_stack[calling_stack_count], "[%i] %s file= %s line= %i", myid_dh, function, file, line); /* priority_private[calling_stack_count] = priority; */ ++calling_stack_count; @@ -170,12 +167,12 @@ void setError_dh(const char *msg,const char *function,const char *file, HYPRE_I { errFlag_dh = true; if (! 
strcmp(msg, "")) { - hypre_sprintf(errMsg_private[errCount_private], - "[%i] called from: %s file= %s line= %i", + hypre_sprintf(errMsg_private[errCount_private], + "[%i] called from: %s file= %s line= %i", myid_dh, function, file, line); } else { - hypre_sprintf(errMsg_private[errCount_private], - "[%i] ERROR: %s\n %s file= %s line= %i\n", + hypre_sprintf(errMsg_private[errCount_private], + "[%i] ERROR: %s\n %s file= %s line= %i\n", myid_dh, msg, function, file, line); } ++errCount_private; @@ -234,26 +231,26 @@ void Error_dhStartFunc(char *function, char *file, HYPRE_Int line) /* get rid of string null-terminator from last * call (if any) to Error_dhStartFunc() */ - spaces[INDENT_DH*nesting] = ' '; + spaces[INDENT_DH*nesting] = ' '; /* add null-terminator, so the correct number of spaces will be printed */ - ++nesting; + ++nesting; if (nesting > MAX_ERROR_SPACES-1) nesting = MAX_ERROR_SPACES-1; spaces[INDENT_DH*nesting] = '\0'; if (logFuncsToStderr) { - hypre_fprintf(stderr, "%s(%i) %s [file= %s line= %i]\n", + hypre_fprintf(stderr, "%s(%i) %s [file= %s line= %i]\n", spaces, nesting, function, file, line); } if (logFuncsToFile && logFile != NULL) { - hypre_fprintf(logFile, "%s(%i) %s [file= %s line= %i]\n", + hypre_fprintf(logFile, "%s(%i) %s [file= %s line= %i]\n", spaces, nesting, function, file, line); fflush(logFile); } } void Error_dhEndFunc(char *function) -{ +{ nesting -= 1; if (nesting < 0) nesting = 0; spaces[INDENT_DH*nesting] = '\0'; @@ -279,7 +276,7 @@ void EuclidInitialize(HYPRE_Int argc, char *argv[], char *help) if (! 
EuclidIsActive) { hypre_MPI_Comm_size(comm_dh, &np_dh); hypre_MPI_Comm_rank(comm_dh, &myid_dh); - openLogfile_dh(argc, argv); + openLogfile_dh(argc, argv); if (mem_dh == NULL) { Mem_dhCreate(&mem_dh); CHECK_V_ERROR; } if (tlog_dh == NULL) { TimeLog_dhCreate(&tlog_dh); CHECK_V_ERROR; } if (parser_dh == NULL) { Parser_dhCreate(&parser_dh); CHECK_V_ERROR; } @@ -291,7 +288,7 @@ void EuclidInitialize(HYPRE_Int argc, char *argv[], char *help) if (myid_dh == 0) hypre_printf("%s\n\n", help); EUCLID_EXIT; } - if (Parser_dhHasSwitch(parser_dh, "-logFuncsToFile")) { + if (Parser_dhHasSwitch(parser_dh, "-logFuncsToFile")) { logFuncsToFile = true; } if (Parser_dhHasSwitch(parser_dh, "-logFuncsToStderr")) { diff --git a/src/distributed_ls/Euclid/headers b/src/distributed_ls/Euclid/headers index 5315dbab9..22af06918 100755 --- a/src/distributed_ls/Euclid/headers +++ b/src/distributed_ls/Euclid/headers @@ -12,6 +12,8 @@ INTERNAL_HEADER=_hypre_Euclid.h cat > $INTERNAL_HEADER <<@ +/*** DO NOT EDIT THIS FILE DIRECTLY (use 'headers' to generate) ***/ + #ifndef hypre_EUCLID_HEADER #define hypre_EUCLID_HEADER @@ -20,6 +22,16 @@ cat > $INTERNAL_HEADER <<@ #define HYPRE_MODE #define OPTIMIZED_DH +#if defined(HYPRE_MODE) +#include "HYPRE_parcsr_mv.h" +#include "HYPRE_config.h" +#include "HYPRE_distributed_matrix_mv.h" +#include "_hypre_utilities.h" + +#elif defined(PETSC_MODE) +#include "petsc_config.h" +#endif + #ifdef __cplusplus extern "C" { #endif @@ -71,4 +83,3 @@ cat >> $INTERNAL_HEADER <<@ #endif @ - diff --git a/src/distributed_ls/Euclid/mat_dh_private.c b/src/distributed_ls/Euclid/mat_dh_private.c index bd1116be5..3f13e5152 100644 --- a/src/distributed_ls/Euclid/mat_dh_private.c +++ b/src/distributed_ls/Euclid/mat_dh_private.c @@ -25,7 +25,7 @@ static HYPRE_Int isTriangular(HYPRE_Int m, HYPRE_Int *rp, HYPRE_Int *cval); /* Instantiates Aout; allocates storage for rp, cval, and aval arrays; uses rowLengths[] and rowToBlock[] data to fill in rp[]. 
*/ -static void mat_par_read_allocate_private(Mat_dh *Aout, HYPRE_Int n, +static void mat_par_read_allocate_private(Mat_dh *Aout, HYPRE_Int n, HYPRE_Int *rowLengths, HYPRE_Int *rowToBlock); /* Currently, divides (partitions)matrix by contiguous sections of rows. @@ -34,14 +34,14 @@ static void mat_par_read_allocate_private(Mat_dh *Aout, HYPRE_Int n, void mat_partition_private(Mat_dh A, HYPRE_Int blocks, HYPRE_Int *o2n_row, HYPRE_Int *rowToBlock); -static void convert_triples_to_scr_private(HYPRE_Int m, HYPRE_Int nz, - HYPRE_Int *I, HYPRE_Int *J, HYPRE_Real *A, +static void convert_triples_to_scr_private(HYPRE_Int m, HYPRE_Int nz, + HYPRE_Int *I, HYPRE_Int *J, HYPRE_Real *A, HYPRE_Int *rp, HYPRE_Int *cval, HYPRE_Real *aval); #if 0 #undef __FUNC__ #define __FUNC__ "mat_dh_print_graph_private" -void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, HYPRE_Int *cval, +void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, HYPRE_Int *cval, HYPRE_Real *aval, HYPRE_Int *n2o, HYPRE_Int *o2n, Hash_i_dh hash, FILE* fp) { START_FUNC_DH @@ -55,7 +55,7 @@ void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, H create_nat_ordering_private(m, &n2o); CHECK_V_ERROR; create_nat_ordering_private(m, &o2n); CHECK_V_ERROR; } - + if (hash == NULL) { private_hash = true; Hash_i_dhCreate(&hash, -1); CHECK_V_ERROR; @@ -70,9 +70,9 @@ void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, H /* nonlocal column: get permutation from hash table */ tmp = Hash_i_dhLookup(hash, col); CHECK_V_ERROR; - if (tmp == -1) { + if (tmp == -1) { hypre_sprintf(msgBuf_dh, "beg_row= %i m= %i; nonlocal column= %i not in hash table", - beg_row, m, col); + beg_row, m, col); SET_V_ERROR(msgBuf_dh); } else { col = tmp; @@ -81,7 +81,7 @@ void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, H col = o2n[col]; } - if (aval == NULL) { + if (aval == NULL) { val = _MATLAB_ZERO_; } else { val = aval[j]; 
@@ -107,7 +107,7 @@ void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, H /* currently only for unpermuted */ #undef __FUNC__ #define __FUNC__ "mat_dh_print_graph_private" -void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, HYPRE_Int *cval, +void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, HYPRE_Int *cval, HYPRE_Real *aval, HYPRE_Int *n2o, HYPRE_Int *o2n, Hash_i_dh hash, FILE* fp) { START_FUNC_DH @@ -123,7 +123,7 @@ void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, H create_nat_ordering_private(m, &n2o); CHECK_V_ERROR; create_nat_ordering_private(m, &o2n); CHECK_V_ERROR; } - + if (hash == NULL) { private_hash = true; Hash_i_dhCreate(&hash, -1); CHECK_V_ERROR; @@ -138,21 +138,21 @@ void mat_dh_print_graph_private(HYPRE_Int m, HYPRE_Int beg_row, HYPRE_Int *rp, H /* local column */ if (col >= beg_row || col < beg_row+m) { col = o2n[col]; - } + } /* nonlocal column: get permutation from hash table */ else { HYPRE_Int tmp = col; tmp = Hash_i_dhLookup(hash, col); CHECK_V_ERROR; - if (tmp == -1) { + if (tmp == -1) { hypre_sprintf(msgBuf_dh, "beg_row= %i m= %i; nonlocal column= %i not in hash table", - beg_row, m, col); + beg_row, m, col); SET_V_ERROR(msgBuf_dh); } else { col = tmp; } - } + } work[col] = 1; } @@ -247,7 +247,7 @@ void mat_dh_print_csr_private(HYPRE_Int m, HYPRE_Int *rp, HYPRE_Int *cval, HYPRE /* only implemented for a single cpu! 
*/ #undef __FUNC__ #define __FUNC__ "mat_dh_read_csr_private" -void mat_dh_read_csr_private(HYPRE_Int *mOUT, HYPRE_Int **rpOUT, HYPRE_Int **cvalOUT, +void mat_dh_read_csr_private(HYPRE_Int *mOUT, HYPRE_Int **rpOUT, HYPRE_Int **cvalOUT, HYPRE_Real **avalOUT, FILE* fp) { START_FUNC_DH @@ -302,7 +302,7 @@ void mat_dh_read_csr_private(HYPRE_Int *mOUT, HYPRE_Int **rpOUT, HYPRE_Int **cva #undef __FUNC__ #define __FUNC__ "mat_dh_read_triples_private" -void mat_dh_read_triples_private(HYPRE_Int ignore, HYPRE_Int *mOUT, HYPRE_Int **rpOUT, +void mat_dh_read_triples_private(HYPRE_Int ignore, HYPRE_Int *mOUT, HYPRE_Int **rpOUT, HYPRE_Int **cvalOUT, HYPRE_Real **avalOUT, FILE* fp) { START_FUNC_DH @@ -318,16 +318,18 @@ void mat_dh_read_triples_private(HYPRE_Int ignore, HYPRE_Int *mOUT, HYPRE_Int ** hypre_printf("mat_dh_read_triples_private:: ignoring following header lines:\n"); hypre_printf("--------------------------------------------------------------\n"); for (i=0; irp; + rp = A->rp; cval = A->cval; aval = A->aval; } @@ -626,7 +631,7 @@ void insert_missing_diags_private(Mat_dh A) } rp[i+1] = idx; } - + FREE_DH(RP); CHECK_V_ERROR; FREE_DH(CVAL); CHECK_V_ERROR; FREE_DH(AVAL); CHECK_V_ERROR; @@ -644,15 +649,15 @@ void readVec(Vec_dh *bout, char *ft, char *fn, HYPRE_Int ignore) SET_V_ERROR("passed NULL filename; can't open for reading!"); } - if (!strcmp(ft, "csr") || !strcmp(ft, "trip")) + if (!strcmp(ft, "csr") || !strcmp(ft, "trip")) { Vec_dhRead(bout, ignore, fn); CHECK_V_ERROR; - } + } else if (!strcmp(ft, "ebin")) { Vec_dhReadBIN(bout, fn); CHECK_V_ERROR; - } + } #ifdef PETSC_MODE else if (!strcmp(ft, "petsc")) { @@ -670,7 +675,7 @@ void readVec(Vec_dh *bout, char *ft, char *fn, HYPRE_Int ignore) if (ierr) { SET_V_ERROR("convertPetscToEuclidVec failed!"); } ierr = VecDestroy(bb); if (ierr) { SET_V_ERROR("VecDestroy failed! 
[PETSc lib]"); } - } + } #else else if (!strcmp(ft, "petsc")) { hypre_sprintf(msgBuf_dh, "must recompile Euclid using petsc mode!"); @@ -678,12 +683,12 @@ void readVec(Vec_dh *bout, char *ft, char *fn, HYPRE_Int ignore) } #endif - else + else { hypre_sprintf(msgBuf_dh, "unknown filetype: -ftin %s", ft); SET_V_ERROR(msgBuf_dh); } - + END_FUNC_DH } @@ -697,29 +702,29 @@ void writeMat(Mat_dh Ain, char *ft, char *fn) SET_V_ERROR("passed NULL filename; can't open for writing!"); } - if (!strcmp(ft, "csr")) + if (!strcmp(ft, "csr")) { Mat_dhPrintCSR(Ain, NULL, fn); CHECK_V_ERROR; - } + } - else if (!strcmp(ft, "trip")) + else if (!strcmp(ft, "trip")) { Mat_dhPrintTriples(Ain, NULL, fn); CHECK_V_ERROR; - } + } else if (!strcmp(ft, "ebin")) { Mat_dhPrintBIN(Ain, NULL, fn); CHECK_V_ERROR; - } + } #ifdef PETSC_MODE - else if (!strcmp(ft, "petsc")) + else if (!strcmp(ft, "petsc")) { Viewer_DH viewer; Mat Apetsc; HYPRE_Int ierr; - ierr = buildPetscMat(Ain->m, Ain->n, Ain->beg_row, + ierr = buildPetscMat(Ain->m, Ain->n, Ain->beg_row, Ain->rp, Ain->cval, Ain->aval, &Apetsc); if (ierr) { SET_V_ERROR("buildPetscMat failed!"); } @@ -741,7 +746,7 @@ void writeMat(Mat_dh Ain, char *ft, char *fn) } #endif - else + else { hypre_sprintf(msgBuf_dh, "unknown filetype: -ftout %s", ft); SET_V_ERROR(msgBuf_dh); @@ -759,18 +764,18 @@ void writeVec(Vec_dh bin, char *ft, char *fn) SET_V_ERROR("passed NULL filename; can't open for writing!"); } - if (!strcmp(ft, "csr") || !strcmp(ft, "trip")) + if (!strcmp(ft, "csr") || !strcmp(ft, "trip")) { Vec_dhPrint(bin, NULL, fn); CHECK_V_ERROR; - } + } else if (!strcmp(ft, "ebin")) { Vec_dhPrintBIN(bin, NULL, fn); CHECK_V_ERROR; - } + } #ifdef PETSC_MODE - else if (!strcmp(ft, "petsc")) + else if (!strcmp(ft, "petsc")) { Viewer_DH viewer; HYPRE_Int ierr; @@ -786,7 +791,7 @@ void writeVec(Vec_dh bin, char *ft, char *fn) if (ierr) { SET_V_ERROR("ViewerDestroy failed! [PETSc lib]"); } ierr = VecDestroy(bb); if (ierr) { SET_V_ERROR("VecDestroy failed! 
[PETSc lib]"); } - } + } #else else if (!strcmp(ft, "petsc")) { hypre_sprintf(msgBuf_dh, "must recompile Euclid using petsc mode!"); @@ -794,7 +799,7 @@ void writeVec(Vec_dh bin, char *ft, char *fn) } #endif - else + else { hypre_sprintf(msgBuf_dh, "unknown filetype: -ftout %s", ft); SET_V_ERROR(msgBuf_dh); @@ -838,14 +843,14 @@ HYPRE_Int isTriangular(HYPRE_Int m, HYPRE_Int *rp, HYPRE_Int *cval) /*-----------------------------------------------------------------------------------*/ static void mat_dh_transpose_reuse_private_private( - bool allocateMem, HYPRE_Int m, + bool allocateMem, HYPRE_Int m, HYPRE_Int *rpIN, HYPRE_Int *cvalIN, HYPRE_Real *avalIN, HYPRE_Int **rpOUT, HYPRE_Int **cvalOUT, HYPRE_Real **avalOUT); #undef __FUNC__ #define __FUNC__ "mat_dh_transpose_reuse_private" -void mat_dh_transpose_reuse_private(HYPRE_Int m, +void mat_dh_transpose_reuse_private(HYPRE_Int m, HYPRE_Int *rpIN, HYPRE_Int *cvalIN, HYPRE_Real *avalIN, HYPRE_Int *rpOUT, HYPRE_Int *cvalOUT, HYPRE_Real *avalOUT) { @@ -863,14 +868,14 @@ void mat_dh_transpose_private(HYPRE_Int m, HYPRE_Int *RP, HYPRE_Int **rpOUT, HYPRE_Real *AVAL, HYPRE_Real **avalOUT) { START_FUNC_DH - mat_dh_transpose_reuse_private_private(true, m, RP, CVAL, AVAL, + mat_dh_transpose_reuse_private_private(true, m, RP, CVAL, AVAL, rpOUT, cvalOUT, avalOUT); CHECK_V_ERROR; END_FUNC_DH } #undef __FUNC__ #define __FUNC__ "mat_dh_transpose_private_private" -void mat_dh_transpose_reuse_private_private(bool allocateMem, HYPRE_Int m, +void mat_dh_transpose_reuse_private_private(bool allocateMem, HYPRE_Int m, HYPRE_Int *RP, HYPRE_Int *CVAL, HYPRE_Real *AVAL, HYPRE_Int **rpOUT, HYPRE_Int **cvalOUT, HYPRE_Real **avalOUT) { @@ -959,7 +964,7 @@ HYPRE_Int mat_find_owner(HYPRE_Int *beg_rows, HYPRE_Int *end_rows, HYPRE_Int ind #define AVAL_TAG 2 #define CVAL_TAG 3 void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout); -void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout); +void 
partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout); #undef __FUNC__ #define __FUNC__ "readMat_par" @@ -989,7 +994,7 @@ void readMat_par(Mat_dh *Aout, char *fileType, char *fileName, HYPRE_Int ignore) Mat_dhDestroy(A); CHECK_V_ERROR; } - + if (Parser_dhHasSwitch(parser_dh, "-printMAT")) { char xname[] = "A", *name = xname; Parser_dhReadString(parser_dh, "-printMat", &name); @@ -1045,7 +1050,7 @@ void partition_and_distribute_metis_private(Mat_dh A, Mat_dh *Bout) /* partition and permute matrix */ Mat_dhPartition(A, np_dh, &beg_row, &row_count, &n2o_col, &o2n_row); ERRCHKA; Mat_dhPermute(A, n2o_col, &C); ERRCHKA; - + /* form rowToBlock array */ for (i=0; icval; @@ -1152,7 +1157,7 @@ void partition_and_distribute_private(Mat_dh A, Mat_dh *Bout) /* broadcast number of nonzeros in each row to all processors */ rowLengths = (HYPRE_Int*)MALLOC_DH(m*sizeof(HYPRE_Int)); CHECK_V_ERROR; if (myid_dh == 0) { - HYPRE_Int *tmp = A->rp; + HYPRE_Int *tmp = A->rp; for (i=0; icval; @@ -1309,9 +1314,9 @@ void mat_partition_private(Mat_dh A, HYPRE_Int blocks, HYPRE_Int *o2n_row, HYPRE for (i=0; icval[j]; /* row has an explicit diagonal element */ - if (col == i) { + if (col == i) { HYPRE_Real val = A->aval[j]; flag = false; if (val == 0.0) ++z_diag; @@ -1569,9 +1574,9 @@ void profileMat(Mat_dh A) hypre_printf("YY strict upper triangular nonzeros: %i\n", unz); hypre_printf("YY strict lower triangular nonzeros: %i\n", lnz); } - - - + + + Mat_dhTranspose(A, &B); CHECK_V_ERROR; diff --git a/src/distributed_ls/ParaSails/CMakeLists.txt b/src/distributed_ls/ParaSails/CMakeLists.txt index fed00e9ee..ac677ef15 100644 --- a/src/distributed_ls/ParaSails/CMakeLists.txt +++ b/src/distributed_ls/ParaSails/CMakeLists.txt @@ -3,7 +3,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -set(PARASAILS_SRCS +set(SRCS ConjGrad.c DiagScale.c FGmres.c @@ -19,9 +19,7 @@ set(PARASAILS_SRCS RowPatt.c StoredRows.c ) - -convert_filenames_to_full_paths(PARASAILS_SRCS) - -set(SRCS ${SRCS} 
${PARASAILS_SRCS} PARENT_SCOPE) - +target_sources(${PROJECT_NAME} + PRIVATE ${SRCS} +) diff --git a/src/distributed_ls/ParaSails/ConjGrad.c b/src/distributed_ls/ParaSails/ConjGrad.c index 365588067..fbe968ba3 100644 --- a/src/distributed_ls/ParaSails/ConjGrad.c +++ b/src/distributed_ls/ParaSails/ConjGrad.c @@ -52,7 +52,7 @@ static void Axpy(HYPRE_Int n, HYPRE_Real alpha, HYPRE_Real *x, HYPRE_Real *y) /*-------------------------------------------------------------------------- * PCG_ParaSails - PCG solver using ParaSails. * Use NULL for ps if to get unpreconditioned solve. - * Solver will stop at step 500 if rel. resid. norm reduction is not less + * Solver will stop at step 500 if rel. resid. norm reduction is not less * than 0.1 at that point. *--------------------------------------------------------------------------*/ @@ -93,7 +93,7 @@ void PCG_ParaSails(Matrix *mat, ParaSails *ps, HYPRE_Real *b, HYPRE_Real *x, MatrixMatvec(mat, x, r); /* r = Ax */ ScaleVector(n, -1.0, r); /* r = -r */ Axpy(n, 1.0, b, r); /* r = r + b */ - + /* p = C*r */ if (ps != NULL) ParaSailsApply(ps, r, p); @@ -120,7 +120,7 @@ void PCG_ParaSails(Matrix *mat, ParaSails *ps, HYPRE_Real *b, HYPRE_Real *x, /* r = r - alpha*s */ Axpy(n, -alpha, s, r); - + /* s = C*r */ if (ps != NULL) ParaSailsApply(ps, r, s); @@ -149,17 +149,17 @@ void PCG_ParaSails(Matrix *mat, ParaSails *ps, HYPRE_Real *b, HYPRE_Real *x, hypre_printf("Aborting solve due to slow or no convergence.\n"); break; } - + /* beta = gamma / gamma_old */ beta = gamma / gamma_old; /* p = s + beta p */ - ScaleVector(n, beta, p); + ScaleVector(n, beta, p); Axpy(n, 1.0, s, p); } - free(p); - free(s); + hypre_TFree(p, HYPRE_MEMORY_HOST); + hypre_TFree(s, HYPRE_MEMORY_HOST); /* compute exact relative residual norm */ MatrixMatvec(mat, x, r); /* r = Ax */ @@ -167,7 +167,7 @@ void PCG_ParaSails(Matrix *mat, ParaSails *ps, HYPRE_Real *b, HYPRE_Real *x, Axpy(n, 1.0, b, r); /* r = r + b */ i_prod = InnerProd(n, r, r, comm); - free(r); + 
hypre_TFree(r, HYPRE_MEMORY_HOST); if (mype == 0) hypre_printf("Iter (%4d): computed rrn : %e\n", i, sqrt(i_prod/bi_prod)); diff --git a/src/distributed_ls/ParaSails/DiagScale.c b/src/distributed_ls/ParaSails/DiagScale.c index 4ae8518fd..82cd22121 100644 --- a/src/distributed_ls/ParaSails/DiagScale.c +++ b/src/distributed_ls/ParaSails/DiagScale.c @@ -192,7 +192,7 @@ DiagScale *DiagScaleCreate(Matrix *A, Numbering *numb) requests, replies_list); num_replies = FindNumReplies(A->comm, replies_list); - free(replies_list); + hypre_TFree(replies_list,HYPRE_MEMORY_HOST); mem = MemCreate(); requests2 = NULL; @@ -204,7 +204,7 @@ DiagScale *DiagScaleCreate(Matrix *A, Numbering *numb) /* Wait for all replies */ hypre_MPI_Waitall(num_requests, requests, statuses); - free(requests); + hypre_TFree(requests,HYPRE_MEMORY_HOST); p->offset = A->end_row - A->beg_row + 1; @@ -217,16 +217,16 @@ DiagScale *DiagScaleCreate(Matrix *A, Numbering *numb) for (j=0; joffset] = p->ext_diags[j]; - free(ind); - free(p->ext_diags); + hypre_TFree(ind,HYPRE_MEMORY_HOST); + hypre_TFree(p->ext_diags,HYPRE_MEMORY_HOST); p->ext_diags = temp; /* Wait for all sends */ hypre_MPI_Waitall(num_replies, requests2, statuses); - free(requests2); + hypre_TFree(requests2,HYPRE_MEMORY_HOST); MemDestroy(mem); - free(statuses); + hypre_TFree(statuses,HYPRE_MEMORY_HOST); return p; } @@ -236,10 +236,10 @@ DiagScale *DiagScaleCreate(Matrix *A, Numbering *numb) void DiagScaleDestroy(DiagScale *p) { - free(p->local_diags); - free(p->ext_diags); + hypre_TFree(p->local_diags,HYPRE_MEMORY_HOST); + hypre_TFree(p->ext_diags,HYPRE_MEMORY_HOST); - free(p); + hypre_TFree(p,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- diff --git a/src/distributed_ls/ParaSails/FGmres.c b/src/distributed_ls/ParaSails/FGmres.c index 0e8387eb8..573c60000 100644 --- a/src/distributed_ls/ParaSails/FGmres.c +++ b/src/distributed_ls/ParaSails/FGmres.c @@ -187,10 +187,11 @@ void FGMRES_ParaSails(Matrix 
*mat, ParaSails *ps, HYPRE_Real *b, HYPRE_Real *x, if (mype == 0) hypre_printf("Iter (%d): computed rrn : %e\n", iter, rel_resid); - free(H); - free(s); - free(cs); - free(sn); - free(V); - free(W); + hypre_TFree(H, HYPRE_MEMORY_HOST); + hypre_TFree(s, HYPRE_MEMORY_HOST); + hypre_TFree(cs, HYPRE_MEMORY_HOST); + hypre_TFree(sn, HYPRE_MEMORY_HOST); + hypre_TFree(V, HYPRE_MEMORY_HOST); + hypre_TFree(W, HYPRE_MEMORY_HOST); } + diff --git a/src/distributed_ls/ParaSails/Hash.c b/src/distributed_ls/ParaSails/Hash.c index 8be6ab80c..c872e3908 100644 --- a/src/distributed_ls/ParaSails/Hash.c +++ b/src/distributed_ls/ParaSails/Hash.c @@ -11,8 +11,8 @@ * * We allow rehashing the data into a larger or smaller table, and thus * allow a data item (an integer, but a pointer would be more general) - * to be stored with each key in the table. (If we only return the - * storage location of the key in the table (the implied index), then + * to be stored with each key in the table. (If we only return the + * storage location of the key in the table (the implied index), then * rehashing would change the implied indices.) * * The modulus function is used as the hash function. @@ -23,7 +23,6 @@ *****************************************************************************/ #include -#include #include "Common.h" #include "Hash.h" @@ -58,14 +57,14 @@ Hash *HashCreate(HYPRE_Int size) void HashDestroy(Hash *h) { - free(h->keys); - free(h->table); - free(h->data); - free(h); + hypre_TFree(h->keys,HYPRE_MEMORY_HOST); + hypre_TFree(h->table,HYPRE_MEMORY_HOST); + hypre_TFree(h->data,HYPRE_MEMORY_HOST); + hypre_TFree(h,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- - * HashLookup - Look up the "key" in hash table "h" and return the data + * HashLookup - Look up the "key" in hash table "h" and return the data * associated with the key, or return HASH_NOTFOUND. 
*--------------------------------------------------------------------------*/ @@ -105,7 +104,7 @@ void HashInsert(Hash *h, HYPRE_Int key, HYPRE_Int data) { if (h->table[loc] == HASH_EMPTY) { - assert(h->num < h->size); + hypre_assert(h->num < h->size); h->keys[h->num++] = key; h->table[loc] = key; diff --git a/src/distributed_ls/ParaSails/LoadBal.c b/src/distributed_ls/ParaSails/LoadBal.c index 434a81788..2f6d60c79 100644 --- a/src/distributed_ls/ParaSails/LoadBal.c +++ b/src/distributed_ls/ParaSails/LoadBal.c @@ -11,7 +11,6 @@ * *****************************************************************************/ -#include #include #include "Common.h" #include "Matrix.h" @@ -19,8 +18,8 @@ #include "LoadBal.h" /*-------------------------------------------------------------------------- - * LoadBalInit - determine the amount of work to be donated and received by - * each processor, given the amount of work that each processor has + * LoadBalInit - determine the amount of work to be donated and received by + * each processor, given the amount of work that each processor has * ("local_cost"). The number of processors that this processor will donate * to is "num_given" and the number of processors from which this processor * will receive is "num_taken". 
Additional donor information is stored in @@ -30,7 +29,7 @@ * beta - target load balance factor *--------------------------------------------------------------------------*/ -void LoadBalInit(MPI_Comm comm, HYPRE_Real local_cost, HYPRE_Real beta, +void LoadBalInit(MPI_Comm comm, HYPRE_Real local_cost, HYPRE_Real beta, HYPRE_Int *num_given, HYPRE_Int *donor_data_pe, HYPRE_Real *donor_data_cost, HYPRE_Int *num_taken) { @@ -118,7 +117,7 @@ void LoadBalInit(MPI_Comm comm, HYPRE_Real local_cost, HYPRE_Real beta, } } - free(cost); + hypre_TFree(cost,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- @@ -128,7 +127,7 @@ void LoadBalInit(MPI_Comm comm, HYPRE_Real local_cost, HYPRE_Real beta, *--------------------------------------------------------------------------*/ void LoadBalDonorSend(MPI_Comm comm, Matrix *mat, Numbering *numb, - HYPRE_Int num_given, const HYPRE_Int *donor_data_pe, const HYPRE_Real *donor_data_cost, + HYPRE_Int num_given, const HYPRE_Int *donor_data_pe, const HYPRE_Real *donor_data_cost, DonorData *donor_data, HYPRE_Int *local_beg_row, hypre_MPI_Request *request) { HYPRE_Int send_beg_row, send_end_row; @@ -155,7 +154,7 @@ void LoadBalDonorSend(MPI_Comm comm, Matrix *mat, Numbering *numb, do { send_end_row++; - assert(send_end_row <= mat->end_row); + hypre_assert(send_end_row <= mat->end_row); MatrixGetRow(mat, send_end_row - mat->beg_row, &len, &ind, &val); accum += (HYPRE_Real) len*len*len; buflen += (len+1); /* additional one for row length */ @@ -213,7 +212,7 @@ void LoadBalRecipRecv(MPI_Comm comm, Numbering *numb, hypre_MPI_Get_count(&status, HYPRE_MPI_INT, &count); buffer = hypre_TAlloc(HYPRE_Int, count , HYPRE_MEMORY_HOST); - hypre_MPI_Recv(buffer, count, HYPRE_MPI_INT, recip_data[i].pe, LOADBAL_REQ_TAG, + hypre_MPI_Recv(buffer, count, HYPRE_MPI_INT, recip_data[i].pe, LOADBAL_REQ_TAG, comm, &status); bufferp = buffer; @@ -232,7 +231,7 @@ void LoadBalRecipRecv(MPI_Comm comm, Numbering *numb, 
bufferp += len; } - free(buffer); + hypre_TFree(buffer,HYPRE_MEMORY_HOST); } } @@ -243,7 +242,7 @@ void LoadBalRecipRecv(MPI_Comm comm, Numbering *numb, * Caller must free the allocated buffers. *--------------------------------------------------------------------------*/ -void LoadBalRecipSend(MPI_Comm comm, HYPRE_Int num_taken, +void LoadBalRecipSend(MPI_Comm comm, HYPRE_Int num_taken, RecipData *recip_data, hypre_MPI_Request *request) { HYPRE_Int i, row, buflen; @@ -289,7 +288,7 @@ void LoadBalRecipSend(MPI_Comm comm, HYPRE_Int num_taken, * Assume indices are in the same order. *--------------------------------------------------------------------------*/ -void LoadBalDonorRecv(MPI_Comm comm, Matrix *mat, +void LoadBalDonorRecv(MPI_Comm comm, Matrix *mat, HYPRE_Int num_given, DonorData *donor_data) { HYPRE_Int i, j, row; @@ -306,7 +305,7 @@ void LoadBalDonorRecv(MPI_Comm comm, Matrix *mat, hypre_MPI_Get_count(&status, hypre_MPI_REAL, &count); buffer = hypre_TAlloc(HYPRE_Real, count , HYPRE_MEMORY_HOST); - hypre_MPI_Recv(buffer, count, hypre_MPI_REAL, source, LOADBAL_REP_TAG, + hypre_MPI_Recv(buffer, count, hypre_MPI_REAL, source, LOADBAL_REP_TAG, comm, &status); /* search for which entry in donor_data this message corresponds to */ @@ -315,7 +314,7 @@ void LoadBalDonorRecv(MPI_Comm comm, Matrix *mat, if (donor_data[j].pe == source) break; } - assert(j < num_given); + hypre_assert(j < num_given); /* Parse the message and put row values into local matrix */ bufferp = buffer; @@ -326,7 +325,7 @@ void LoadBalDonorRecv(MPI_Comm comm, Matrix *mat, bufferp += len; } - free(buffer); + hypre_TFree(buffer,HYPRE_MEMORY_HOST); } } @@ -351,7 +350,7 @@ LoadBal *LoadBalDonate(MPI_Comm comm, Matrix *mat, Numbering *numb, donor_data_pe = hypre_TAlloc(HYPRE_Int, npes , HYPRE_MEMORY_HOST); donor_data_cost = hypre_TAlloc(HYPRE_Real, npes , HYPRE_MEMORY_HOST); - LoadBalInit(comm, local_cost, beta, &p->num_given, + LoadBalInit(comm, local_cost, beta, &p->num_given, donor_data_pe, 
donor_data_cost, &p->num_taken); p->recip_data = NULL; @@ -370,19 +369,19 @@ LoadBal *LoadBalDonate(MPI_Comm comm, Matrix *mat, Numbering *numb, LoadBalDonorSend(comm, mat, numb, p->num_given, donor_data_pe, donor_data_cost, p->donor_data, &p->beg_row, requests); - free(donor_data_pe); - free(donor_data_cost); + hypre_TFree(donor_data_pe,HYPRE_MEMORY_HOST); + hypre_TFree(donor_data_cost,HYPRE_MEMORY_HOST); LoadBalRecipRecv(comm, numb, p->num_taken, p->recip_data); hypre_MPI_Waitall(p->num_given, requests, statuses); - free(requests); - free(statuses); + hypre_TFree(requests,HYPRE_MEMORY_HOST); + hypre_TFree(statuses,HYPRE_MEMORY_HOST); /* Free the send buffers which were allocated by LoadBalDonorSend */ for (i=0; inum_given; i++) - free(p->donor_data[i].buffer); + hypre_TFree(p->donor_data[i].buffer,HYPRE_MEMORY_HOST); return p; } @@ -410,16 +409,16 @@ void LoadBalReturn(LoadBal *p, MPI_Comm comm, Matrix *mat) hypre_MPI_Waitall(p->num_taken, requests, statuses); - free(requests); - free(statuses); + hypre_TFree(requests,HYPRE_MEMORY_HOST); + hypre_TFree(statuses,HYPRE_MEMORY_HOST); /* Free the send buffers which were allocated by LoadBalRecipSend */ for (i=0; inum_taken; i++) - free(p->recip_data[i].buffer); + hypre_TFree(p->recip_data[i].buffer,HYPRE_MEMORY_HOST); - free(p->donor_data); - free(p->recip_data); + hypre_TFree(p->donor_data,HYPRE_MEMORY_HOST); + hypre_TFree(p->recip_data,HYPRE_MEMORY_HOST); - free(p); + hypre_TFree(p,HYPRE_MEMORY_HOST); } diff --git a/src/distributed_ls/ParaSails/Matrix.c b/src/distributed_ls/ParaSails/Matrix.c index ccf45ce13..ef45ca0a6 100644 --- a/src/distributed_ls/ParaSails/Matrix.c +++ b/src/distributed_ls/ParaSails/Matrix.c @@ -11,16 +11,15 @@ * the matrix nonzeros are copied into the matrix a row at a time, in any * order using the MatrixGetRow function. The MatrixPutRow function returns * a pointer to the indices and values of a row. 
The matrix has a set of - * row and column indices such that these indices begin at "beg" and end + * row and column indices such that these indices begin at "beg" and end * at "end", where 0 <= "beg" <= "end". In other words, the matrix indices - * have any nonnegative base value, and the base values of the row and column + * have any nonnegative base value, and the base values of the row and column * indices must agree. * *****************************************************************************/ #include -#include -#include +//#include #include "Common.h" #include "Matrix.h" #include "Numbering.h" @@ -33,51 +32,51 @@ Matrix *MatrixCreate(MPI_Comm comm, HYPRE_Int beg_row, HYPRE_Int end_row) { - HYPRE_Int num_rows, mype, npes; + HYPRE_Int num_rows, mype, npes; - Matrix *mat = hypre_TAlloc(Matrix, 1, HYPRE_MEMORY_HOST); + Matrix *mat = hypre_TAlloc(Matrix, 1, HYPRE_MEMORY_HOST); - mat->comm = comm; + mat->comm = comm; - mat->beg_row = beg_row; - mat->end_row = end_row; + mat->beg_row = beg_row; + mat->end_row = end_row; - mat->mem = (Mem *) MemCreate(); + mat->mem = (Mem *) MemCreate(); - num_rows = mat->end_row - mat->beg_row + 1; + num_rows = mat->end_row - mat->beg_row + 1; - mat->lens = (HYPRE_Int *) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Int)); - mat->inds = (HYPRE_Int **) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Int *)); - mat->vals = (HYPRE_Real **) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Real *)); + mat->lens = (HYPRE_Int *) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Int)); + mat->inds = (HYPRE_Int **) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Int *)); + mat->vals = (HYPRE_Real **) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Real *)); - /* Send beg_row and end_row to all processors */ - /* This is needed in order to map row numbers to processors */ + /* Send beg_row and end_row to all processors */ + /* This is needed in order to map row numbers to processors */ - hypre_MPI_Comm_rank(comm, &mype); - hypre_MPI_Comm_size(comm, &npes); + 
hypre_MPI_Comm_rank(comm, &mype); + hypre_MPI_Comm_size(comm, &npes); - mat->beg_rows = (HYPRE_Int *) MemAlloc(mat->mem, npes * sizeof(HYPRE_Int)); - mat->end_rows = (HYPRE_Int *) MemAlloc(mat->mem, npes * sizeof(HYPRE_Int)); + mat->beg_rows = (HYPRE_Int *) MemAlloc(mat->mem, npes * sizeof(HYPRE_Int)); + mat->end_rows = (HYPRE_Int *) MemAlloc(mat->mem, npes * sizeof(HYPRE_Int)); - hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, mat->beg_rows, 1, HYPRE_MPI_INT, comm); - hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, mat->end_rows, 1, HYPRE_MPI_INT, comm); + hypre_MPI_Allgather(&beg_row, 1, HYPRE_MPI_INT, mat->beg_rows, 1, HYPRE_MPI_INT, comm); + hypre_MPI_Allgather(&end_row, 1, HYPRE_MPI_INT, mat->end_rows, 1, HYPRE_MPI_INT, comm); - mat->num_recv = 0; - mat->num_send = 0; + mat->num_recv = 0; + mat->num_send = 0; - mat->recv_req = NULL; - mat->send_req = NULL; - mat->recv_req2 = NULL; - mat->send_req2 = NULL; - mat->statuses = NULL; - - mat->sendind = NULL; - mat->sendbuf = NULL; - mat->recvbuf = NULL; + mat->recv_req = NULL; + mat->send_req = NULL; + mat->recv_req2 = NULL; + mat->send_req2 = NULL; + mat->statuses = NULL; - mat->numb = NULL; + mat->sendind = NULL; + mat->sendbuf = NULL; + mat->recvbuf = NULL; - return mat; + mat->numb = NULL; + + return mat; } /*-------------------------------------------------------------------------- @@ -87,45 +86,45 @@ Matrix *MatrixCreate(MPI_Comm comm, HYPRE_Int beg_row, HYPRE_Int end_row) Matrix *MatrixCreateLocal(HYPRE_Int beg_row, HYPRE_Int end_row) { - HYPRE_Int num_rows; + HYPRE_Int num_rows; - Matrix *mat = hypre_TAlloc(Matrix, 1, HYPRE_MEMORY_HOST); + Matrix *mat = hypre_TAlloc(Matrix, 1, HYPRE_MEMORY_HOST); - mat->comm = hypre_MPI_COMM_NULL; + mat->comm = hypre_MPI_COMM_NULL; - mat->beg_row = beg_row; - mat->end_row = end_row; + mat->beg_row = beg_row; + mat->end_row = end_row; - mat->mem = (Mem *) MemCreate(); + mat->mem = (Mem *) MemCreate(); - num_rows = mat->end_row - mat->beg_row + 1; + num_rows = mat->end_row - 
mat->beg_row + 1; - mat->lens = (HYPRE_Int *) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Int)); - mat->inds = (HYPRE_Int **) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Int *)); - mat->vals = (HYPRE_Real **) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Real *)); + mat->lens = (HYPRE_Int *) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Int)); + mat->inds = (HYPRE_Int **) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Int *)); + mat->vals = (HYPRE_Real **) MemAlloc(mat->mem, num_rows * sizeof(HYPRE_Real *)); - /* Send beg_row and end_row to all processors */ - /* This is needed in order to map row numbers to processors */ + /* Send beg_row and end_row to all processors */ + /* This is needed in order to map row numbers to processors */ - mat->beg_rows = NULL; - mat->end_rows = NULL; + mat->beg_rows = NULL; + mat->end_rows = NULL; - mat->num_recv = 0; - mat->num_send = 0; + mat->num_recv = 0; + mat->num_send = 0; - mat->recv_req = NULL; - mat->send_req = NULL; - mat->recv_req2 = NULL; - mat->send_req2 = NULL; - mat->statuses = NULL; + mat->recv_req = NULL; + mat->send_req = NULL; + mat->recv_req2 = NULL; + mat->send_req2 = NULL; + mat->statuses = NULL; - mat->sendind = NULL; - mat->sendbuf = NULL; - mat->recvbuf = NULL; + mat->sendind = NULL; + mat->sendbuf = NULL; + mat->recvbuf = NULL; - mat->numb = NULL; + mat->numb = NULL; - return mat; + return mat; } /*-------------------------------------------------------------------------- @@ -134,69 +133,69 @@ Matrix *MatrixCreateLocal(HYPRE_Int beg_row, HYPRE_Int end_row) void MatrixDestroy(Matrix *mat) { - HYPRE_Int i; + HYPRE_Int i; - for (i=0; inum_recv; i++) - hypre_MPI_Request_free(&mat->recv_req[i]); + for (i=0; inum_recv; i++) + hypre_MPI_Request_free(&mat->recv_req[i]); - for (i=0; inum_send; i++) - hypre_MPI_Request_free(&mat->send_req[i]); + for (i=0; inum_send; i++) + hypre_MPI_Request_free(&mat->send_req[i]); - for (i=0; inum_send; i++) - hypre_MPI_Request_free(&mat->recv_req2[i]); + for (i=0; inum_send; i++) + 
hypre_MPI_Request_free(&mat->recv_req2[i]); - for (i=0; inum_recv; i++) - hypre_MPI_Request_free(&mat->send_req2[i]); + for (i=0; inum_recv; i++) + hypre_MPI_Request_free(&mat->send_req2[i]); - free(mat->recv_req); - free(mat->send_req); - free(mat->recv_req2); - free(mat->send_req2); - free(mat->statuses); + hypre_TFree(mat->recv_req,HYPRE_MEMORY_HOST); + hypre_TFree(mat->send_req,HYPRE_MEMORY_HOST); + hypre_TFree(mat->recv_req2,HYPRE_MEMORY_HOST); + hypre_TFree(mat->send_req2,HYPRE_MEMORY_HOST); + hypre_TFree(mat->statuses,HYPRE_MEMORY_HOST); - free(mat->sendind); - free(mat->sendbuf); - free(mat->recvbuf); + hypre_TFree(mat->sendind,HYPRE_MEMORY_HOST); + hypre_TFree(mat->sendbuf,HYPRE_MEMORY_HOST); + hypre_TFree(mat->recvbuf,HYPRE_MEMORY_HOST); - MemDestroy(mat->mem); + MemDestroy(mat->mem); - if (mat->numb) - NumberingDestroy(mat->numb); + if (mat->numb) + NumberingDestroy(mat->numb); - free(mat); + hypre_TFree(mat,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- * MatrixSetRow - Set a row in a matrix. Only local rows can be set. - * Once a row has been set, it should not be set again, or else the - * memory used by the existing row will not be recovered until + * Once a row has been set, it should not be set again, or else the + * memory used by the existing row will not be recovered until * the matrix is destroyed. "row" is in global coordinate numbering. 
*--------------------------------------------------------------------------*/ void MatrixSetRow(Matrix *mat, HYPRE_Int row, HYPRE_Int len, HYPRE_Int *ind, HYPRE_Real *val) { - row -= mat->beg_row; + row -= mat->beg_row; - mat->lens[row] = len; - mat->inds[row] = (HYPRE_Int *) MemAlloc(mat->mem, len*sizeof(HYPRE_Int)); - mat->vals[row] = (HYPRE_Real *) MemAlloc(mat->mem, len*sizeof(HYPRE_Real)); + mat->lens[row] = len; + mat->inds[row] = (HYPRE_Int *) MemAlloc(mat->mem, len*sizeof(HYPRE_Int)); + mat->vals[row] = (HYPRE_Real *) MemAlloc(mat->mem, len*sizeof(HYPRE_Real)); - if (ind != NULL) - hypre_TMemcpy(mat->inds[row], ind, HYPRE_Int, len, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + if (ind != NULL) + hypre_TMemcpy(mat->inds[row], ind, HYPRE_Int, len, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - if (val != NULL) - hypre_TMemcpy(mat->vals[row], val, HYPRE_Real, len, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + if (val != NULL) + hypre_TMemcpy(mat->vals[row], val, HYPRE_Real, len, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- - * MatrixGetRow - Get a *local* row in a matrix. + * MatrixGetRow - Get a *local* row in a matrix. 
*--------------------------------------------------------------------------*/ void MatrixGetRow(Matrix *mat, HYPRE_Int row, HYPRE_Int *lenp, HYPRE_Int **indp, HYPRE_Real **valp) { - *lenp = mat->lens[row]; - *indp = mat->inds[row]; - *valp = mat->vals[row]; + *lenp = mat->lens[row]; + *indp = mat->inds[row]; + *valp = mat->vals[row]; } /*-------------------------------------------------------------------------- @@ -205,23 +204,23 @@ void MatrixGetRow(Matrix *mat, HYPRE_Int row, HYPRE_Int *lenp, HYPRE_Int **indp, HYPRE_Int MatrixRowPe(Matrix *mat, HYPRE_Int row) { - HYPRE_Int npes, pe; + HYPRE_Int npes, pe; - HYPRE_Int *beg = mat->beg_rows; - HYPRE_Int *end = mat->end_rows; + HYPRE_Int *beg = mat->beg_rows; + HYPRE_Int *end = mat->end_rows; - hypre_MPI_Comm_size(mat->comm, &npes); + hypre_MPI_Comm_size(mat->comm, &npes); - for (pe=0; pe= beg[pe] && row <= end[pe]) - return pe; - } + for (pe=0; pe= beg[pe] && row <= end[pe]) + return pe; + } - hypre_printf("MatrixRowPe: could not map row %d.\n", row); - PARASAILS_EXIT; + hypre_printf("MatrixRowPe: could not map row %d.\n", row); + PARASAILS_EXIT; - return -1; /* for picky compilers */ + return -1; /* for picky compilers */ } /*-------------------------------------------------------------------------- @@ -230,55 +229,55 @@ HYPRE_Int MatrixRowPe(Matrix *mat, HYPRE_Int row) HYPRE_Int MatrixNnz(Matrix *mat) { - HYPRE_Int num_local, i, total, alltotal; + HYPRE_Int num_local, i, total, alltotal; - num_local = mat->end_row - mat->beg_row + 1; + num_local = mat->end_row - mat->beg_row + 1; - total = 0; - for (i=0; ilens[i]; + total = 0; + for (i=0; ilens[i]; - hypre_MPI_Allreduce(&total, &alltotal, 1, HYPRE_MPI_INT, hypre_MPI_SUM, mat->comm); + hypre_MPI_Allreduce(&total, &alltotal, 1, HYPRE_MPI_INT, hypre_MPI_SUM, mat->comm); - return alltotal; + return alltotal; } /*-------------------------------------------------------------------------- - * MatrixPrint - Print a matrix to a file "filename". 
Each processor + * MatrixPrint - Print a matrix to a file "filename". Each processor * appends to the file in order, but the file is overwritten if it exists. *--------------------------------------------------------------------------*/ void MatrixPrint(Matrix *mat, char *filename) { - HYPRE_Int mype, npes, pe; - HYPRE_Int row, i, len, *ind; - HYPRE_Real *val; - - hypre_MPI_Comm_rank(mat->comm, &mype); - hypre_MPI_Comm_size(mat->comm, &npes); - - for (pe=0; pecomm); - - if (mype == pe) - { - FILE *file = fopen(filename, (pe==0 ? "w" : "a")); - assert(file != NULL); - - for (row=0; row<=mat->end_row - mat->beg_row; row++) - { - MatrixGetRow(mat, row, &len, &ind, &val); - - for (i=0; ibeg_row, - mat->numb->local_to_global[ind[i]], val[i]); - } - - fclose(file); - } - } + HYPRE_Int mype, npes, pe; + HYPRE_Int row, i, len, *ind; + HYPRE_Real *val; + + hypre_MPI_Comm_rank(mat->comm, &mype); + hypre_MPI_Comm_size(mat->comm, &npes); + + for (pe=0; pecomm); + + if (mype == pe) + { + FILE *file = fopen(filename, (pe==0 ? 
"w" : "a")); + hypre_assert(file != NULL); + + for (row=0; row<=mat->end_row - mat->beg_row; row++) + { + MatrixGetRow(mat, row, &len, &ind, &val); + + for (i=0; ibeg_row, + mat->numb->local_to_global[ind[i]], val[i]); + } + + fclose(file); + } + } } /*-------------------------------------------------------------------------- @@ -287,117 +286,126 @@ void MatrixPrint(Matrix *mat, char *filename) static void MatrixReadMaster(Matrix *mat, char *filename) { - MPI_Comm comm = mat->comm; - HYPRE_Int mype, npes; - FILE *file; - HYPRE_Int ret; - HYPRE_Int num_rows, curr_proc; - HYPRE_Int row, col; - HYPRE_Real value; - hypre_longint offset; - hypre_longint outbuf; - - HYPRE_Int curr_row; - HYPRE_Int len; - HYPRE_Int ind[MAX_NZ_PER_ROW]; - HYPRE_Real val[MAX_NZ_PER_ROW]; - - char line[100]; - HYPRE_Int oldrow; - - hypre_MPI_Request request; - hypre_MPI_Status status; - - hypre_MPI_Comm_size(mat->comm, &npes); - hypre_MPI_Comm_rank(mat->comm, &mype); - - file = fopen(filename, "r"); - assert(file != NULL); - - fgets(line, 100, file); + MPI_Comm comm = mat->comm; + HYPRE_Int mype, npes; + FILE *file; + HYPRE_Int ret; + HYPRE_Int num_rows, curr_proc; + HYPRE_Int row, col; + HYPRE_Real value; + hypre_longint offset; + hypre_longint outbuf; + + HYPRE_Int curr_row; + HYPRE_Int len; + HYPRE_Int ind[MAX_NZ_PER_ROW]; + HYPRE_Real val[MAX_NZ_PER_ROW]; + + char line[100]; + HYPRE_Int oldrow; + + hypre_MPI_Request request; + hypre_MPI_Status status; + + hypre_MPI_Comm_size(mat->comm, &npes); + hypre_MPI_Comm_rank(mat->comm, &mype); + + file = fopen(filename, "r"); + hypre_assert(file != NULL); + + if (fgets(line, 100, file) == NULL) + { + hypre_fprintf(stderr, "Error reading file.\n"); + PARASAILS_EXIT; + } + #ifdef EMSOLVE - ret = hypre_sscanf(line, "%*d %d %*d %*d", &num_rows); - for (row=0; rowbeg_rows[curr_proc]) - { - hypre_MPI_Wait(&request, &status); - outbuf = offset; - hypre_MPI_Isend(&outbuf, 1, hypre_MPI_LONG, curr_proc, 0, comm, &request); - curr_proc++; - } - offset = 
ftell(file); - oldrow = row; - hypre_fscanf(file, "%d %d %lf", &row, &col, &value); - if (oldrow > row) - { - hypre_fprintf(stderr, "Matrix file is not sorted by rows.\n"); - PARASAILS_EXIT; - } - } - - /* Now read our own part */ - rewind(file); - - fgets(line, 100, file); + offset = ftell(file); + hypre_fscanf(file, "%d %d %lf", &row, &col, &value); + + request = hypre_MPI_REQUEST_NULL; + curr_proc = 1; /* proc for which we are looking for the beginning */ + while (curr_proc < npes) + { + if (row == mat->beg_rows[curr_proc]) + { + hypre_MPI_Wait(&request, &status); + outbuf = offset; + hypre_MPI_Isend(&outbuf, 1, hypre_MPI_LONG, curr_proc, 0, comm, &request); + curr_proc++; + } + offset = ftell(file); + oldrow = row; + hypre_fscanf(file, "%d %d %lf", &row, &col, &value); + if (oldrow > row) + { + hypre_fprintf(stderr, "Matrix file is not sorted by rows.\n"); + PARASAILS_EXIT; + } + } + + /* Now read our own part */ + rewind(file); + if (fgets(line, 100, file) == NULL) + { + hypre_fprintf(stderr, "Error reading file.\n"); + PARASAILS_EXIT; + } + #ifdef EMSOLVE - ret = hypre_sscanf(line, "%*d %d %*d %*d", &num_rows); - for (row=0; rowend_row) - { - if (row != curr_row) - { - /* store this row */ - MatrixSetRow(mat, curr_row, len, ind, val); + while (ret != EOF && row <= mat->end_row) + { + if (row != curr_row) + { + /* store this row */ + MatrixSetRow(mat, curr_row, len, ind, val); - curr_row = row; + curr_row = row; - /* reset row pointer */ - len = 0; - } + /* reset row pointer */ + len = 0; + } - if (len >= MAX_NZ_PER_ROW) - { - hypre_fprintf(stderr, "The matrix has exceeded %d\n", MAX_NZ_PER_ROW); - hypre_fprintf(stderr, "nonzeros per row. Internal buffers must be\n"); - hypre_fprintf(stderr, "increased to continue.\n"); - PARASAILS_EXIT; - } + if (len >= MAX_NZ_PER_ROW) + { + hypre_fprintf(stderr, "The matrix has exceeded %d\n", MAX_NZ_PER_ROW); + hypre_fprintf(stderr, "nonzeros per row. 
Internal buffers must be\n"); + hypre_fprintf(stderr, "increased to continue.\n"); + PARASAILS_EXIT; + } - ind[len] = col; - val[len] = value; - len++; + ind[len] = col; + val[len] = value; + len++; - ret = hypre_fscanf(file, "%d %d %lf", &row, &col, &value); - } + ret = hypre_fscanf(file, "%d %d %lf", &row, &col, &value); + } - /* Store the final row */ - if (ret == EOF || row > mat->end_row) - MatrixSetRow(mat, mat->end_row, len, ind, val); + /* Store the final row */ + if (ret == EOF || row > mat->end_row) + MatrixSetRow(mat, mat->end_row, len, ind, val); - fclose(file); + fclose(file); - hypre_MPI_Wait(&request, &status); + hypre_MPI_Wait(&request, &status); } /*-------------------------------------------------------------------------- @@ -406,76 +414,76 @@ static void MatrixReadMaster(Matrix *mat, char *filename) static void MatrixReadSlave(Matrix *mat, char *filename) { - MPI_Comm comm = mat->comm; - hypre_MPI_Status status; - HYPRE_Int mype; - FILE *file; - HYPRE_Int ret; - HYPRE_Int row, col; - HYPRE_Real value; - hypre_longint offset; - - HYPRE_Int curr_row; - HYPRE_Int len; - HYPRE_Int ind[MAX_NZ_PER_ROW]; - HYPRE_Real val[MAX_NZ_PER_ROW]; - - HYPRE_Real time0, time1; - - file = fopen(filename, "r"); - assert(file != NULL); - - hypre_MPI_Comm_rank(mat->comm, &mype); - - hypre_MPI_Recv(&offset, 1, hypre_MPI_LONG, 0, 0, comm, &status); - time0 = hypre_MPI_Wtime(); - - ret = fseek(file, offset, SEEK_SET); - assert(ret == 0); - - ret = hypre_fscanf(file, "%d %d %lf", &row, &col, &value); - curr_row = row; - len = 0; - - while (ret != EOF && row <= mat->end_row) - { - if (row != curr_row) - { - /* store this row */ - MatrixSetRow(mat, curr_row, len, ind, val); - - curr_row = row; - - /* reset row pointer */ - len = 0; - } - - if (len >= MAX_NZ_PER_ROW) - { - hypre_fprintf(stderr, "The matrix has exceeded %d\n", MAX_NZ_PER_ROW); - hypre_fprintf(stderr, "nonzeros per row. 
Internal buffers must be\n"); - hypre_fprintf(stderr, "increased to continue.\n"); - PARASAILS_EXIT; - } - - ind[len] = col; - val[len] = value; - len++; - - ret = hypre_fscanf(file, "%d %d %lf", &row, &col, &value); - } - - /* Store the final row */ - if (ret == EOF || row > mat->end_row) - MatrixSetRow(mat, mat->end_row, len, ind, val); - - fclose(file); - time1 = hypre_MPI_Wtime(); - hypre_printf("%d: Time for slave read: %f\n", mype, time1-time0); + MPI_Comm comm = mat->comm; + hypre_MPI_Status status; + HYPRE_Int mype; + FILE *file; + HYPRE_Int ret; + HYPRE_Int row, col; + HYPRE_Real value; + hypre_longint offset; + + HYPRE_Int curr_row; + HYPRE_Int len; + HYPRE_Int ind[MAX_NZ_PER_ROW]; + HYPRE_Real val[MAX_NZ_PER_ROW]; + + HYPRE_Real time0, time1; + + file = fopen(filename, "r"); + hypre_assert(file != NULL); + + hypre_MPI_Comm_rank(mat->comm, &mype); + + hypre_MPI_Recv(&offset, 1, hypre_MPI_LONG, 0, 0, comm, &status); + time0 = hypre_MPI_Wtime(); + + ret = fseek(file, offset, SEEK_SET); + hypre_assert(ret == 0); + + ret = hypre_fscanf(file, "%d %d %lf", &row, &col, &value); + curr_row = row; + len = 0; + + while (ret != EOF && row <= mat->end_row) + { + if (row != curr_row) + { + /* store this row */ + MatrixSetRow(mat, curr_row, len, ind, val); + + curr_row = row; + + /* reset row pointer */ + len = 0; + } + + if (len >= MAX_NZ_PER_ROW) + { + hypre_fprintf(stderr, "The matrix has exceeded %d\n", MAX_NZ_PER_ROW); + hypre_fprintf(stderr, "nonzeros per row. 
Internal buffers must be\n"); + hypre_fprintf(stderr, "increased to continue.\n"); + PARASAILS_EXIT; + } + + ind[len] = col; + val[len] = value; + len++; + + ret = hypre_fscanf(file, "%d %d %lf", &row, &col, &value); + } + + /* Store the final row */ + if (ret == EOF || row > mat->end_row) + MatrixSetRow(mat, mat->end_row, len, ind, val); + + fclose(file); + time1 = hypre_MPI_Wtime(); + hypre_printf("%d: Time for slave read: %f\n", mype, time1-time0); } /*-------------------------------------------------------------------------- - * MatrixRead - Read a matrix file "filename" from disk and store in the + * MatrixRead - Read a matrix file "filename" from disk and store in the * matrix "mat" which has already been created using MatrixCreate. The format * assumes no nonzero rows, the rows are in order, and there will be at least * one row per processor. @@ -483,24 +491,24 @@ static void MatrixReadSlave(Matrix *mat, char *filename) void MatrixRead(Matrix *mat, char *filename) { - HYPRE_Int mype; - HYPRE_Real time0, time1; + HYPRE_Int mype; + HYPRE_Real time0, time1; - hypre_MPI_Comm_rank(mat->comm, &mype); + hypre_MPI_Comm_rank(mat->comm, &mype); - time0 = hypre_MPI_Wtime(); - if (mype == 0) - MatrixReadMaster(mat, filename); - else - MatrixReadSlave(mat, filename); - time1 = hypre_MPI_Wtime(); - hypre_printf("%d: Time for reading matrix: %f\n", mype, time1-time0); + time0 = hypre_MPI_Wtime(); + if (mype == 0) + MatrixReadMaster(mat, filename); + else + MatrixReadSlave(mat, filename); + time1 = hypre_MPI_Wtime(); + hypre_printf("%d: Time for reading matrix: %f\n", mype, time1-time0); - MatrixComplete(mat); + MatrixComplete(mat); } /*-------------------------------------------------------------------------- - * RhsRead - Read a right-hand side file "filename" from disk and store in the + * RhsRead - Read a right-hand side file "filename" from disk and store in the * location pointed to by "rhs". "mat" is needed to provide the partitioning * information. 
The expected format is: a header line (n, nrhs) followed * by n values. Also allows isis format, indicated by 1 HYPRE_Int in first line. @@ -508,61 +516,65 @@ void MatrixRead(Matrix *mat, char *filename) void RhsRead(HYPRE_Real *rhs, Matrix *mat, char *filename) { - FILE *file; - hypre_MPI_Status status; - HYPRE_Int mype, npes; - HYPRE_Int num_rows, num_local, pe, i, converted; - HYPRE_Real *buffer = NULL; - HYPRE_Int buflen = 0; - char line[100]; - HYPRE_Int dummy; - - hypre_MPI_Comm_size(mat->comm, &npes); - hypre_MPI_Comm_rank(mat->comm, &mype); - - num_local = mat->end_row - mat->beg_row + 1; - - if (mype != 0) - { - hypre_MPI_Recv(rhs, num_local, hypre_MPI_REAL, 0, 0, mat->comm, &status); - return; - } - - file = fopen(filename, "r"); - assert(file != NULL); - - fgets(line, 100, file); - converted = hypre_sscanf(line, "%d %d", &num_rows, &dummy); - assert(num_rows == mat->end_rows[npes-1]); - - /* Read own rows first */ - for (i=0; iend_rows[pe] - mat->beg_rows[pe]+ 1; - - if (buflen < num_local) - { - free(buffer); - buflen = num_local; - buffer = hypre_TAlloc(HYPRE_Real, buflen , HYPRE_MEMORY_HOST); - } - - for (i=0; icomm, &npes); + hypre_MPI_Comm_rank(mat->comm, &mype); + + num_local = mat->end_row - mat->beg_row + 1; + + if (mype != 0) + { + hypre_MPI_Recv(rhs, num_local, hypre_MPI_REAL, 0, 0, mat->comm, &status); + return; + } + + file = fopen(filename, "r"); + hypre_assert(file != NULL); + + if (fgets(line, 100, file) == NULL) + { + hypre_fprintf(stderr, "Error reading file.\n"); + PARASAILS_EXIT; + } + converted = hypre_sscanf(line, "%d %d", &num_rows, &dummy); + hypre_assert(num_rows == mat->end_rows[npes-1]); + + /* Read own rows first */ + for (i=0; iend_rows[pe] - mat->beg_rows[pe]+ 1; + + if (buflen < num_local) + { + hypre_TFree(buffer,HYPRE_MEMORY_HOST); + buflen = num_local; + buffer = hypre_TAlloc(HYPRE_Real, buflen , HYPRE_MEMORY_HOST); + } + + for (i=0; icomm); - } + hypre_MPI_Send(buffer, num_local, hypre_MPI_REAL, pe, 0, mat->comm); + } - 
free(buffer); + hypre_TFree(buffer,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- @@ -571,49 +583,49 @@ void RhsRead(HYPRE_Real *rhs, Matrix *mat, char *filename) static void SetupReceives(Matrix *mat, HYPRE_Int reqlen, HYPRE_Int *reqind, HYPRE_Int *outlist) { - HYPRE_Int i, j, this_pe, mype; - hypre_MPI_Request request; - MPI_Comm comm = mat->comm; - HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; + HYPRE_Int i, j, this_pe, mype; + hypre_MPI_Request request; + MPI_Comm comm = mat->comm; + HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; - hypre_MPI_Comm_rank(comm, &mype); + hypre_MPI_Comm_rank(comm, &mype); - mat->num_recv = 0; + mat->num_recv = 0; - /* Allocate recvbuf */ - /* recvbuf has numlocal entires saved for local part of x, used in matvec */ - mat->recvlen = reqlen; /* used for the transpose multiply */ - mat->recvbuf = hypre_TAlloc(HYPRE_Real, (reqlen+num_local) , HYPRE_MEMORY_HOST); + /* Allocate recvbuf */ + /* recvbuf has numlocal entires saved for local part of x, used in matvec */ + mat->recvlen = reqlen; /* used for the transpose multiply */ + mat->recvbuf = hypre_TAlloc(HYPRE_Real, (reqlen+num_local) , HYPRE_MEMORY_HOST); - for (i=0; ibeg_rows[this_pe] || - reqind[j] > mat->end_rows[this_pe]) - break; - } + /* Figure out other rows we need from this_pe */ + for (j=i+1; jbeg_rows[this_pe] || + reqind[j] > mat->end_rows[this_pe]) + break; + } - /* Request rows in reqind[i..j-1] */ - hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, comm, &request); - hypre_MPI_Request_free(&request); + /* Request rows in reqind[i..j-1] */ + hypre_MPI_Isend(&reqind[i], j-i, HYPRE_MPI_INT, this_pe, 444, comm, &request); + hypre_MPI_Request_free(&request); - /* Count of number of number of indices needed from this_pe */ - outlist[this_pe] = j-i; + /* Count of number of number of indices needed from this_pe */ + outlist[this_pe] = j-i; - hypre_MPI_Recv_init(&mat->recvbuf[i+num_local], j-i, 
hypre_MPI_REAL, this_pe, 555, - comm, &mat->recv_req[mat->num_recv]); + hypre_MPI_Recv_init(&mat->recvbuf[i+num_local], j-i, hypre_MPI_REAL, this_pe, 555, + comm, &mat->recv_req[mat->num_recv]); - hypre_MPI_Send_init(&mat->recvbuf[i+num_local], j-i, hypre_MPI_REAL, this_pe, 666, - comm, &mat->send_req2[mat->num_recv]); + hypre_MPI_Send_init(&mat->recvbuf[i+num_local], j-i, hypre_MPI_REAL, this_pe, 666, + comm, &mat->send_req2[mat->num_recv]); - mat->num_recv++; - } + mat->num_recv++; + } } /*-------------------------------------------------------------------------- @@ -623,61 +635,61 @@ static void SetupReceives(Matrix *mat, HYPRE_Int reqlen, HYPRE_Int *reqind, HYPR static void SetupSends(Matrix *mat, HYPRE_Int *inlist) { - HYPRE_Int i, j, mype, npes; - hypre_MPI_Request *requests; - hypre_MPI_Status *statuses; - MPI_Comm comm = mat->comm; - - hypre_MPI_Comm_rank(comm, &mype); - hypre_MPI_Comm_size(comm, &npes); - - requests = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); - statuses = hypre_TAlloc(hypre_MPI_Status, npes , HYPRE_MEMORY_HOST); - - /* Determine size of and allocate sendbuf and sendind */ - mat->sendlen = 0; - for (i=0; isendlen += inlist[i]; - mat->sendbuf = NULL; - mat->sendind = NULL; - if (mat->sendlen) - { - mat->sendbuf = hypre_TAlloc(HYPRE_Real, mat->sendlen , HYPRE_MEMORY_HOST); - mat->sendind = hypre_TAlloc(HYPRE_Int, mat->sendlen , HYPRE_MEMORY_HOST); - } - - j = 0; - mat->num_send = 0; - for (i=0; isendind[j], inlist[i], HYPRE_MPI_INT, i, 444, comm, - &requests[mat->num_send]); - - /* Set up the send */ - hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, comm, - &mat->send_req[mat->num_send]); - - /* Set up the receive for the transpose */ - hypre_MPI_Recv_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 666, comm, - &mat->recv_req2[mat->num_send]); - - mat->num_send++; - j += inlist[i]; - } - - } - - hypre_MPI_Waitall(mat->num_send, requests, statuses); - free(requests); - free(statuses); - - /* 
convert global indices to local indices */ - /* these are all indices on this processor */ - for (i=0; isendlen; i++) - mat->sendind[i] -= mat->beg_row; + HYPRE_Int i, j, mype, npes; + hypre_MPI_Request *requests; + hypre_MPI_Status *statuses; + MPI_Comm comm = mat->comm; + + hypre_MPI_Comm_rank(comm, &mype); + hypre_MPI_Comm_size(comm, &npes); + + requests = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); + statuses = hypre_TAlloc(hypre_MPI_Status, npes , HYPRE_MEMORY_HOST); + + /* Determine size of and allocate sendbuf and sendind */ + mat->sendlen = 0; + for (i=0; isendlen += inlist[i]; + mat->sendbuf = NULL; + mat->sendind = NULL; + if (mat->sendlen) + { + mat->sendbuf = hypre_TAlloc(HYPRE_Real, mat->sendlen , HYPRE_MEMORY_HOST); + mat->sendind = hypre_TAlloc(HYPRE_Int, mat->sendlen , HYPRE_MEMORY_HOST); + } + + j = 0; + mat->num_send = 0; + for (i=0; isendind[j], inlist[i], HYPRE_MPI_INT, i, 444, comm, + &requests[mat->num_send]); + + /* Set up the send */ + hypre_MPI_Send_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 555, comm, + &mat->send_req[mat->num_send]); + + /* Set up the receive for the transpose */ + hypre_MPI_Recv_init(&mat->sendbuf[j], inlist[i], hypre_MPI_REAL, i, 666, comm, + &mat->recv_req2[mat->num_send]); + + mat->num_send++; + j += inlist[i]; + } + + } + + hypre_MPI_Waitall(mat->num_send, requests, statuses); + hypre_TFree(requests,HYPRE_MEMORY_HOST); + hypre_TFree(statuses,HYPRE_MEMORY_HOST); + + /* convert global indices to local indices */ + /* these are all indices on this processor */ + for (i=0; isendlen; i++) + mat->sendind[i] -= mat->beg_row; } /*-------------------------------------------------------------------------- @@ -686,42 +698,42 @@ static void SetupSends(Matrix *mat, HYPRE_Int *inlist) void MatrixComplete(Matrix *mat) { - HYPRE_Int mype, npes; - HYPRE_Int *outlist, *inlist; - HYPRE_Int row, len, *ind; - HYPRE_Real *val; + HYPRE_Int mype, npes; + HYPRE_Int *outlist, *inlist; + HYPRE_Int row, len, *ind; + 
HYPRE_Real *val; - hypre_MPI_Comm_rank(mat->comm, &mype); - hypre_MPI_Comm_size(mat->comm, &npes); + hypre_MPI_Comm_rank(mat->comm, &mype); + hypre_MPI_Comm_size(mat->comm, &npes); - mat->recv_req = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); - mat->send_req = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); - mat->recv_req2 = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); - mat->send_req2 = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); - mat->statuses = hypre_TAlloc(hypre_MPI_Status, npes , HYPRE_MEMORY_HOST); + mat->recv_req = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); + mat->send_req = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); + mat->recv_req2 = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); + mat->send_req2 = hypre_TAlloc(hypre_MPI_Request, npes , HYPRE_MEMORY_HOST); + mat->statuses = hypre_TAlloc(hypre_MPI_Status, npes , HYPRE_MEMORY_HOST); - outlist = hypre_CTAlloc(HYPRE_Int, npes, HYPRE_MEMORY_HOST); - inlist = hypre_CTAlloc(HYPRE_Int, npes, HYPRE_MEMORY_HOST); + outlist = hypre_CTAlloc(HYPRE_Int, npes, HYPRE_MEMORY_HOST); + inlist = hypre_CTAlloc(HYPRE_Int, npes, HYPRE_MEMORY_HOST); - /* Create Numbering object */ - mat->numb = NumberingCreate(mat, PARASAILS_NROWS); + /* Create Numbering object */ + mat->numb = NumberingCreate(mat, PARASAILS_NROWS); - SetupReceives(mat, mat->numb->num_ind - mat->numb->num_loc, - &mat->numb->local_to_global[mat->numb->num_loc], outlist); + SetupReceives(mat, mat->numb->num_ind - mat->numb->num_loc, + &mat->numb->local_to_global[mat->numb->num_loc], outlist); - hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, mat->comm); + hypre_MPI_Alltoall(outlist, 1, HYPRE_MPI_INT, inlist, 1, HYPRE_MPI_INT, mat->comm); - SetupSends(mat, inlist); + SetupSends(mat, inlist); - free(outlist); - free(inlist); + hypre_TFree(outlist,HYPRE_MEMORY_HOST); + hypre_TFree(inlist,HYPRE_MEMORY_HOST); - /* Convert to local indices */ - for 
(row=0; row<=mat->end_row - mat->beg_row; row++) - { - MatrixGetRow(mat, row, &len, &ind, &val); - NumberingGlobalToLocal(mat->numb, len, ind, ind); - } + /* Convert to local indices */ + for (row=0; row<=mat->end_row - mat->beg_row; row++) + { + MatrixGetRow(mat, row, &len, &ind, &val); + NumberingGlobalToLocal(mat->numb, len, ind, ind); + } } /*-------------------------------------------------------------------------- @@ -731,83 +743,83 @@ void MatrixComplete(Matrix *mat) void MatrixMatvec(Matrix *mat, HYPRE_Real *x, HYPRE_Real *y) { - HYPRE_Int row, i, len, *ind; - HYPRE_Real *val, temp; - HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; + HYPRE_Int row, i, len, *ind; + HYPRE_Real *val, temp; + HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; - /* Set up persistent communications */ + /* Set up persistent communications */ - /* Assumes MatrixComplete has been called */ + /* Assumes MatrixComplete has been called */ - /* Put components of x into the right outgoing buffers */ - for (i=0; isendlen; i++) - mat->sendbuf[i] = x[mat->sendind[i]]; + /* Put components of x into the right outgoing buffers */ + for (i=0; isendlen; i++) + mat->sendbuf[i] = x[mat->sendind[i]]; - hypre_MPI_Startall(mat->num_recv, mat->recv_req); - hypre_MPI_Startall(mat->num_send, mat->send_req); + hypre_MPI_Startall(mat->num_recv, mat->recv_req); + hypre_MPI_Startall(mat->num_send, mat->send_req); - /* Copy local part of x into top part of recvbuf */ - for (i=0; irecvbuf[i] = x[i]; + /* Copy local part of x into top part of recvbuf */ + for (i=0; irecvbuf[i] = x[i]; - hypre_MPI_Waitall(mat->num_recv, mat->recv_req, mat->statuses); + hypre_MPI_Waitall(mat->num_recv, mat->recv_req, mat->statuses); - /* do the multiply */ + /* do the multiply */ #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(row,len,ind,val,temp,i) schedule(static) #endif - for (row=0; row<=mat->end_row - mat->beg_row; row++) - { - MatrixGetRow(mat, row, &len, &ind, &val); - - temp = 0.0; - for (i=0; 
irecvbuf[ind[i]]; - } - y[row] = temp; - } - - hypre_MPI_Waitall(mat->num_send, mat->send_req, mat->statuses); + for (row=0; row<=mat->end_row - mat->beg_row; row++) + { + MatrixGetRow(mat, row, &len, &ind, &val); + + temp = 0.0; + for (i=0; irecvbuf[ind[i]]; + } + y[row] = temp; + } + + hypre_MPI_Waitall(mat->num_send, mat->send_req, mat->statuses); } void MatrixMatvecSerial(Matrix *mat, HYPRE_Real *x, HYPRE_Real *y) { - HYPRE_Int row, i, len, *ind; - HYPRE_Real *val, temp; - HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; + HYPRE_Int row, i, len, *ind; + HYPRE_Real *val, temp; + HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; - /* Set up persistent communications */ + /* Set up persistent communications */ - /* Assumes MatrixComplete has been called */ + /* Assumes MatrixComplete has been called */ - /* Put components of x into the right outgoing buffers */ - for (i=0; isendlen; i++) - mat->sendbuf[i] = x[mat->sendind[i]]; + /* Put components of x into the right outgoing buffers */ + for (i=0; isendlen; i++) + mat->sendbuf[i] = x[mat->sendind[i]]; - hypre_MPI_Startall(mat->num_recv, mat->recv_req); - hypre_MPI_Startall(mat->num_send, mat->send_req); + hypre_MPI_Startall(mat->num_recv, mat->recv_req); + hypre_MPI_Startall(mat->num_send, mat->send_req); - /* Copy local part of x into top part of recvbuf */ - for (i=0; irecvbuf[i] = x[i]; + /* Copy local part of x into top part of recvbuf */ + for (i=0; irecvbuf[i] = x[i]; - hypre_MPI_Waitall(mat->num_recv, mat->recv_req, mat->statuses); + hypre_MPI_Waitall(mat->num_recv, mat->recv_req, mat->statuses); - /* do the multiply */ - for (row=0; row<=mat->end_row - mat->beg_row; row++) - { - MatrixGetRow(mat, row, &len, &ind, &val); + /* do the multiply */ + for (row=0; row<=mat->end_row - mat->beg_row; row++) + { + MatrixGetRow(mat, row, &len, &ind, &val); - temp = 0.0; - for (i=0; irecvbuf[ind[i]]; - } - y[row] = temp; - } + temp = 0.0; + for (i=0; irecvbuf[ind[i]]; + } + y[row] = temp; + } - 
hypre_MPI_Waitall(mat->num_send, mat->send_req, mat->statuses); + hypre_MPI_Waitall(mat->num_send, mat->send_req, mat->statuses); } /*-------------------------------------------------------------------------- @@ -817,45 +829,45 @@ void MatrixMatvecSerial(Matrix *mat, HYPRE_Real *x, HYPRE_Real *y) void MatrixMatvecTrans(Matrix *mat, HYPRE_Real *x, HYPRE_Real *y) { - HYPRE_Int row, i, len, *ind; - HYPRE_Real *val; - HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; + HYPRE_Int row, i, len, *ind; + HYPRE_Real *val; + HYPRE_Int num_local = mat->end_row - mat->beg_row + 1; - /* Set up persistent communications */ + /* Set up persistent communications */ - /* Assumes MatrixComplete has been called */ + /* Assumes MatrixComplete has been called */ - /* Post receives for local parts of the solution y */ - hypre_MPI_Startall(mat->num_send, mat->recv_req2); + /* Post receives for local parts of the solution y */ + hypre_MPI_Startall(mat->num_send, mat->recv_req2); - /* initialize accumulator buffer to zero */ - for (i=0; irecvlen+num_local; i++) - mat->recvbuf[i] = 0.0; + /* initialize accumulator buffer to zero */ + for (i=0; irecvlen+num_local; i++) + mat->recvbuf[i] = 0.0; - /* do the multiply */ - for (row=0; row<=mat->end_row - mat->beg_row; row++) - { - MatrixGetRow(mat, row, &len, &ind, &val); + /* do the multiply */ + for (row=0; row<=mat->end_row - mat->beg_row; row++) + { + MatrixGetRow(mat, row, &len, &ind, &val); - for (i=0; irecvbuf[ind[i]] += val[i] * x[row]; - } - } + for (i=0; irecvbuf[ind[i]] += val[i] * x[row]; + } + } - /* Now can send nonlocal parts of solution to other procs */ - hypre_MPI_Startall(mat->num_recv, mat->send_req2); + /* Now can send nonlocal parts of solution to other procs */ + hypre_MPI_Startall(mat->num_recv, mat->send_req2); - /* copy local part of solution into y */ - for (i=0; irecvbuf[i]; + /* copy local part of solution into y */ + for (i=0; irecvbuf[i]; - /* alternatively, loop over a wait any */ - 
hypre_MPI_Waitall(mat->num_send, mat->recv_req2, mat->statuses); + /* alternatively, loop over a wait any */ + hypre_MPI_Waitall(mat->num_send, mat->recv_req2, mat->statuses); - /* add all the incoming partial sums to y */ - for (i=0; isendlen; i++) - y[mat->sendind[i]] += mat->sendbuf[i]; + /* add all the incoming partial sums to y */ + for (i=0; isendlen; i++) + y[mat->sendind[i]] += mat->sendbuf[i]; - hypre_MPI_Waitall(mat->num_recv, mat->send_req2, mat->statuses); + hypre_MPI_Waitall(mat->num_recv, mat->send_req2, mat->statuses); } diff --git a/src/distributed_ls/ParaSails/Mem.c b/src/distributed_ls/ParaSails/Mem.c index e6cc56e29..2edb8056d 100644 --- a/src/distributed_ls/ParaSails/Mem.c +++ b/src/distributed_ls/ParaSails/Mem.c @@ -10,19 +10,19 @@ * Mem - Memory pool for aggregate data with unknown total size at creation. * For example, a sparse matrix may be constructed one row at a time, which * do not need to be stored contiguously in memory. MemAlloc may be called - * for each row that needs to be stored, and space is allocated from the + * for each row that needs to be stored, and space is allocated from the * memory pool (individual requests are not made to the operating system). * Memory from the memory pool is freed entirely at once. * * Memory is requested from the operating system in blocks of 1 Mbyte * by default. This default must be changed if requests of more than * 1 Mbyte will be made, or if large requests (e.g., 0.5 Mbytes) will - * be made, in order to efficiently use the memory block. Up to 1000 - * blocks can be allocated, by default, giving a total of 1 Gbyte of + * be made, in order to efficiently use the memory block. Up to 1000 + * blocks can be allocated, by default, giving a total of 1 Gbyte of * memory. Actual storage will be less, and this can be determined by - * a call to MemStat. + * a call to MemStat. 
* - * If much less than 1 Mbyte is required or if the exact size of the + * If much less than 1 Mbyte is required or if the exact size of the * aggregate data is known, this these routines should not be used. * * Note that the size requested will be rounded up to the nearest multiple @@ -30,7 +30,6 @@ * *****************************************************************************/ -#include #include #include "Common.h" #include "Mem.h" @@ -54,7 +53,7 @@ Mem *MemCreate() } /*-------------------------------------------------------------------------- - * MemDestroy - Destroy a memory pool object "m", and release all allocated + * MemDestroy - Destroy a memory pool object "m", and release all allocated * memory to the operating system. *--------------------------------------------------------------------------*/ @@ -64,13 +63,15 @@ void MemDestroy(Mem *m) /* Free all blocks of memory */ for (i=0; inum_blocks; i++) - free(m->blocks[i]); + { + hypre_TFree(m->blocks[i], HYPRE_MEMORY_HOST); + } - free(m); + hypre_TFree(m, HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- - * MemAlloc - Return "size" bytes from the memory pool "m". This function + * MemAlloc - Return "size" bytes from the memory pool "m". This function * will return to the operating system on the following conditions: * 1) max block size exceeded, 2) max number of blocks exceeded, * 3) memory exhausted. 
@@ -78,52 +79,52 @@ void MemDestroy(Mem *m) char *MemAlloc(Mem *m, HYPRE_Int size) { - HYPRE_Int req; - char *p; - - /* Align on 16-byte boundary */ - size = ((size + 15) / 16) * 16; - - if (m->bytes_left < size) - { - /* Allocate a new block */ - if (m->num_blocks+1 > MEM_MAXBLOCKS) - { - hypre_printf("MemAlloc: max number of blocks %d exceeded.\n", - MEM_MAXBLOCKS); - PARASAILS_EXIT; - } - - /* Size of requested block */ - req = MAX(size, MEM_BLOCKSIZE); - - m->avail = hypre_TAlloc(char, req, HYPRE_MEMORY_HOST); - - if (m->avail == NULL) - { - hypre_printf("MemAlloc: request for %d bytes failed.\n", req); - PARASAILS_EXIT; - } - - m->blocks[m->num_blocks] = m->avail; - m->num_blocks++; - m->bytes_left = req; - m->total_bytes += size; - m->bytes_alloc += req; - if (req > MEM_BLOCKSIZE) - m->num_over++; - } - - p = m->avail; - m->avail += size; - m->bytes_left -= size; - m->total_bytes += size; - - return p; + HYPRE_Int req; + char *p; + + /* Align on 16-byte boundary */ + size = ((size + 15) / 16) * 16; + + if (m->bytes_left < size) + { + /* Allocate a new block */ + if (m->num_blocks+1 > MEM_MAXBLOCKS) + { + hypre_printf("MemAlloc: max number of blocks %d exceeded.\n", + MEM_MAXBLOCKS); + PARASAILS_EXIT; + } + + /* Size of requested block */ + req = MAX(size, MEM_BLOCKSIZE); + + m->avail = hypre_TAlloc(char, req, HYPRE_MEMORY_HOST); + + if (m->avail == NULL) + { + hypre_printf("MemAlloc: request for %d bytes failed.\n", req); + PARASAILS_EXIT; + } + + m->blocks[m->num_blocks] = m->avail; + m->num_blocks++; + m->bytes_left = req; + m->total_bytes += size; + m->bytes_alloc += req; + if (req > MEM_BLOCKSIZE) + m->num_over++; + } + + p = m->avail; + m->avail += size; + m->bytes_left -= size; + m->total_bytes += size; + + return p; } /*-------------------------------------------------------------------------- - * MemStat - Print statistics about memory pool "m" to stream "stream" with + * MemStat - Print statistics about memory pool "m" to stream "stream" with * a 
descriptive message "msg". *--------------------------------------------------------------------------*/ @@ -135,8 +136,8 @@ void MemStat(Mem *m, FILE *stream, char *msg) hypre_fprintf(stream, "total_bytes: %ld\n", m->total_bytes); hypre_fprintf(stream, "bytes_alloc: %ld\n", m->bytes_alloc); if (m->bytes_alloc != 0) - hypre_fprintf(stream, "efficiency : %f\n", m->total_bytes / - (HYPRE_Real) m->bytes_alloc); + hypre_fprintf(stream, "efficiency : %f\n", m->total_bytes / + (HYPRE_Real) m->bytes_alloc); hypre_fprintf(stream, "*********************\n"); fflush(stream); } diff --git a/src/distributed_ls/ParaSails/Numbering.c b/src/distributed_ls/ParaSails/Numbering.c index f13552aa2..878ee42ea 100644 --- a/src/distributed_ls/ParaSails/Numbering.c +++ b/src/distributed_ls/ParaSails/Numbering.c @@ -16,21 +16,20 @@ * Implementation: Mapping from a local index to a global index is performed * through an array. Mapping from a global index to a local index is more * difficult. If the global index is determined to be owned by the local - * processor, then a conversion is performed; else the local index is + * processor, then a conversion is performed; else the local index is * looked up in a hash table. * *****************************************************************************/ #include -#include -#include +//#include #include "Common.h" #include "Numbering.h" #include "OrderStat.h" /*-------------------------------------------------------------------------- * NumberingCreate - Return (a pointer to) a numbering object - * for a given matrix. The "size" parameter is the initial number of + * for a given matrix. The "size" parameter is the initial number of * external indices that can be stored, and will grow if necessary. * (Implementation note: the hash table size is kept approximately twice * this number.) 
@@ -41,72 +40,72 @@ Numbering *NumberingCreate(Matrix *mat, HYPRE_Int size) { - Numbering *numb = hypre_TAlloc(Numbering, 1, HYPRE_MEMORY_HOST); - HYPRE_Int row, i, len, *ind; - HYPRE_Real *val; - HYPRE_Int num_external = 0; - - numb->size = size; - numb->beg_row = mat->beg_row; - numb->end_row = mat->end_row; - numb->num_loc = mat->end_row - mat->beg_row + 1; - numb->num_ind = mat->end_row - mat->beg_row + 1; - - numb->local_to_global = hypre_TAlloc(HYPRE_Int, (numb->num_loc+size) , HYPRE_MEMORY_HOST); - numb->hash = HashCreate(2*size+1); - - /* Set up the local part of local_to_global */ - for (i=0; inum_loc; i++) - numb->local_to_global[i] = mat->beg_row + i; - - /* Fill local_to_global array */ - for (row=0; row<=mat->end_row - mat->beg_row; row++) - { - MatrixGetRow(mat, row, &len, &ind, &val); - - for (i=0; ibeg_row || ind[i] > mat->end_row) + Numbering *numb = hypre_TAlloc(Numbering, 1, HYPRE_MEMORY_HOST); + HYPRE_Int row, i, len, *ind; + HYPRE_Real *val; + HYPRE_Int num_external = 0; + + numb->size = size; + numb->beg_row = mat->beg_row; + numb->end_row = mat->end_row; + numb->num_loc = mat->end_row - mat->beg_row + 1; + numb->num_ind = mat->end_row - mat->beg_row + 1; + + numb->local_to_global = hypre_TAlloc(HYPRE_Int, (numb->num_loc+size) , HYPRE_MEMORY_HOST); + numb->hash = HashCreate(2*size+1); + + /* Set up the local part of local_to_global */ + for (i=0; inum_loc; i++) + numb->local_to_global[i] = mat->beg_row + i; + + /* Fill local_to_global array */ + for (row=0; row<=mat->end_row - mat->beg_row; row++) + { + MatrixGetRow(mat, row, &len, &ind, &val); + + for (i=0; ibeg_row || ind[i] > mat->end_row) + { + if (HashLookup(numb->hash, ind[i]) == HASH_NOTFOUND) { - if (HashLookup(numb->hash, ind[i]) == HASH_NOTFOUND) - { - if (num_external >= numb->size) - { - Hash *newHash; - - /* allocate more space for numbering */ - numb->size *= 2; - numb->local_to_global = (HYPRE_Int *) - hypre_TReAlloc(numb->local_to_global,HYPRE_Int, - (numb->num_loc+numb->size), 
HYPRE_MEMORY_HOST); - newHash = HashCreate(2*numb->size+1); - HashRehash(numb->hash, newHash); - HashDestroy(numb->hash); - numb->hash = newHash; - } - - HashInsert(numb->hash, ind[i], num_external); - numb->local_to_global[numb->num_loc+num_external] = ind[i]; - num_external++; - } + if (num_external >= numb->size) + { + Hash *newHash; + + /* allocate more space for numbering */ + numb->size *= 2; + numb->local_to_global = (HYPRE_Int *) + hypre_TReAlloc(numb->local_to_global,HYPRE_Int, + (numb->num_loc+numb->size), HYPRE_MEMORY_HOST); + newHash = HashCreate(2*numb->size+1); + HashRehash(numb->hash, newHash); + HashDestroy(numb->hash); + numb->hash = newHash; + } + + HashInsert(numb->hash, ind[i], num_external); + numb->local_to_global[numb->num_loc+num_external] = ind[i]; + num_external++; } - } - } + } + } + } - /* Sort the indices */ - hypre_shell_sort(num_external, &numb->local_to_global[numb->num_loc]); + /* Sort the indices */ + hypre_shell_sort(num_external, &numb->local_to_global[numb->num_loc]); - /* Redo the hash table for the sorted indices */ - HashReset(numb->hash); + /* Redo the hash table for the sorted indices */ + HashReset(numb->hash); - for (i=0; ihash, - numb->local_to_global[i+numb->num_loc], i+numb->num_loc); + for (i=0; ihash, + numb->local_to_global[i+numb->num_loc], i+numb->num_loc); - numb->num_ind += num_external; + numb->num_ind += num_external; - return numb; + return numb; } /*-------------------------------------------------------------------------- @@ -116,23 +115,23 @@ Numbering *NumberingCreate(Matrix *mat, HYPRE_Int size) Numbering *NumberingCreateCopy(Numbering *orig) { - Numbering *numb = hypre_TAlloc(Numbering, 1, HYPRE_MEMORY_HOST); + Numbering *numb = hypre_TAlloc(Numbering, 1, HYPRE_MEMORY_HOST); - numb->size = orig->size; - numb->beg_row = orig->beg_row; - numb->end_row = orig->end_row; - numb->num_loc = orig->num_loc; - numb->num_ind = orig->num_ind; + numb->size = orig->size; + numb->beg_row = orig->beg_row; + 
numb->end_row = orig->end_row; + numb->num_loc = orig->num_loc; + numb->num_ind = orig->num_ind; - numb->local_to_global = - hypre_TAlloc(HYPRE_Int, (numb->num_loc+numb->size) , HYPRE_MEMORY_HOST); - hypre_TMemcpy(numb->local_to_global, orig->local_to_global, - HYPRE_Int, numb->num_ind, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); + numb->local_to_global = + hypre_TAlloc(HYPRE_Int, (numb->num_loc+numb->size) , HYPRE_MEMORY_HOST); + hypre_TMemcpy(numb->local_to_global, orig->local_to_global, + HYPRE_Int, numb->num_ind, HYPRE_MEMORY_HOST, HYPRE_MEMORY_HOST); - numb->hash = HashCreate(2*numb->size+1); - HashRehash(orig->hash, numb->hash); + numb->hash = HashCreate(2*numb->size+1); + HashRehash(orig->hash, numb->hash); - return numb; + return numb; } /*-------------------------------------------------------------------------- @@ -141,10 +140,10 @@ Numbering *NumberingCreateCopy(Numbering *orig) void NumberingDestroy(Numbering *numb) { - free(numb->local_to_global); - HashDestroy(numb->hash); + hypre_TFree(numb->local_to_global,HYPRE_MEMORY_HOST); + HashDestroy(numb->hash); - free(numb); + hypre_TFree(numb,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- @@ -154,10 +153,10 @@ void NumberingDestroy(Numbering *numb) void NumberingLocalToGlobal(Numbering *numb, HYPRE_Int len, HYPRE_Int *local, HYPRE_Int *global) { - HYPRE_Int i; + HYPRE_Int i; - for (i=0; ilocal_to_global[local[i]]; + for (i=0; ilocal_to_global[local[i]]; } /*-------------------------------------------------------------------------- @@ -168,47 +167,49 @@ void NumberingLocalToGlobal(Numbering *numb, HYPRE_Int len, HYPRE_Int *local, HY void NumberingGlobalToLocal(Numbering *numb, HYPRE_Int len, HYPRE_Int *global, HYPRE_Int *local) { - HYPRE_Int i, l; - - for (i=0; ibeg_row || global[i] > numb->end_row) - { - l = HashLookup(numb->hash, global[i]); - - if (l == HASH_NOTFOUND) - { - if (numb->num_ind >= numb->num_loc + numb->size) - { - Hash *newHash; - - /* allocate 
more space for numbering */ - numb->size *= 2; + HYPRE_Int i, l; + + for (i=0; ibeg_row || global[i] > numb->end_row) + { + l = HashLookup(numb->hash, global[i]); + + if (l == HASH_NOTFOUND) + { + if (numb->num_ind >= numb->num_loc + numb->size) + { + Hash *newHash; + + /* allocate more space for numbering */ + numb->size *= 2; #ifdef PARASAILS_DEBUG - hypre_printf("Numbering resize %d\n", numb->size); + hypre_printf("Numbering resize %d\n", numb->size); #endif - numb->local_to_global = (HYPRE_Int *) - realloc(numb->local_to_global, - (numb->num_loc+numb->size)*sizeof(HYPRE_Int)); - newHash = HashCreate(2*numb->size+1); - HashRehash(numb->hash, newHash); - HashDestroy(numb->hash); - numb->hash = newHash; - } - - HashInsert(numb->hash, global[i], numb->num_ind); - numb->local_to_global[numb->num_ind] = global[i]; - local[i] = numb->num_ind; - numb->num_ind++; - } - else - { - local[i] = l; - } - } - else - { - local[i] = global[i] - numb->beg_row; - } - } + numb->local_to_global = hypre_TReAlloc(numb->local_to_global, + HYPRE_Int, + numb->num_loc + numb->size, + HYPRE_MEMORY_HOST); + + newHash = HashCreate(2*numb->size+1); + HashRehash(numb->hash, newHash); + HashDestroy(numb->hash); + numb->hash = newHash; + } + + HashInsert(numb->hash, global[i], numb->num_ind); + numb->local_to_global[numb->num_ind] = global[i]; + local[i] = numb->num_ind; + numb->num_ind++; + } + else + { + local[i] = l; + } + } + else + { + local[i] = global[i] - numb->beg_row; + } + } } diff --git a/src/distributed_ls/ParaSails/ParaSails.c b/src/distributed_ls/ParaSails/ParaSails.c index f799a3e83..2c63d8aec 100644 --- a/src/distributed_ls/ParaSails/ParaSails.c +++ b/src/distributed_ls/ParaSails/ParaSails.c @@ -12,7 +12,6 @@ *****************************************************************************/ #include "HYPRE_config.h" #include -#include #include #include #include "Common.h" @@ -71,7 +70,7 @@ HYPRE_Int FindNumReplies(MPI_Comm comm, HYPRE_Int *replies_list) 
hypre_MPI_Allreduce(replies_list, replies_list2, npes, HYPRE_MPI_INT, hypre_MPI_SUM, comm); num_replies = replies_list2[mype]; - free(replies_list2); + hypre_TFree(replies_list2,HYPRE_MEMORY_HOST); return num_replies; } @@ -158,7 +157,7 @@ static void ReceiveRequest(MPI_Comm comm, HYPRE_Int *source, HYPRE_Int tag, HYPR if (*count > *buflen) { - free(*buffer); + hypre_TFree(*buffer,HYPRE_MEMORY_HOST); *buflen = *count; *buffer = hypre_TAlloc(HYPRE_Int, *buflen , HYPRE_MEMORY_HOST); } @@ -462,7 +461,7 @@ static void ExchangePrunedRows(MPI_Comm comm, Matrix *M, Numbering *numb, SendRequests(comm, ROW_PRUNED_REQ_TAG, M, len, ind, &num_requests, replies_list); num_replies = FindNumReplies(comm, replies_list); - free(replies_list); + hypre_TFree(replies_list,HYPRE_MEMORY_HOST); for (i=0; i 0); + hypre_assert(len2 > 0); #ifdef ESSL for (j=0; j 0); + hypre_assert(len2 > 0); for (j=0; j ahat_size) { - free(ahat); + hypre_TFree(ahat,HYPRE_MEMORY_HOST); ahat_size = len*npat; ahat = hypre_TAlloc(HYPRE_Real, ahat_size , HYPRE_MEMORY_HOST); } @@ -1293,7 +1292,7 @@ static HYPRE_Int ComputeValuesNonsym(StoredRows *stored_rows, Matrix *mat, /* Reallocate bhat if necessary */ if (npat > bhat_size) { - free(bhat); + hypre_TFree(bhat,HYPRE_MEMORY_HOST); bhat_size = npat; bhat = hypre_TAlloc(HYPRE_Real, bhat_size , HYPRE_MEMORY_HOST); } @@ -1303,7 +1302,7 @@ static HYPRE_Int ComputeValuesNonsym(StoredRows *stored_rows, Matrix *mat, memset(bhat, 0, npat*sizeof(HYPRE_Real)); NumberingGlobalToLocal(numb, 1, &row, &loc); loc = marker[loc]; - assert(loc != -1); + hypre_assert(loc != -1); bhat[loc] = 1.0; /* Reset marker array */ @@ -1340,11 +1339,11 @@ static HYPRE_Int ComputeValuesNonsym(StoredRows *stored_rows, Matrix *mat, timet += (time1-time0); } - free(patt); - free(marker); - free(bhat); - free(ahat); - free(work); + hypre_TFree(patt,HYPRE_MEMORY_HOST); + hypre_TFree(marker,HYPRE_MEMORY_HOST); + hypre_TFree(bhat,HYPRE_MEMORY_HOST); + hypre_TFree(ahat,HYPRE_MEMORY_HOST); + 
hypre_TFree(work,HYPRE_MEMORY_HOST); #if 0 { @@ -1387,7 +1386,7 @@ static HYPRE_Real SelectThresh(MPI_Comm comm, Matrix *A, DiagScale *diag_scale, if (len > buflen) { - free(buffer); + hypre_TFree(buffer,HYPRE_MEMORY_HOST); buflen = len; buffer = hypre_TAlloc(HYPRE_Real, buflen , HYPRE_MEMORY_HOST); } @@ -1412,7 +1411,7 @@ static HYPRE_Real SelectThresh(MPI_Comm comm, Matrix *A, DiagScale *diag_scale, hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); hypre_MPI_Comm_size(comm, &npes); - free(buffer); + hypre_TFree(buffer,HYPRE_MEMORY_HOST); return sum / (A->end_rows[npes-1] - A->beg_rows[0] + 1); } @@ -1441,7 +1440,7 @@ static HYPRE_Real SelectFilter(MPI_Comm comm, Matrix *M, DiagScale *diag_scale, if (len > buflen) { - free(buffer); + hypre_TFree(buffer,HYPRE_MEMORY_HOST); buflen = len; buffer = hypre_TAlloc(HYPRE_Real, buflen , HYPRE_MEMORY_HOST); } @@ -1468,7 +1467,7 @@ static HYPRE_Real SelectFilter(MPI_Comm comm, Matrix *M, DiagScale *diag_scale, hypre_MPI_Allreduce(&localsum, &sum, 1, hypre_MPI_REAL, hypre_MPI_SUM, comm); hypre_MPI_Comm_size(comm, &npes); - free(buffer); + hypre_TFree(buffer,HYPRE_MEMORY_HOST); return sum / (M->end_rows[npes-1] - M->beg_rows[0] + 1); } @@ -1545,7 +1544,7 @@ static void Rescale(Matrix *M, StoredRows *stored_rows, HYPRE_Int num_ind) /* Scatter nonzeros of A */ for (i=0; iM) MatrixDestroy(ps->M); - free(ps->beg_rows); - free(ps->end_rows); + hypre_TFree(ps->beg_rows,HYPRE_MEMORY_HOST); + hypre_TFree(ps->end_rows,HYPRE_MEMORY_HOST); - free(ps); + hypre_TFree(ps,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- @@ -2036,7 +2035,7 @@ void ParaSailsStatsValues(ParaSails *ps, Matrix *A) hypre_printf("ave: %8.1f\n", temp / (HYPRE_Real) npes); hypre_printf("*************************************************\n"); - free(setup_times); + hypre_TFree(setup_times,HYPRE_MEMORY_HOST); fflush(stdout); } diff --git a/src/distributed_ls/ParaSails/PrunedRows.c 
b/src/distributed_ls/ParaSails/PrunedRows.c index 9dd2a1f95..d12cfa199 100644 --- a/src/distributed_ls/ParaSails/PrunedRows.c +++ b/src/distributed_ls/ParaSails/PrunedRows.c @@ -7,14 +7,13 @@ /****************************************************************************** * - * PrunedRows - Collection of pruned rows that are cached on the local + * PrunedRows - Collection of pruned rows that are cached on the local * processor. Direct access to these rows is available, via the local * index number. * *****************************************************************************/ #include -#include #include "Common.h" #include "Mem.h" #include "Matrix.h" @@ -32,11 +31,11 @@ * diag_scale - diagonal scale object used to scale the thresholding (input) * thresh - threshold for pruning the matrix (input) * - * The local pruned rows are stored in the first part of the len and ind + * The local pruned rows are stored in the first part of the len and ind * arrays. *--------------------------------------------------------------------------*/ -PrunedRows *PrunedRowsCreate(Matrix *mat, HYPRE_Int size, DiagScale *diag_scale, +PrunedRows *PrunedRowsCreate(Matrix *mat, HYPRE_Int size, DiagScale *diag_scale, HYPRE_Real thresh) { HYPRE_Int row, len, *ind, count, j, *data; @@ -60,7 +59,7 @@ PrunedRows *PrunedRowsCreate(Matrix *mat, HYPRE_Int size, DiagScale *diag_scale, for (j=0; j= thresh && ind[j] != row) count++; } @@ -73,7 +72,7 @@ PrunedRows *PrunedRowsCreate(Matrix *mat, HYPRE_Int size, DiagScale *diag_scale, for (j=0; j= thresh && ind[j] != row) *data++ = ind[j]; } @@ -89,9 +88,9 @@ PrunedRows *PrunedRowsCreate(Matrix *mat, HYPRE_Int size, DiagScale *diag_scale, void PrunedRowsDestroy(PrunedRows *p) { MemDestroy(p->mem); - free(p->len); - free(p->ind); - free(p); + hypre_TFree(p->len,HYPRE_MEMORY_HOST); + hypre_TFree(p->ind,HYPRE_MEMORY_HOST); + hypre_TFree(p,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- @@ -127,7 +126,7 @@ 
void PrunedRowsPut(PrunedRows *p, HYPRE_Int index, HYPRE_Int len, HYPRE_Int *ind } /*-------------------------------------------------------------------------- - * PrunedRowsGet - Return the row with index "index" through the pointers + * PrunedRowsGet - Return the row with index "index" through the pointers * "lenp" and "indp" in the pruned rows object "p". *--------------------------------------------------------------------------*/ diff --git a/src/distributed_ls/ParaSails/RowPatt.c b/src/distributed_ls/ParaSails/RowPatt.c index 5a62e8837..2dfb6eee7 100644 --- a/src/distributed_ls/ParaSails/RowPatt.c +++ b/src/distributed_ls/ParaSails/RowPatt.c @@ -12,14 +12,13 @@ * * Implementation and Notes: a full-length array is used to mark nonzeros * in the pattern. Indices must not equal -1, which is the "empty" marker - * used in the full length array. It is expected that RowPatt will only be - * presented with local indices, otherwise the full length array may be very + * used in the full length array. It is expected that RowPatt will only be + * presented with local indices, otherwise the full length array may be very * large. 
* *****************************************************************************/ #include -#include #include "Common.h" #include "RowPatt.h" @@ -76,10 +75,10 @@ RowPatt *RowPattCreate(HYPRE_Int maxlen) void RowPattDestroy(RowPatt *p) { - free(p->ind); - free(p->mark); - free(p->buffer); - free(p); + hypre_TFree(p->ind,HYPRE_MEMORY_HOST); + hypre_TFree(p->mark,HYPRE_MEMORY_HOST); + hypre_TFree(p->buffer,HYPRE_MEMORY_HOST); + hypre_TFree(p,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- @@ -112,7 +111,7 @@ void RowPattMerge(RowPatt *p, HYPRE_Int len, HYPRE_Int *ind) if (p->mark[ind[i]] == -1) { - assert(p->len < p->maxlen); + hypre_assert(p->len < p->maxlen); p->mark[ind[i]] = p->len; p->ind[p->len] = ind[i]; @@ -122,7 +121,7 @@ void RowPattMerge(RowPatt *p, HYPRE_Int len, HYPRE_Int *ind) } /*-------------------------------------------------------------------------- - * RowPattMergeExt - Merge the external nonzeros in the array "ind" of + * RowPattMergeExt - Merge the external nonzeros in the array "ind" of * length "len" with the pattern "p". The external indices are those * that are less than "beg" or greater than "end". 
*--------------------------------------------------------------------------*/ @@ -141,7 +140,7 @@ void RowPattMergeExt(RowPatt *p, HYPRE_Int len, HYPRE_Int *ind, HYPRE_Int num_lo if (p->mark[ind[i]] == -1) { - assert(p->len < p->maxlen); + hypre_assert(p->len < p->maxlen); p->mark[ind[i]] = p->len; p->ind[p->len] = ind[i]; @@ -165,7 +164,7 @@ void RowPattGet(RowPatt *p, HYPRE_Int *lenp, HYPRE_Int **indp) if (len > p->buflen) { - free(p->buffer); + hypre_TFree(p->buffer,HYPRE_MEMORY_HOST); p->buflen = len + 100; p->buffer = hypre_TAlloc(HYPRE_Int, p->buflen , HYPRE_MEMORY_HOST); } @@ -179,7 +178,7 @@ void RowPattGet(RowPatt *p, HYPRE_Int *lenp, HYPRE_Int **indp) /*-------------------------------------------------------------------------- * RowPattPrevLevel - Return the new indices added to the pattern of "p" * since the last call to RowPattPrevLevel (or all the indices if never - * called). The length and pointer to the pattern indices are returned + * called). The length and pointer to the pattern indices are returned * through the parameters "lenp" and "indp". * A copy of the indices is returned; this copy is destroyed on the next * call to RowPattGet or RowPattPrevLevel. 
@@ -193,7 +192,7 @@ void RowPattPrevLevel(RowPatt *p, HYPRE_Int *lenp, HYPRE_Int **indp) if (len > p->buflen) { - free(p->buffer); + hypre_TFree(p->buffer,HYPRE_MEMORY_HOST); p->buflen = len + 100; p->buffer = hypre_TAlloc(HYPRE_Int, p->buflen , HYPRE_MEMORY_HOST); } diff --git a/src/distributed_ls/ParaSails/StoredRows.c b/src/distributed_ls/ParaSails/StoredRows.c index 60f969267..86f8d0a97 100644 --- a/src/distributed_ls/ParaSails/StoredRows.c +++ b/src/distributed_ls/ParaSails/StoredRows.c @@ -15,7 +15,6 @@ *****************************************************************************/ #include -#include #include "Common.h" #include "Mem.h" #include "Matrix.h" @@ -61,10 +60,10 @@ StoredRows *StoredRowsCreate(Matrix *mat, HYPRE_Int size) void StoredRowsDestroy(StoredRows *p) { MemDestroy(p->mem); - free(p->len); - free(p->ind); - free(p->val); - free(p); + hypre_TFree(p->len,HYPRE_MEMORY_HOST); + hypre_TFree(p->ind,HYPRE_MEMORY_HOST); + hypre_TFree(p->val,HYPRE_MEMORY_HOST); + hypre_TFree(p,HYPRE_MEMORY_HOST); } /*-------------------------------------------------------------------------- @@ -119,7 +118,7 @@ void StoredRowsPut(StoredRows *p, HYPRE_Int index, HYPRE_Int len, HYPRE_Int *ind } /* check that row has not been put already */ - assert(p->len[i] == 0); + hypre_assert(p->len[i] == 0); p->len[i] = len; p->ind[i] = ind; diff --git a/src/distributed_ls/ParaSails/convert.c b/src/distributed_ls/ParaSails/convert.c index b26c42b3d..9a20944bc 100644 --- a/src/distributed_ls/ParaSails/convert.c +++ b/src/distributed_ls/ParaSails/convert.c @@ -25,14 +25,14 @@ HYPRE_Int convert(FILE *infile, FILE *outfile) HYPRE_Int i, j; /* skip the comment section */ - do + do { - if (fgets(line, MM_MAX_LINE_LENGTH, infile) == NULL) + if (fgets(line, MM_MAX_LINE_LENGTH, infile) == NULL) return -1; } while (line[0] == '%'); - hypre_sscanf(line, "%d %d %d", &M, &N, &nz); + hypre_sscanf(line, "%d %d %d", &M, &N, &nz); hypre_printf("%d %d %d\n", M, N, nz); nnz = 2*nz - M; @@ -58,7 +58,7 
@@ HYPRE_Int convert(FILE *infile, FILE *outfile) /* allocate space for whole matrix */ ind = hypre_TAlloc(HYPRE_Int, nnz , HYPRE_MEMORY_HOST); val = hypre_TAlloc(HYPRE_Real, nnz , HYPRE_MEMORY_HOST); - + /* set pointer to beginning of each row */ pointers[1] = 0; for (i=2; i<=M; i++) @@ -87,10 +87,10 @@ HYPRE_Int convert(FILE *infile, FILE *outfile) for (j=0; jhypre_MPI_communicator) -#define mype (globals->mype) -#define npes (globals->npes) -#define _secpertick (globals->_secpertick) -#define Mfactor (globals->Mfactor) -#define jr (globals->jr) -#define jw (globals->jw) -#define lastjr (globals->lastjr) -#define lr (globals->lr) -#define lastlr (globals->lastlr) -#define w (globals->w) -#define firstrow (globals->firstrow) -#define lastrow (globals->lastrow) -#define SerTmr (globals->SerTmr) -#define ParTmr (globals->ParTmr) -#define nrows (globals->nrows) -#define lnrows (globals->lnrows) -#define ndone (globals->ndone) -#define ntogo (globals->ntogo) -#define nleft (globals->nleft) -#define global_maxnz (globals->maxnz) -#define pilut_map (globals->map) -#define vrowdist (globals->vrowdist) -#define pilu_recv (globals->pilu_recv) -#define pilu_send (globals->pilu_send) -#define lu_recv (globals->lu_recv) +#define mype (globals->_mype) +#define npes (globals->_npes) +#define secpertick (globals->_secpertick) +#define Mfactor (globals->_Mfactor) +#define jr (globals->_jr) +#define jw (globals->_jw) +#define lastjr (globals->_lastjr) +#define lr (globals->_lr) +#define lastlr (globals->_lastlr) +#define w (globals->_w) +#define firstrow (globals->_firstrow) +#define lastrow (globals->_lastrow) +#define SerTmr (globals->_SerTmr) +#define ParTmr (globals->_ParTmr) +#define nrows (globals->_nrows) +#define lnrows (globals->_lnrows) +#define ndone (globals->_ndone) +#define ntogo (globals->_ntogo) +#define nleft (globals->_nleft) +#define global_maxnz (globals->_maxnz) +#define pilut_map (globals->_map) +#define vrowdist (globals->_vrowdist) +#define pilu_recv 
(globals->_pilu_recv) +#define pilu_send (globals->_pilu_send) +#define lu_recv (globals->_lu_recv) #include "./const.h" diff --git a/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c b/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c index 715ef4627..5844a356a 100644 --- a/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c +++ b/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver.c @@ -43,6 +43,8 @@ HYPRE_Int HYPRE_NewDistributedMatrixPilutSolver( jw = NULL; w = NULL; + globals->logging = 0; + /* Set some variables in the "global variables" section */ pilut_comm = comm; @@ -312,6 +314,22 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetMaxIts( return hypre_error_flag; } +HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetLogging( + HYPRE_DistributedMatrixPilutSolver in_ptr, + HYPRE_Int logging ) +{ + hypre_DistributedMatrixPilutSolver *solver = + (hypre_DistributedMatrixPilutSolver *) in_ptr; + hypre_PilutSolverGlobals *globals = hypre_DistributedMatrixPilutSolverGlobals(solver); + + if (globals) + { + globals->logging = logging; + } + + return hypre_error_flag; +} + /*-------------------------------------------------------------------------- * HYPRE_DistributedMatrixPilutSolverSetup *--------------------------------------------------------------------------*/ @@ -323,6 +341,8 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetup( HYPRE_DistributedMatrixPilutS (hypre_DistributedMatrixPilutSolver *) in_ptr; hypre_PilutSolverGlobals *globals = hypre_DistributedMatrixPilutSolverGlobals(solver); + HYPRE_Int logging = globals ? 
globals->logging : 0; + if(hypre_DistributedMatrixPilutSolverMatrix(solver) == NULL ) { @@ -356,7 +376,7 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetup( HYPRE_DistributedMatrixPilutS rowdist[ nprocs ] = n; #ifdef HYPRE_TIMING -{ + { HYPRE_Int ilut_timer; ilut_timer = hypre_InitializeTiming( "hypre_ILUT factorization" ); @@ -376,7 +396,7 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetup( HYPRE_DistributedMatrixPilutS #ifdef HYPRE_TIMING hypre_EndTiming( ilut_timer ); /* hypre_FinalizeTiming( ilut_timer ); */ -} + } #endif if (ierr) @@ -386,7 +406,7 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetup( HYPRE_DistributedMatrixPilutS } #ifdef HYPRE_TIMING -{ + { HYPRE_Int Setup_timer; Setup_timer = hypre_InitializeTiming( "hypre_SetUpLUFactor: setup for triangular solvers"); @@ -402,7 +422,7 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetup( HYPRE_DistributedMatrixPilutS #ifdef HYPRE_TIMING hypre_EndTiming( Setup_timer ); /* hypre_FinalizeTiming( Setup_timer ); */ -} + } #endif if (ierr) @@ -412,9 +432,12 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetup( HYPRE_DistributedMatrixPilutS } #ifdef HYPRE_DEBUG - fflush(stdout); - hypre_printf("Nlevels: %d\n", - hypre_DistributedMatrixPilutSolverFactorMat (solver)->nlevels); + if (logging) + { + fflush(stdout); + hypre_printf("Nlevels: %d\n", + hypre_DistributedMatrixPilutSolverFactorMat (solver)->nlevels); + } #endif return hypre_error_flag; diff --git a/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver_protos.h b/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver_protos.h index 25df0cb96..eb3bce133 100644 --- a/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver_protos.h +++ b/src/distributed_ls/pilut/HYPRE_DistributedMatrixPilutSolver_protos.h @@ -17,4 +17,5 @@ HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetDropTolerance (HYPRE_DistributedM HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetMaxIts (HYPRE_DistributedMatrixPilutSolver in_ptr , HYPRE_Int its ); HYPRE_Int 
HYPRE_DistributedMatrixPilutSolverSetup (HYPRE_DistributedMatrixPilutSolver in_ptr ); HYPRE_Int HYPRE_DistributedMatrixPilutSolverSolve (HYPRE_DistributedMatrixPilutSolver in_ptr , HYPRE_Real *x , HYPRE_Real *b ); - +HYPRE_Int HYPRE_DistributedMatrixPilutSolverSetLogging( HYPRE_DistributedMatrixPilutSolver in_ptr, HYPRE_Int logging ); + diff --git a/src/distributed_ls/pilut/debug.c b/src/distributed_ls/pilut/debug.c index f9ffbb510..54958b4cd 100644 --- a/src/distributed_ls/pilut/debug.c +++ b/src/distributed_ls/pilut/debug.c @@ -25,10 +25,15 @@ /************************************************************************* * This function prints a message and file/line number **************************************************************************/ -void hypre_PrintLine(char *str, hypre_PilutSolverGlobals *globals) +void hypre_PrintLine(const char *str, hypre_PilutSolverGlobals *globals) { - hypre_printf("PE %d ---- %-27s (%s, %d)\n", - mype, str, __FILE__, __LINE__); + HYPRE_Int logging = globals ? globals->logging : 0; + + if (logging) + { + hypre_printf("PE %d ---- %-27s (%s, %d)\n", + mype, str, __FILE__, __LINE__); + } fflush(stdout); } @@ -39,8 +44,8 @@ void hypre_PrintLine(char *str, hypre_PilutSolverGlobals *globals) void hypre_CheckBounds(HYPRE_Int low, HYPRE_Int i ,HYPRE_Int up, hypre_PilutSolverGlobals *globals) { if ((i < low) || (i >= up)) - hypre_errexit("PE %d Bad bound: %d <= %d < %d (%s %d)\n", - mype, low, i, up, __FILE__, __LINE__ ); + hypre_errexit("PE %d Bad bound: %d <= %d < %d (%s %d)\n", + mype, low, i, up, __FILE__, __LINE__ ); } /************************************************************************* @@ -49,6 +54,7 @@ void hypre_CheckBounds(HYPRE_Int low, HYPRE_Int i ,HYPRE_Int up, hypre_PilutSolv hypre_longint hypre_IDX_Checksum(const HYPRE_Int *v, HYPRE_Int len, const char *msg, HYPRE_Int tag, hypre_PilutSolverGlobals *globals) { + HYPRE_Int logging = globals ? 
globals->logging : 0; static HYPRE_Int numChk = 0; HYPRE_Int i; hypre_ulongint sum = 0; @@ -56,9 +62,12 @@ hypre_longint hypre_IDX_Checksum(const HYPRE_Int *v, HYPRE_Int len, const char * for (i=0; ilogging : 0; static HYPRE_Int numChk = 0; HYPRE_Int i; hypre_ulongint sum = 0; @@ -78,9 +88,12 @@ hypre_longint hypre_INT_Checksum(const HYPRE_Int *v, HYPRE_Int len, const char * for (i=0; ilogging : 0; static HYPRE_Int numChk = 0; HYPRE_Int i; hypre_ulongint sum = 0; @@ -101,9 +115,12 @@ hypre_longint hypre_FP_Checksum(const HYPRE_Real *v, HYPRE_Int len, const char * for (i=0; ilogging : 0; HYPRE_Int i; static HYPRE_Int numChk = 0; @@ -125,18 +143,24 @@ hypre_longint hypre_RMat_Checksum(const ReduceMatType *rmat, rmat->rmat_rrowlen == NULL || rmat->rmat_rcolind == NULL || rmat->rmat_rvalues == NULL ) { - hypre_printf("PE %d [r%3d] rmat checksum -- not initializied\n", - mype, numChk); - fflush(stdout); + if (logging) + { + hypre_printf("PE %d [r%3d] rmat checksum -- not initializied\n", + mype, numChk); + fflush(stdout); + } numChk++; return 0; } - /* print ints */ - hypre_printf("PE %d [r%3d] rmat checksum -- ndone %d ntogo %d nlevel %d\n", - mype, numChk, rmat->rmat_ndone, rmat->rmat_ntogo, rmat->rmat_nlevel); - fflush(stdout); + if (logging) + { + /* print ints */ + hypre_printf("PE %d [r%3d] rmat checksum -- ndone %d ntogo %d nlevel %d\n", + mype, numChk, rmat->rmat_ndone, rmat->rmat_ntogo, rmat->rmat_nlevel); + fflush(stdout); + } /* print checksums for each array */ hypre_IDX_Checksum(rmat->rmat_rnz, rmat->rmat_ntogo, "rmat->rmat_rnz", numChk, @@ -160,6 +184,7 @@ hypre_longint hypre_RMat_Checksum(const ReduceMatType *rmat, hypre_longint hypre_LDU_Checksum(const FactorMatType *ldu, hypre_PilutSolverGlobals *globals) { + HYPRE_Int logging = globals ? 
globals->logging : 0; HYPRE_Int i, j; hypre_ulongint lisum=0, ldsum=0, uisum=0, udsum=0, dsum=0; static HYPRE_Int numChk = 0; @@ -175,7 +200,7 @@ hypre_longint hypre_LDU_Checksum(const FactorMatType *ldu, ldu->dvalues == NULL || ldu->nrm2s == NULL) { hypre_printf("PE %d [S%3d] LDU check -- not initializied\n", - mype, numChk); + mype, numChk); fflush(stdout); return 0; } @@ -195,9 +220,12 @@ hypre_longint hypre_LDU_Checksum(const FactorMatType *ldu, dsum += (hypre_longint)ldu->dvalues[i]; } - hypre_printf("PE %d [S%3d] LDU check [%16lx %16lx] [%16lx] [%16lx %16lx]\n", - mype, numChk, lisum, ldsum, dsum, uisum, udsum); - fflush(stdout); + if (logging) + { + hypre_printf("PE %d [S%3d] LDU check [%16lx %16lx] [%16lx] [%16lx %16lx]\n", + mype, numChk, lisum, ldsum, dsum, uisum, udsum); + fflush(stdout); + } hypre_FP_Checksum(ldu->nrm2s, lnrows, "2-norms", numChk, globals); @@ -207,20 +235,24 @@ hypre_longint hypre_LDU_Checksum(const FactorMatType *ldu, /************************************************************************* -* This function prints a vector on each processor +* This function prints a vector on each processor **************************************************************************/ void hypre_PrintVector(HYPRE_Int *v, HYPRE_Int n, char *msg, hypre_PilutSolverGlobals *globals) { + HYPRE_Int logging = globals ? globals->logging : 0; HYPRE_Int i, penum; for (penum=0; penum #include -#include #include #include "macros.h" diff --git a/src/distributed_ls/pilut/ilut.c b/src/distributed_ls/pilut/ilut.c index 198cba331..5b7d3409a 100644 --- a/src/distributed_ls/pilut/ilut.c +++ b/src/distributed_ls/pilut/ilut.c @@ -25,13 +25,17 @@ HYPRE_Int hypre_ILUT(DataDistType *ddist, HYPRE_DistributedMatrix matrix, FactorMatType *ldu, HYPRE_Int maxnz, HYPRE_Real tol, hypre_PilutSolverGlobals *globals ) { + HYPRE_Int logging = globals ? 
globals->logging : 0; HYPRE_Int i, ierr; ReduceMatType rmat; HYPRE_Int dummy_row_ptr[2], size; HYPRE_Real *values; #ifdef HYPRE_DEBUG - hypre_printf("hypre_ILUT, maxnz = %d\n ", maxnz); + if (logging) + { + hypre_printf("hypre_ILUT, maxnz = %d\n ", maxnz); + } #endif /* Allocate memory for ldu */ diff --git a/src/distributed_ls/pilut/internal_protos.h b/src/distributed_ls/pilut/internal_protos.h index 718de6eb5..fa847756a 100644 --- a/src/distributed_ls/pilut/internal_protos.h +++ b/src/distributed_ls/pilut/internal_protos.h @@ -27,7 +27,7 @@ HYPRE_Real hypre_GlobalSEMinDouble( HYPRE_Real value , MPI_Comm hypre_MPI_Contex HYPRE_Real hypre_GlobalSESumDouble( HYPRE_Real value , MPI_Comm hypre_MPI_Context ); /* debug.c */ -void hypre_PrintLine( char *str , hypre_PilutSolverGlobals *globals ); +void hypre_PrintLine( const char *str , hypre_PilutSolverGlobals *globals ); void hypre_CheckBounds( HYPRE_Int low , HYPRE_Int i , HYPRE_Int up , hypre_PilutSolverGlobals *globals ); hypre_longint hypre_IDX_Checksum( const HYPRE_Int *v , HYPRE_Int len , const char *msg , HYPRE_Int tag , hypre_PilutSolverGlobals *globals ); hypre_longint hypre_INT_Checksum( const HYPRE_Int *v , HYPRE_Int len , const char *msg , HYPRE_Int tag , hypre_PilutSolverGlobals *globals ); diff --git a/src/distributed_ls/pilut/parilut.c b/src/distributed_ls/pilut/parilut.c index 9e57afadf..d1139d042 100644 --- a/src/distributed_ls/pilut/parilut.c +++ b/src/distributed_ls/pilut/parilut.c @@ -56,12 +56,12 @@ * This function performs hypre_ILUT on the boundary nodes via MIS computation **************************************************************************/ void hypre_ParILUT(DataDistType *ddist, FactorMatType *ldu, - ReduceMatType *rmat, HYPRE_Int gmaxnz, HYPRE_Real tol, + ReduceMatType *rmat, HYPRE_Int gmaxnz, HYPRE_Real tol, hypre_PilutSolverGlobals *globals ) { HYPRE_Int nmis, nlevel; CommInfoType cinfo; - HYPRE_Int *perm, *iperm, *newiperm, *newperm; + HYPRE_Int *perm, *iperm, *newiperm, *newperm; 
ReduceMatType *rmats[2], nrmat; #ifdef HYPRE_DEBUG @@ -107,14 +107,14 @@ void hypre_ParILUT(DataDistType *ddist, FactorMatType *ldu, nmis = hypre_SelectSet(rmats[nlevel%2], &cinfo, perm, iperm, newperm, newiperm, globals ); hypre_FactorLocal(ldu, rmats[nlevel%2], rmats[(nlevel+1)%2], &cinfo, - perm, iperm, newperm, newiperm, nmis, tol, globals ); + perm, iperm, newperm, newiperm, nmis, tol, globals ); fflush(stdout); hypre_MPI_Barrier(pilut_comm); hypre_SendFactoredRows(ldu, &cinfo, newperm, nmis, globals); fflush(stdout); hypre_MPI_Barrier(pilut_comm); hypre_ComputeRmat(ldu, rmats[nlevel%2], rmats[(nlevel+1)%2], &cinfo, - perm, iperm, newperm, newiperm, nmis, tol, globals); + perm, iperm, newperm, newiperm, nmis, tol, globals); hypre_EraseMap(&cinfo, newperm, nmis, globals); @@ -137,10 +137,10 @@ void hypre_ParILUT(DataDistType *ddist, FactorMatType *ldu, ldu->nlevels = nlevel; /*hypre_free_multi(jr, jw, lr, w, map, - nrmat.rmat_rnz, nrmat.rmat_rrowlen, nrmat.rmat_rcolind, + nrmat.rmat_rnz, nrmat.rmat_rrowlen, nrmat.rmat_rcolind, nrmat.rmat_rvalues, - cinfo.gatherbuf, cinfo.rrowind, cinfo.rnbrind, cinfo.rnbrptr, - cinfo.snbrind, cinfo.srowind, cinfo.snbrptr, + cinfo.gatherbuf, cinfo.rrowind, cinfo.rnbrind, cinfo.rnbrptr, + cinfo.snbrind, cinfo.srowind, cinfo.snbrptr, cinfo.incolind, cinfo.invalues, newperm, newiperm, vrowdist, -1);*/ hypre_TFree(jr, HYPRE_MEMORY_HOST); @@ -256,15 +256,16 @@ void hypre_ComputeCommInfo(ReduceMatType *rmat, CommInfoType *cinfo, HYPRE_Int * /* If memory requirements change, allocate new memory. 
* The first iteration this always occurs -- see hypre_ParINIT */ - if (cinfo->maxnrecv < maxnrecv) { - if (cinfo->incolind) { free(cinfo->incolind); cinfo->incolind = NULL; } - if (cinfo->invalues) { free(cinfo->invalues); cinfo->invalues = NULL; } - cinfo->incolind = hypre_idx_malloc(maxnrecv*(global_maxnz+2)+1, "hypre_ComputeCommInfo: cinfo->incolind"); - cinfo->invalues = hypre_fp_malloc(maxnrecv*(global_maxnz+2)+1, "hypre_ComputeCommInfo: cinfo->invalues"); - cinfo->maxnrecv = maxnrecv; + if (cinfo->maxnrecv < maxnrecv) + { + hypre_TFree(cinfo->incolind, HYPRE_MEMORY_HOST); + hypre_TFree(cinfo->invalues, HYPRE_MEMORY_HOST); + cinfo->incolind = hypre_idx_malloc(maxnrecv*(global_maxnz+2)+1, "hypre_ComputeCommInfo: cinfo->incolind"); + cinfo->invalues = hypre_fp_malloc(maxnrecv*(global_maxnz+2)+1, "hypre_ComputeCommInfo: cinfo->invalues"); + cinfo->maxnrecv = maxnrecv; } - assert( cinfo->incolind != NULL ); - assert( cinfo->invalues != NULL ); + hypre_assert( cinfo->incolind != NULL ); + hypre_assert( cinfo->invalues != NULL ); /* Zero our send buffer */ for(i=0; imaxnsend < maxnsend) { - if(cinfo->srowind) { free(cinfo->srowind); cinfo->srowind = NULL; } - cinfo->srowind = hypre_idx_malloc(maxnsend, "hypre_ComputeCommInfo: cinfo->srowind"); - cinfo->maxnsend = maxnsend; + hypre_TFree(cinfo->srowind, HYPRE_MEMORY_HOST); + cinfo->srowind = hypre_idx_malloc(maxnsend, "hypre_ComputeCommInfo: cinfo->srowind"); + cinfo->maxnsend = maxnsend; } - assert( cinfo->srowind != NULL ); + hypre_assert( cinfo->srowind != NULL ); srowind = cinfo->srowind; /* issue asynchronous recieves */ for (i=0; i= vrowdist[penum+1]) { /* idx >= lastrow? */ penum++; - assert( penum < npes ); + hypre_assert( penum < npes ); } return penum; @@ -352,8 +353,8 @@ HYPRE_Int hypre_Idx2PE(HYPRE_Int idx, * For historical reasons the set is called a maximal indep. set (MIS). 
**************************************************************************/ HYPRE_Int hypre_SelectSet(ReduceMatType *rmat, CommInfoType *cinfo, - HYPRE_Int *perm, HYPRE_Int *iperm, - HYPRE_Int *newperm, HYPRE_Int *newiperm, + HYPRE_Int *perm, HYPRE_Int *iperm, + HYPRE_Int *newperm, HYPRE_Int *newiperm, hypre_PilutSolverGlobals *globals) { HYPRE_Int ir, i, j, k, l, num; @@ -382,8 +383,8 @@ HYPRE_Int hypre_SelectSet(ReduceMatType *rmat, CommInfoType *cinfo, for (j=1; j= lastrow) && - mype > hypre_Idx2PE(rcolind[j], globals)) - break ; + mype > hypre_Idx2PE(rcolind[j], globals)) + break ; } if ( j == nnz ) { /* passed test; put into set */ jw[num++] = i; @@ -395,12 +396,12 @@ HYPRE_Int hypre_SelectSet(ReduceMatType *rmat, CommInfoType *cinfo, for (k=0; kSFR_timer ); -#endif +#endif snnbr = cinfo->snnbr; snbrind = cinfo->snbrind; @@ -493,10 +494,10 @@ void hypre_SendFactoredRows(FactorMatType *ldu, CommInfoType *cinfo, penum = rnbrind[i]; hypre_MPI_Irecv( incolind+j, cnt, HYPRE_MPI_INT, - penum, TAG_Send_colind, pilut_comm, &index_requests[i] ); + penum, TAG_Send_colind, pilut_comm, &index_requests[i] ); hypre_MPI_Irecv( invalues+j, cnt, hypre_MPI_REAL, - penum, TAG_Send_values, pilut_comm, &value_requests[i] ); + penum, TAG_Send_values, pilut_comm, &value_requests[i] ); j += cnt; } @@ -506,7 +507,7 @@ void hypre_SendFactoredRows(FactorMatType *ldu, CommInfoType *cinfo, for (j=ndone; jSFR_timer ); -#endif +#endif /* clean up memory */ hypre_TFree(index_requests, HYPRE_MEMORY_HOST); @@ -586,9 +587,9 @@ void hypre_SendFactoredRows(FactorMatType *ldu, CommInfoType *cinfo, * processor as the row being subtracted is, since it is block diagonal. 
**************************************************************************/ void hypre_ComputeRmat(FactorMatType *ldu, ReduceMatType *rmat, - ReduceMatType *nrmat, CommInfoType *cinfo, - HYPRE_Int *perm, HYPRE_Int *iperm, - HYPRE_Int *newperm, HYPRE_Int *newiperm, HYPRE_Int nmis, HYPRE_Real tol, + ReduceMatType *nrmat, CommInfoType *cinfo, + HYPRE_Int *perm, HYPRE_Int *iperm, + HYPRE_Int *newperm, HYPRE_Int *newiperm, HYPRE_Int nmis, HYPRE_Real tol, hypre_PilutSolverGlobals *globals) { HYPRE_Int i, ir, inr, start, k, kk, l, m, end, nnz; @@ -601,7 +602,7 @@ void hypre_ComputeRmat(FactorMatType *ldu, ReduceMatType *rmat, #endif #ifdef HYPRE_TIMING hypre_BeginTiming( globals->CR_timer ); -#endif +#endif usrowptr = ldu->usrowptr; uerowptr = ldu->uerowptr; @@ -619,7 +620,7 @@ void hypre_ComputeRmat(FactorMatType *ldu, ReduceMatType *rmat, for (ir=ndone+nmis; ir= firstrow && rcolind[lastjr] < lastrow) - lr[lastlr] = (newiperm[rcolind[lastjr]-firstrow] << 1); - else { - lr[lastlr] = pilut_map[rcolind[lastjr]]; /* map[] == (l<<1) | 1 */ - assert(incolind[StripMIS(pilut_map[rcolind[lastjr]])+1] == + if (rcolind[lastjr] >= firstrow && rcolind[lastjr] < lastrow) + lr[lastlr] = (newiperm[rcolind[lastjr]-firstrow] << 1); + else { + lr[lastlr] = pilut_map[rcolind[lastjr]]; /* map[] == (l<<1) | 1 */ + hypre_assert(incolind[StripMIS(pilut_map[rcolind[lastjr]])+1] == rcolind[lastjr]); - } + } lastlr++; } @@ -663,27 +664,27 @@ void hypre_ComputeRmat(FactorMatType *ldu, ReduceMatType *rmat, jw[lastjr] = rcolind[lastjr]; w[lastjr] = rvalues[lastjr]; } - assert(lastjr == nnz); - assert(lastjr > 0); + hypre_assert(lastjr == nnz); + hypre_assert(lastjr > 0); /* Go through the L nonzeros and pull in the contributions */ while( lastlr != 0 ) { k = hypre_ExtractMinLR( globals ); if ( IsLocal(k) ) { /* Local node -- row is in DU */ - hypre_CheckBounds(0, StripLocal(k), lnrows, globals); - kk = newperm[ StripLocal(k) ]; /* remove the local bit (LSB) */ - k = kk+firstrow; + 
hypre_CheckBounds(0, StripLocal(k), lnrows, globals); + kk = newperm[ StripLocal(k) ]; /* remove the local bit (LSB) */ + k = kk+firstrow; - hypre_CheckBounds(0, kk, lnrows, globals); - hypre_CheckBounds(0, jr[k], lastjr, globals); - assert(jw[jr[k]] == k); + hypre_CheckBounds(0, kk, lnrows, globals); + hypre_CheckBounds(0, jr[k], lastjr, globals); + hypre_assert(jw[jr[k]] == k); mult = w[jr[k]]*dvalues[kk]; w[jr[k]] = mult; if (fabs(mult) < rtol) - continue; /* First drop test */ + continue; /* First drop test */ for (l=usrowptr[kk]; l= firstrow && ucolind[l] < lastrow); - lr[lastlr] = (newiperm[ucolind[l]-firstrow] << 1); - lastlr++; - } + hypre_assert(ucolind[l] >= firstrow && ucolind[l] < lastrow); + lr[lastlr] = (newiperm[ucolind[l]-firstrow] << 1); + lastlr++; + } /* Create fill */ jr[ucolind[l]] = lastjr; jw[lastjr] = ucolind[l]; w[lastjr] = -mult*uvalues[l]; - lastjr++; + lastjr++; } - else + else w[m] -= mult*uvalues[l]; } } else { /* Outside node -- row is in incolind/invalues */ start = StripLocal(k); /* Remove the local bit (LSB) */ end = start + incolind[start]; /* get length */ - start++; - k = incolind[start]; /* get diagonal colind == row index */ + start++; + k = incolind[start]; /* get diagonal colind == row index */ - hypre_CheckBounds(0, k, nrows, globals); - hypre_CheckBounds(0, jr[k], lastjr, globals); - assert(jw[jr[k]] == k); + hypre_CheckBounds(0, k, nrows, globals); + hypre_CheckBounds(0, jr[k], lastjr, globals); + hypre_assert(jw[jr[k]] == k); mult = w[jr[k]]*invalues[start]; w[jr[k]] = mult; if (fabs(mult) < rtol) - continue; /* First drop test */ + continue; /* First drop test */ for (l=++start; l<=end; l++) { hypre_CheckBounds(0, incolind[l], nrows, globals); @@ -732,20 +733,20 @@ void hypre_ComputeRmat(FactorMatType *ldu, ReduceMatType *rmat, if (fabs(mult*invalues[l]) < rtol) continue; /* Don't worry. 
The fill has too small of a value */ - /* record L elements -- these must be remote */ + /* record L elements -- these must be remote */ if (IsInMIS(pilut_map[incolind[l]])) { - assert(incolind[l] < firstrow || incolind[l] >= lastrow); - lr[lastlr] = pilut_map[incolind[l]]; /* map[] == (l<<1) | 1 */ - lastlr++; - } + hypre_assert(incolind[l] < firstrow || incolind[l] >= lastrow); + lr[lastlr] = pilut_map[incolind[l]]; /* map[] == (l<<1) | 1 */ + lastlr++; + } /* Create fill */ jr[incolind[l]] = lastjr; jw[lastjr] = incolind[l]; w[lastjr] = -mult*invalues[l]; - lastjr++; + lastjr++; } - else + else w[m] -= mult*invalues[l]; } } @@ -760,7 +761,7 @@ void hypre_ComputeRmat(FactorMatType *ldu, ReduceMatType *rmat, } #ifdef HYPRE_TIMING hypre_EndTiming( globals->CR_timer ); -#endif +#endif } @@ -772,9 +773,9 @@ void hypre_ComputeRmat(FactorMatType *ldu, ReduceMatType *rmat, * dependencies within a PE this factors those, adding to L, and forms DU. **************************************************************************/ void hypre_FactorLocal(FactorMatType *ldu, ReduceMatType *rmat, - ReduceMatType *nrmat, CommInfoType *cinfo, - HYPRE_Int *perm, HYPRE_Int *iperm, - HYPRE_Int *newperm, HYPRE_Int *newiperm, HYPRE_Int nmis, HYPRE_Real tol, + ReduceMatType *nrmat, CommInfoType *cinfo, + HYPRE_Int *perm, HYPRE_Int *iperm, + HYPRE_Int *newperm, HYPRE_Int *newiperm, HYPRE_Int nmis, HYPRE_Real tol, hypre_PilutSolverGlobals *globals) { HYPRE_Int i, ir, k, kk, l, m, nnz, diag; @@ -787,12 +788,12 @@ void hypre_FactorLocal(FactorMatType *ldu, ReduceMatType *rmat, #endif #ifdef HYPRE_TIMING hypre_BeginTiming( globals->FL_timer ); -#endif +#endif - assert( rmat != nrmat ); - assert( perm != newperm ); - assert( iperm != newiperm ); + hypre_assert( rmat != nrmat ); + hypre_assert( perm != newperm ); + hypre_assert( iperm != newiperm ); usrowptr = ldu->usrowptr; uerowptr = ldu->uerowptr; @@ -805,7 +806,7 @@ void hypre_FactorLocal(FactorMatType *ldu, ReduceMatType *rmat, for (ir=ndone; 
ir= firstrow && - rcolind[lastjr] < lastrow && - newiperm[rcolind[lastjr]-firstrow] < diag) { - lr[lastlr] = newiperm[rcolind[lastjr]-firstrow]; + rcolind[lastjr] < lastrow && + newiperm[rcolind[lastjr]-firstrow] < diag) { + lr[lastlr] = newiperm[rcolind[lastjr]-firstrow]; lastlr++; } @@ -852,38 +853,38 @@ void hypre_FactorLocal(FactorMatType *ldu, ReduceMatType *rmat, hypre_CheckBounds(0, kk, lnrows, globals); hypre_CheckBounds(0, jr[k], lastjr, globals); - assert(jw[jr[k]] == k); + hypre_assert(jw[jr[k]] == k); mult = w[jr[k]]*dvalues[kk]; w[jr[k]] = mult; if (fabs(mult) < rtol) - continue; /* First drop test */ + continue; /* First drop test */ for (l=usrowptr[kk]; l= firstrow && - ucolind[l] < lastrow && - newiperm[ucolind[l]-firstrow] < diag) { - assert(IsInMIS(pilut_map[ucolind[l]])); - lr[lastlr] = newiperm[ucolind[l]-firstrow]; - lastlr++; - } - - /* Create fill */ - jr[ucolind[l]] = lastjr; - jw[lastjr] = ucolind[l]; - w[lastjr] = -mult*uvalues[l]; - lastjr++; - } - else - w[m] -= mult*uvalues[l]; + hypre_CheckBounds(0, ucolind[l], nrows, globals); + m = jr[ucolind[l]]; + if (m == -1) { + if (fabs(mult*uvalues[l]) < rtol) + continue; /* Don't worry. 
The fill has too small of a value */ + + /* record L elements */ + if (ucolind[l] >= firstrow && + ucolind[l] < lastrow && + newiperm[ucolind[l]-firstrow] < diag) { + hypre_assert(IsInMIS(pilut_map[ucolind[l]])); + lr[lastlr] = newiperm[ucolind[l]-firstrow]; + lastlr++; + } + + /* Create fill */ + jr[ucolind[l]] = lastjr; + jw[lastjr] = ucolind[l]; + w[lastjr] = -mult*uvalues[l]; + lastjr++; + } + else + w[m] -= mult*uvalues[l]; } } /* L non-zeros */ @@ -895,7 +896,7 @@ void hypre_FactorLocal(FactorMatType *ldu, ReduceMatType *rmat, } #ifdef HYPRE_TIMING hypre_EndTiming( globals->FL_timer ); -#endif +#endif } @@ -946,7 +947,7 @@ HYPRE_Int hypre_SeperateLU_byDIAG( HYPRE_Int diag, HYPRE_Int *newiperm, #ifdef HYPRE_TIMING hypre_BeginTiming( globals->SLUD_timer ); -#endif +#endif /* Perform a Qsort type pass to seperate L and U (rmat) entries. */ if (lastjr == 1) @@ -956,14 +957,14 @@ HYPRE_Int hypre_SeperateLU_byDIAG( HYPRE_Int diag, HYPRE_Int *newiperm, first = lastjr-1; while (true) { while (last < first && /* while (last < first AND [last] is in L) */ - (jw[last] >= firstrow && - jw[last] < lastrow && - newiperm[jw[last]-firstrow] < diag)) + (jw[last] >= firstrow && + jw[last] < lastrow && + newiperm[jw[last]-firstrow] < diag)) last++; while (last < first && /* while (last < first AND [first] is not in L) */ - ! (jw[first] >= firstrow && - jw[first] < lastrow && - newiperm[jw[first]-firstrow] < diag)) + ! 
(jw[first] >= firstrow && + jw[first] < lastrow && + newiperm[jw[first]-firstrow] < diag)) first--; if (last < first) { @@ -974,8 +975,8 @@ HYPRE_Int hypre_SeperateLU_byDIAG( HYPRE_Int diag, HYPRE_Int *newiperm, if (last == first) { if ((jw[last] >= firstrow && /* if [last] is in L */ - jw[last] < lastrow && - newiperm[jw[last]-firstrow] < diag)) { + jw[last] < lastrow && + newiperm[jw[last]-firstrow] < diag)) { first++; last++; } @@ -991,21 +992,21 @@ HYPRE_Int hypre_SeperateLU_byDIAG( HYPRE_Int diag, HYPRE_Int *newiperm, #ifndef NDEBUG /* DEBUGGING: verify sorting to some extent */ for (itmp=1; itmp= firstrow && /* [itmp] is in L -- must be MIS */ - jw[itmp] < lastrow && - newiperm[jw[itmp]-firstrow] < diag)); - assert(IsInMIS(pilut_map[jw[itmp]])); + hypre_assert((jw[itmp] >= firstrow && /* [itmp] is in L -- must be MIS */ + jw[itmp] < lastrow && + newiperm[jw[itmp]-firstrow] < diag)); + hypre_assert(IsInMIS(pilut_map[jw[itmp]])); } for (itmp=first; itmp= firstrow && /* [itmp] is not in L -- may be MIS still */ - jw[itmp] < lastrow && - newiperm[jw[itmp]-firstrow] < diag)); + hypre_assert(!(jw[itmp] >= firstrow && /* [itmp] is not in L -- may be MIS still */ + jw[itmp] < lastrow && + newiperm[jw[itmp]-firstrow] < diag)); } - assert(last == first); + hypre_assert(last == first); #endif #ifdef HYPRE_TIMING hypre_EndTiming( globals->SLUD_timer ); -#endif +#endif return first; @@ -1030,7 +1031,7 @@ HYPRE_Int hypre_SeperateLU_byMIS( hypre_PilutSolverGlobals *globals ) #ifdef HYPRE_TIMING hypre_BeginTiming( globals->SLUM_timer ); -#endif +#endif /* Perform a Qsort type pass to seperate L and U (rmat) entries. 
*/ if (lastjr == 1) @@ -1067,15 +1068,15 @@ HYPRE_Int hypre_SeperateLU_byMIS( hypre_PilutSolverGlobals *globals ) #ifndef NDEBUG /* DEBUGGING: verify sorting to some extent */ for (itmp=1; itmpSLUM_timer ); -#endif +#endif return first; @@ -1097,7 +1098,7 @@ void hypre_UpdateL(HYPRE_Int lrow, HYPRE_Int last, FactorMatType *ldu, #ifdef HYPRE_TIMING hypre_BeginTiming( globals->UL_timer ); -#endif +#endif lcolind = ldu->lcolind; lvalues = ldu->lvalues; @@ -1115,13 +1116,13 @@ void hypre_UpdateL(HYPRE_Int lrow, HYPRE_Int last, FactorMatType *ldu, else { min = start; /* find min and replace if i is larger */ for (j=start+1; jUL_timer ); -#endif +#endif } /************************************************************************* -* This function forms the new reduced row corresponding to +* This function forms the new reduced row corresponding to * the given row, assuming that the * workspace has already been split into L and U (rmat) entries. It reuses * the memory for the row in the reduced matrix, storing the new row into @@ -1145,7 +1146,7 @@ void hypre_UpdateL(HYPRE_Int lrow, HYPRE_Int last, FactorMatType *ldu, **************************************************************************/ void hypre_FormNRmat(HYPRE_Int rrow, HYPRE_Int first, ReduceMatType *nrmat, HYPRE_Int max_rowlen, - HYPRE_Int in_rowlen, HYPRE_Int *in_colind, HYPRE_Real *in_values, + HYPRE_Int in_rowlen, HYPRE_Int *in_colind, HYPRE_Real *in_values, hypre_PilutSolverGlobals *globals ) { HYPRE_Int nz, max, j, out_rowlen, *rcolind; @@ -1153,9 +1154,9 @@ void hypre_FormNRmat(HYPRE_Int rrow, HYPRE_Int first, ReduceMatType *nrmat, #ifdef HYPRE_TIMING hypre_BeginTiming( globals->FNR_timer ); -#endif +#endif - assert(in_colind[0] == jw[0]); /* diagonal at the beginning */ + hypre_assert(in_colind[0] == jw[0]); /* diagonal at the beginning */ /* check to see if we need to reallocate space */ out_rowlen = hypre_min( max_rowlen, lastjr-first+1 ); @@ -1182,26 +1183,26 @@ void hypre_FormNRmat(HYPRE_Int rrow, 
HYPRE_Int first, ReduceMatType *nrmat, rcolind[nz] = jw[j]; rvalues[nz] = w[j]; } - assert(nz == lastjr-first+1); + hypre_assert(nz == lastjr-first+1); } else { /* Keep largest out_rowlen elements in the reduced row */ for (nz=1; nz fabs(w[max])) - max = j; + if (fabs(w[j]) > fabs(w[max])) + max = j; } - + rcolind[nz] = jw[max]; /* store max */ rvalues[nz] = w[max]; - + jw[max] = jw[--lastjr]; /* swap max out */ w[max] = w[ lastjr]; } - assert(nz == out_rowlen); + hypre_assert(nz == out_rowlen); } - assert(nz <= max_rowlen); - + hypre_assert(nz <= max_rowlen); + /* link the reused storage to the new reduced system */ nrmat->rmat_rnz[rrow] = nz; nrmat->rmat_rrowlen[rrow] = out_rowlen; @@ -1210,7 +1211,7 @@ void hypre_FormNRmat(HYPRE_Int rrow, HYPRE_Int first, ReduceMatType *nrmat, #ifdef HYPRE_TIMING hypre_EndTiming( globals->FNR_timer ); -#endif +#endif } @@ -1222,7 +1223,7 @@ void hypre_FormNRmat(HYPRE_Int rrow, HYPRE_Int first, ReduceMatType *nrmat, * the memory used by the row in the reduced matrix. **************************************************************************/ void hypre_FormDU(HYPRE_Int lrow, HYPRE_Int first, FactorMatType *ldu, - HYPRE_Int *rcolind, HYPRE_Real *rvalues, HYPRE_Real tol, + HYPRE_Int *rcolind, HYPRE_Real *rvalues, HYPRE_Real tol, hypre_PilutSolverGlobals *globals ) { HYPRE_Int nz, max, j, end; @@ -1233,7 +1234,7 @@ void hypre_FormDU(HYPRE_Int lrow, HYPRE_Int first, FactorMatType *ldu, uerowptr = ldu->uerowptr; uvalues = ldu->uvalues; - /* + /* * Take care of the diagonal */ if (w[0] == 0.0) { @@ -1243,33 +1244,33 @@ void hypre_FormDU(HYPRE_Int lrow, HYPRE_Int first, FactorMatType *ldu, else ldu->dvalues[lrow] = 1.0/w[0]; - /* + /* * Take care of the elements of U * Note U is completely empty beforehand. 
*/ end = ldu->uerowptr[lrow]; - assert(ldu->usrowptr[lrow] == ldu->uerowptr[lrow]); + hypre_assert(ldu->usrowptr[lrow] == ldu->uerowptr[lrow]); for (nz=0; nzfirst; nz++) { /* The entries [first, lastjr) are part of U */ max = first; for (j=first+1; j fabs(w[max])) - max = j; + max = j; } ucolind[end] = jw[max]; /* store max */ uvalues[end] = w[max]; end++; - + jw[max] = jw[--lastjr]; /* swap max out */ w[max] = w[ lastjr]; } uerowptr[lrow] = end; /* free the row storage */ - free( rcolind ); rcolind = NULL; - free( rvalues ); rvalues = NULL; + hypre_TFree( rcolind ,HYPRE_MEMORY_HOST); + hypre_TFree( rvalues ,HYPRE_MEMORY_HOST); } @@ -1294,7 +1295,7 @@ void hypre_EraseMap(CommInfoType *cinfo, HYPRE_Int *newperm, HYPRE_Int nmis, #endif /* clear map of all MIS rows */ - for (i=ndone; irnbrind = hypre_idx_malloc(npes, "hypre_ComputeCommInfo: cinfo->rnbrind"); cinfo->rrowind = hypre_idx_malloc(nleft, "hypre_ComputeCommInfo: cinfo->rrowind"); cinfo->rnbrptr = hypre_idx_malloc(npes+1, "hypre_ComputeCommInfo: cinfo->rnbrptr"); - + cinfo->snbrind = hypre_idx_malloc(npes, "hypre_ComputeCommInfo: cinfo->snbrind"); cinfo->snbrptr = hypre_idx_malloc(npes+1, "hypre_ComputeCommInfo: cinfo->snbrptr"); diff --git a/src/distributed_ls/pilut/parutil.c b/src/distributed_ls/pilut/parutil.c index ce6d75bc4..16985fb0b 100644 --- a/src/distributed_ls/pilut/parutil.c +++ b/src/distributed_ls/pilut/parutil.c @@ -160,30 +160,25 @@ void *hypre_mymalloc(HYPRE_Int nbytes,const char *msg) /************************************************************************* -* This function is my wrapper around free, allows multiple pointers +* This function is my wrapper around free, allows multiple pointers **************************************************************************/ #if 0 void hypre_free_multi(void *ptr1,...) 
{ - va_list plist; - void *ptr; - - if (ptr1 != NULL) - free(ptr1); - ptr1 = NULL; + va_list plist; + void *ptr; - va_start(plist, ptr1); + hypre_TFree(ptr1, HYPRE_MEMORY_HOST); - while ( (ptr = va_arg(plist, void *)) != ((void *) -1) ) { - if (ptr != NULL) - free(ptr); - ptr = NULL; - } + va_start(plist, ptr1); - va_end(plist); + while ( (ptr = va_arg(plist, void *)) != ((void *) -1) ) { + hypre_TFree(ptr, HYPRE_MEMORY_HOST); + } -} -#endif + va_end(plist); +} +#endif /************************************************************************* * The following function copies an HYPRE_Int (HYPRE_Int) array diff --git a/src/distributed_ls/pilut/serilut.c b/src/distributed_ls/pilut/serilut.c index 9cec49622..3c7278b77 100644 --- a/src/distributed_ls/pilut/serilut.c +++ b/src/distributed_ls/pilut/serilut.c @@ -39,9 +39,10 @@ **************************************************************************/ HYPRE_Int hypre_SerILUT(DataDistType *ddist, HYPRE_DistributedMatrix matrix, FactorMatType *ldu, - ReduceMatType *rmat, HYPRE_Int maxnz, HYPRE_Real tol, + ReduceMatType *rmat, HYPRE_Int maxnz, HYPRE_Real tol, hypre_PilutSolverGlobals *globals) { + HYPRE_Int logging = globals ? 
globals->logging : 0; HYPRE_Int i, ii, j, k, kk, l, m, ierr, diag_present; HYPRE_Int *perm, *iperm, *usrowptr, *uerowptr, *ucolind; @@ -67,13 +68,13 @@ HYPRE_Int hypre_SerILUT(DataDistType *ddist, HYPRE_DistributedMatrix matrix, iperm = ldu->iperm; /* Allocate work space */ - if (jr) { free(jr); jr = NULL; } + hypre_TFree(jr, HYPRE_MEMORY_HOST); jr = hypre_idx_malloc_init(nrows, -1, "hypre_SerILUT: jr"); - if (lr) { free(lr); lr = NULL; } + hypre_TFree(lr, HYPRE_MEMORY_HOST); lr = hypre_idx_malloc_init(nrows, -1, "hypre_SerILUT: lr"); - if (jw) { free(jw); jw = NULL; } + hypre_TFree(jw, HYPRE_MEMORY_HOST); jw = hypre_idx_malloc(nrows, "hypre_SerILUT: jw"); - if (w) { free(w); w = NULL; } + hypre_TFree(w, HYPRE_MEMORY_HOST); w = hypre_fp_malloc(nrows, "hypre_SerILUT: w" ); /* Find structural union of local rows */ @@ -119,7 +120,10 @@ HYPRE_Int hypre_SerILUT(DataDistType *ddist, HYPRE_DistributedMatrix matrix, nbnd = lnrows - nlocal ; #ifdef HYPRE_DEBUG - hypre_printf("nbnd = %d, lnrows=%d, nlocal=%d\n", nbnd, lnrows, nlocal ); + if (logging) + { + hypre_printf("nbnd = %d, lnrows=%d, nlocal=%d\n", nbnd, lnrows, nlocal ); + } #endif ldu->nnodes[0] = nlocal; @@ -193,7 +197,7 @@ HYPRE_Int hypre_SerILUT(DataDistType *ddist, HYPRE_DistributedMatrix matrix, w[jr[k]] = mult; if (fabs(mult) < rtol) - continue; /* First drop test */ + continue;/* First drop test */ for (l=usrowptr[kk]; l= firstrow && - col_ind[j] < lastrow && - iperm[col_ind[j]-firstrow] < nlocal) + col_ind[j] < lastrow && + iperm[col_ind[j]-firstrow] < nlocal) lr[lastlr++] = iperm[col_ind[j]-firstrow]; /* Copy the L elements separately */ if (col_ind[j] != i+firstrow) { /* Off-diagonal element */ @@ -289,7 +293,7 @@ HYPRE_Int hypre_SerILUT(DataDistType *ddist, HYPRE_DistributedMatrix matrix, w[jr[k]] = mult; if (fabs(mult) < rtol) - continue; /* First drop test */ + continue;/* First drop test */ for (l=usrowptr[kk]; ldvalues[lrow] = 1.0/w[0]; else { /* zero pivot */ @@ -632,8 +636,8 @@ void 
hypre_SecondDrop(HYPRE_Int maxnz, HYPRE_Real tol, HYPRE_Int row, * This version keeps only maxnzkeep **************************************************************************/ void hypre_SecondDropUpdate(HYPRE_Int maxnz, HYPRE_Int maxnzkeep, HYPRE_Real tol, HYPRE_Int row, - HYPRE_Int nlocal, HYPRE_Int *perm, HYPRE_Int *iperm, - FactorMatType *ldu, ReduceMatType *rmat, + HYPRE_Int nlocal, HYPRE_Int *perm, HYPRE_Int *iperm, + FactorMatType *ldu, ReduceMatType *rmat, hypre_PilutSolverGlobals *globals ) { HYPRE_Int i, j, nl; @@ -670,14 +674,14 @@ void hypre_SecondDropUpdate(HYPRE_Int maxnz, HYPRE_Int maxnzkeep, HYPRE_Real tol last = 1, first = lastjr-1; while (1) { while (last < first && /* and [last] is L */ - jw[last] >= firstrow && - jw[last] < lastrow && - iperm[jw[last]-firstrow] < nlocal) + jw[last] >= firstrow && + jw[last] < lastrow && + iperm[jw[last]-firstrow] < nlocal) last++; while (last < first && /* and [first] is not L */ - !(jw[first] >= firstrow && - jw[first] < lastrow && - iperm[jw[first]-firstrow] < nlocal)) + !(jw[first] >= firstrow && + jw[first] < lastrow && + iperm[jw[first]-firstrow] < nlocal)) first--; if (last < first) { @@ -688,8 +692,8 @@ void hypre_SecondDropUpdate(HYPRE_Int maxnz, HYPRE_Int maxnzkeep, HYPRE_Real tol if (last == first) { if (jw[last] >= firstrow && - jw[last] < lastrow && - iperm[jw[last]-firstrow] < nlocal) { + jw[last] < lastrow && + iperm[jw[last]-firstrow] < nlocal) { first++; last++; } diff --git a/src/distributed_ls/pilut/trifactor.c b/src/distributed_ls/pilut/trifactor.c index 91ab24c62..aed5d7a41 100644 --- a/src/distributed_ls/pilut/trifactor.c +++ b/src/distributed_ls/pilut/trifactor.c @@ -64,7 +64,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR ux = ldu->ux; /****************************************************************** - * Do the L(lx) = b, first + * Do the L(lx) = b, first *******************************************************************/ snbrpes = 
ldu->lcomm.snbrpes; spes = ldu->lcomm.spes; @@ -92,7 +92,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR * For forward substitution we do local+1st MIS == nnodes[1] (NOT [0]!) */ for (i=0; i auxsptr[i] && sindex[auxsptr[i]]ucomm.snbrpes; spes = ldu->ucomm.spes; @@ -190,7 +190,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR * by construction all remote lx elements needed are filled in */ for (i=nnodes[ii]-1; i>=nnodes[ii-1]; i--) { xx = 0.0; - for (j=rowptr[i]; j auxsptr[i] && sindex[auxsptr[i]]>=nnodes[ii-1]) { /* Something to send */ - for (j=auxsptr[i], l=0; j=nnodes[ii-1]; j++, l++) + for (j=auxsptr[i], l=0; j=nnodes[ii-1]; j++, l++) gatherbuf[l] = ux[sindex[j]]; hypre_MPI_Send( gatherbuf, l, hypre_MPI_REAL, @@ -244,7 +244,7 @@ void hypre_LDUSolve(DataDistType *ddist, FactorMatType *ldu, HYPRE_Real *x, HYPR /* Do the local next */ for (i=nnodes[0]-1; i>=0; i--) { xx = 0.0; - for (j=rowptr[i]; jsnbrpes == snbrpes ); + hypre_assert( TriSolveComm->snbrpes == snbrpes ); /* Create a sptr array into sindex */ for (i=1; iddist_nrows ); + hypre_assert( k < ddist->ddist_nrows ); } } /* this last one is to compute (raddr[i+1] - raddr[i]) */ raddr[rnbrpes] = x + k + lnrows; - assert( TriSolveComm->rnbrpes == rnbrpes ); + hypre_assert( TriSolveComm->rnbrpes == rnbrpes ); /* complete asynchronous receives */ for (i=0; i + +# Writing hypre documentation + +The hypre documentation is written in reStructuredText and built through a +combination of Sphinx, doxygen, and breathe. The User Manual source files are +in the directory `usr-manual` with top-level file `index.rst`. The Reference +Manual is in `ref-manual`, but the actual content is in the hypre header files. + +## Installing the utilities needed to build the documentation + +Building the documentation requires a number of things to be installed. 
To help +keep everything consistent and manageable, it is best to create a Python virtual +environment (venv) that contains all of the Python packages that are required. +This venv can be turned on and off as needed. The following will install the +venv in the directory `~/python-hypre`: + + mkdir ~/python-hypre + cd ~/python-hypre + python3 -m venv env + +This creates a subdirectory called `env` that will contain the venv packages. +The following will install the various packages that are needed: + + cd ~/python-hypre + source env/bin/activate + + pip install --upgrade pip + + pip install sphinx + pip install breathe + + deactivate + +Unfortunately, Sphinx uses a lot of LaTeX packages, so it may be necessary to +install a pretty complete version of TeX Live. This installation takes a while, +but since we all use LaTeX extensively, it's worth doing this for other reasons. +First, download the following and untar it somewhere: + + http://mirror.ctan.org/systems/texlive/tlnet/install-tl-unx.tar.gz + tar xzf install-tl-unx.tar.gz + +Now, `cd` into the untarred directory created above, type the following Perl +command, then use the sequence of menu commands to change the install directory +to your home directory (here, it's set to `~/texlive/2019`) and install: + + perl install-tl + D + 1 + ~/texlive/2019 + R + I + +Add `~/texlive/2019/bin/x86_64-linux` to your `PATH` and do `which pdflatex` to +verify that you did it correctly. + +## Building the documentation + +To build the documentation, first activate the virtual environment: + + source ~/python-hypre/env/bin/activate + +Now, just type `make` in the `src/docs` directory to build the documentation.
+When you are finished editing and building, turn off the virtual environment: + + deactivate + +To view the output, open a browser and navigate to the following links to see +the user and reference manuals (adjust the path as needed): + + file:///home/falgout2/hypre/src/docs/usr-manual-html/index.html + file:///home/falgout2/hypre/src/docs/ref-manual-html/index.html + +## Some useful links + +Sphinx: + +- http://www.sphinx-doc.org/en/stable/ +- http://www.sphinx-doc.org/en/stable/examples.html +- https://alabaster.readthedocs.io/en/latest/index.html + +reStructuredText: + +- https://docutils.sourceforge.io/rst.html + +Doxygen: + +- http://www.doxygen.nl/manual/index.html + +Breathe: + +- https://breathe.readthedocs.io/en/latest/index.html + diff --git a/src/docs/ref-manual/Makefile b/src/docs/ref-manual/Makefile index 66205f9c0..4aaecbeb6 100644 --- a/src/docs/ref-manual/Makefile +++ b/src/docs/ref-manual/Makefile @@ -10,12 +10,12 @@ PDFLATEX = pdflatex --interaction=nonstopmode ################################################################## # all: html pdf -all: html pdf +all: pdf install: clean: - @/bin/rm -fr html latex + @/bin/rm -fr html latex xml distclean: clean diff --git a/src/docs/ref-manual/conf.doxygen b/src/docs/ref-manual/conf.doxygen index 428f0d8c4..0b36c5b9c 100644 --- a/src/docs/ref-manual/conf.doxygen +++ b/src/docs/ref-manual/conf.doxygen @@ -1,1480 +1,2288 @@ -# Doxyfile 1.5.7.1 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] -# For lists items can also be appended using: -# TAG += value [value, ...] 
-# Values that contain spaces should be placed between quotes (" ") - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# http://www.gnu.org/software/libiconv for the list of possible encodings. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = hypre - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. -# This could be handy for archiving the generated documentation or -# if some version control system is used. - -PROJECT_NUMBER = 2.18.2 - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. - -OUTPUT_DIRECTORY = . - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create -# 4096 sub-directories (in 2 levels) under the output directory of each output -# format and will distribute the generated files over these directories. -# Enabling this option can be useful when feeding doxygen a huge amount of -# source files, where putting all generated files in the same directory would -# otherwise cause performance problems for the file system. - -CREATE_SUBDIRS = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. 
Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, -# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, -# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, -# Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, Slovene, -# Spanish, Swedish, and Ukrainian. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. - -BRIEF_MEMBER_DESC = NO - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator -# that is used to form the text in various listings. Each string -# in this list, if found as the leading text of the brief description, will be -# stripped from the text and the result after processing the whole list, is -# used as the annotated text. Otherwise, the brief description is used as-is. -# If left blank, the following values are used ("$name" is automatically -# replaced with the name of the entity): "The $name class" "The $name widget" -# "The $name file" "is" "provides" "specifies" "contains" -# "represents" "a" "an" "the" - -ABBREVIATE_BRIEF = - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. 
- -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full -# path before files name in the file list and in the header files. If set -# to NO the shortest path that makes the file name unique will be used. - -FULL_PATH_NAMES = NO - -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user-defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the -# path to strip. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of -# the path mentioned in the documentation of a class, which tells -# the reader which header file to include in order to use a class. -# If left blank only the name of the header file containing the class -# definition is used. Otherwise one should specify the include paths that -# are normally passed to the compiler using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter -# (but less readable) file names. This can be useful is your file systems -# doesn't support long names like on DOS, Mac, or CD-ROM. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. 
If set to NO, the JavaDoc -# comments will behave just like regular Qt-style comments -# (thus requiring an explicit @brief command for a brief description.) - -JAVADOC_AUTOBRIEF = NO - -# If the QT_AUTOBRIEF tag is set to YES then Doxygen will -# interpret the first line (until the first dot) of a Qt-style -# comment as the brief description. If set to NO, the comments -# will behave just like regular Qt-style comments (thus requiring -# an explicit \brief command for a brief description.) - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen -# treat a multi-line C++ special comment block (i.e. a block of //! or /// -# comments) as a brief description. This used to be the default behaviour. -# The new default is to treat a multi-line C++ comment block as a detailed -# description. Set this tag to YES if you prefer the old behaviour instead. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented -# member inherits the documentation from any documented member that it -# re-implements. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce -# a new page for each member. If set to NO, the documentation of a member will -# be part of the file/class/namespace that contains it. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. -# Doxygen uses this value to replace tabs by spaces in code fragments. - -TAB_SIZE = 8 - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". -# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user-defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. 
- -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C -# sources only. Doxygen will then generate output that is more tailored for C. -# For instance, some of the names that are used will be different. The list -# of all members will be omitted, etc. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java -# sources only. Doxygen will then generate output that is more tailored for -# Java. For instance, namespaces will be presented as packages, qualified -# scopes will look different, etc. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources only. Doxygen will then generate output that is more tailored for -# Fortran. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for -# VHDL. - -OPTIMIZE_OUTPUT_VHDL = NO - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should -# set this tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. -# func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. -# Doxygen will parse them like normal C++ but will assume all classes use public -# instead of private inheritance when no explicit protection keyword is present. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate getter -# and setter methods for a property. 
Setting this option to YES (the default) -# will make doxygen to replace the get and set methods by a property in the -# documentation. This will only work if the methods are indeed getting or -# setting a simple type. If this is not the case, or you want to show the -# methods anyway, you should set this option to NO. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. - -DISTRIBUTE_GROUP_DOC = NO - -# Set the SUBGROUPING tag to YES (the default) to allow class member groups of -# the same type (for instance a group of public functions) to be put as a -# subgroup of that type (e.g. under the Public Functions section). Set it to -# NO to prevent subgrouping. Alternatively, this can be done per class using -# the \nosubgrouping command. - -SUBGROUPING = YES - -# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum -# is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically -# be useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. - -TYPEDEF_HIDES_STRUCT = NO - -# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to -# determine which symbols to keep in memory and which to flush to disk. -# When the cache is full, less often used symbols will be written to disk. -# For small to medium size projects (<1000 input files) the default value is -# probably good enough. 
For larger projects a too small cache size can cause -# doxygen to be busy swapping symbols to and from disk most of the time -# causing a significant performance penality. -# If the system has enough physical memory increasing the cache will improve the -# performance by keeping more symbols in memory. Note that the value works on -# a logarithmic scale so increasing the size by one will rougly double the -# memory usage. The cache size is given by this formula: -# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, -# corresponding to a cache size of 2^16 = 65536 symbols - -SYMBOL_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. -# Private class members and static file members will be hidden unless -# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES - -EXTRACT_ALL = YES - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class -# will be included in the documentation. - -EXTRACT_PRIVATE = NO - -# If the EXTRACT_STATIC tag is set to YES all static members of a file -# will be included in the documentation. - -EXTRACT_STATIC = NO - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) -# defined locally in source files will be included in the documentation. -# If set to NO only classes defined in header files are included. - -EXTRACT_LOCAL_CLASSES = NO - -# This flag is only useful for Objective-C code. When set to YES local -# methods, which are defined in the implementation section but not in -# the interface are included in the documentation. -# If set to NO (the default) only methods in the interface are included. 
- -EXTRACT_LOCAL_METHODS = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base -# name of the file that contains the anonymous namespace. By default -# anonymous namespace are hidden. - -EXTRACT_ANON_NSPACES = NO - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all -# undocumented members of documented classes, files or namespaces. -# If set to NO (the default) these members will be included in the -# various overviews, but no documentation section is generated. -# This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_MEMBERS = YES - -# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. -# If set to NO (the default) these classes will be included in the various -# overviews. This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_CLASSES = YES - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all -# friend (class|struct|union) declarations. -# If set to NO (the default) these declarations will be included in the -# documentation. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any -# documentation blocks found inside the body of a function. -# If set to NO (the default) these blocks will be appended to the -# function's detailed documentation block. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation -# that is typed after a \internal command is included. If the tag is set -# to NO (the default) then the documentation will be excluded. -# Set it to YES to include the internal documentation. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate -# file names in lower-case letters. 
If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen -# will show members with their full class and namespace scopes in the -# documentation. If set to YES the scope will be hidden. - -HIDE_SCOPE_NAMES = YES - -# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen -# will put a list of the files that are included by a file in the documentation -# of that file. - -SHOW_INCLUDE_FILES = NO - -# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] -# is inserted in the documentation for inline members. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen -# will sort the (detailed) documentation of file and class members -# alphabetically by member name. If set to NO the members will appear in -# declaration order. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the -# brief documentation of file, namespace and class members alphabetically -# by member name. If set to NO (the default) the members will appear in -# declaration order. - -SORT_BRIEF_DOCS = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the -# hierarchy of group names into alphabetical order. If set to NO (the default) -# the group names will appear in their defined order. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be -# sorted by fully-qualified names, including namespaces. If set to -# NO (the default), the class list will be sorted only by class name, -# not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 
-# Note: This option applies only to the class list, not to the -# alphabetical list. - -SORT_BY_SCOPE_NAME = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or -# disable (NO) the todo list. This list is created by putting \todo -# commands in the documentation. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or -# disable (NO) the test list. This list is created by putting \test -# commands in the documentation. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or -# disable (NO) the bug list. This list is created by putting \bug -# commands in the documentation. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or -# disable (NO) the deprecated list. This list is created by putting -# \deprecated commands in the documentation. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional -# documentation sections, marked by \if sectionname ... \endif. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines -# the initial value of a variable or define consists of for it to appear in -# the documentation. If the initializer consists of more lines than specified -# here it will be hidden. Use a value of 0 to hide initializers completely. -# The appearance of the initializer of individual variables and defines in the -# documentation can be controlled using \showinitializer or \hideinitializer -# command in the documentation regardless of this setting. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated -# at the bottom of the documentation of classes and structs. If set to YES the -# list will mention the files that were used to generate the documentation. 
- -SHOW_USED_FILES = YES - -# If the sources in your project are distributed over multiple directories -# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy -# in the documentation. The default is NO. - -SHOW_DIRECTORIES = NO - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. -# This will remove the Files entry from the Quick Index and from the -# Folder Tree View (if specified). The default is YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the -# Namespaces page. This will remove the Namespaces entry from the Quick Index -# and from the Folder Tree View (if specified). The default is YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command , where is the value of -# the FILE_VERSION_FILTER tag, and is the name of an input file -# provided by doxygen. Whatever the program writes to standard output -# is used as the file version. See the manual for examples. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by -# doxygen. The layout file controls the global structure of the generated output files -# in an output format independent way. The create the layout file that represents -# doxygen's defaults, run doxygen with the -l option. You can optionally specify a -# file name after the option, if omitted DoxygenLayout.xml will be used as the name -# of the layout file. 
- -LAYOUT_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated -# by doxygen. Possible values are YES and NO. If left blank NO is used. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated by doxygen. Possible values are YES and NO. If left blank -# NO is used. - -WARNINGS = YES - -# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings -# for undocumented members. If EXTRACT_ALL is set to YES then this flag will -# automatically be disabled. - -WARN_IF_UNDOCUMENTED = YES - -# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some -# parameters in a documented function, or documenting parameters that -# don't exist or using markup commands wrongly. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be abled to get warnings for -# functions that are documented, but have no documentation for their parameters -# or return value. If set to NO (the default) doxygen will only warn about -# wrong or incomplete parameter documentation, but not about the absence of -# documentation. - -WARN_NO_PARAMDOC = NO - -# The WARN_FORMAT tag determines the format of the warning messages that -# doxygen can produce. The string should contain the $file, $line, and $text -# tags, which will be replaced by the file and line number from which the -# warning originated and the warning text. 
Optionally the format may contain -# $version, which will be replaced by the version of the file (if it could -# be obtained via FILE_VERSION_FILTER) - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning -# and error messages should be written. If left blank the output is written -# to stderr. - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories -# with spaces. - -INPUT = ../../struct_mv/HYPRE_struct_mv.h \ - ../../sstruct_mv/HYPRE_sstruct_mv.h \ - ../../IJ_mv/HYPRE_IJ_mv.h \ - ../../struct_ls/HYPRE_struct_ls.h \ - ../../sstruct_ls/HYPRE_sstruct_ls.h \ - ../../parcsr_ls/HYPRE_parcsr_ls.h \ - ../../krylov/HYPRE_krylov.h \ - ../../krylov/HYPRE_lobpcg.h - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is -# also the default input encoding. Doxygen uses libiconv (or the iconv built -# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for -# the list of possible encodings. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. 
If left -# blank the following patterns are tested: -# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx -# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 - -FILE_PATTERNS = HYPRE*.h - -# The RECURSIVE tag can be used to turn specify whether or not subdirectories -# should be searched for input files as well. Possible values are YES and NO. -# If left blank NO is used. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used select whether or not files or -# directories that are symbolic links (a Unix filesystem feature) are excluded -# from the input. - -EXCLUDE_SYMLINKS = YES - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. Note that the wildcards are matched -# against the file with absolute path, so to exclude all test directories -# for example use the pattern */test/* - -EXCLUDE_PATTERNS = - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or -# directories that contain example code fragments that are included (see -# the \include command). 
- -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank all files are included. - -EXAMPLE_PATTERNS = - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude -# commands irrespective of the value of the RECURSIVE tag. -# Possible values are YES and NO. If left blank NO is used. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or -# directories that contain image that are included in the documentation (see -# the \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command , where -# is the value of the INPUT_FILTER tag, and is the name of an -# input file. Doxygen will then use the output that the filter program writes -# to standard output. If FILTER_PATTERNS is specified, this tag will be -# ignored. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: -# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further -# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER -# is applied to all files. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will be used to filter the input files when producing source -# files to browse (i.e. when SOURCE_BROWSER is set to YES). 
- -FILTER_SOURCE_FILES = NO - -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will -# be generated. Documented entities will be cross-referenced with these sources. -# Note: To get rid of all source code in the generated output, make sure also -# VERBATIM_HEADERS is set to NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body -# of functions and classes directly in the documentation. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct -# doxygen to hide any special comment blocks from generated source code -# fragments. Normal C and C++ comments will always remain visible. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES -# then for each documented function all documented -# functions referencing it will be listed. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES -# then for each documented function all documented entities -# called/used by that function will be listed. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) -# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from -# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will -# link to the source code. Otherwise they will link to the documentstion. - -REFERENCES_LINK_SOURCE = NO - -# If the USE_HTAGS tag is set to YES then the references to source code -# will point to the HTML generated by the htags(1) tool instead of doxygen -# built-in source browser. The htags tool is part of GNU's global source -# tagging system (see http://www.gnu.org/software/global/global.html). You -# will need version 4.8.6 or higher. 
- -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen -# will generate a verbatim copy of the header file for each class for -# which an include is specified. Set to NO to disable this. - -VERBATIM_HEADERS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index -# of all compounds will be generated. Enable this if the project -# contains a lot of classes, structs, unions or interfaces. - -ALPHABETICAL_INDEX = YES - -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 4 - -# In case all classes in a project start with a common prefix, all -# classes will be put under the same header in the alphabetical index. -# The IGNORE_PREFIX tag can be used to specify one or more prefixes that -# should be ignored while generating the index headers. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will -# generate HTML output. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `html' will be used as the default path. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for -# each generated HTML page (for example: .htm,.php,.asp). 
If it is left blank -# doxygen will generate files with .html extension. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a personal HTML header for -# each generated HTML page. If it is left blank doxygen will generate a -# standard header. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a personal HTML footer for -# each generated HTML page. If it is left blank doxygen will generate a -# standard footer. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading -# style sheet that is used by each HTML page. It can be used to -# fine-tune the look of the HTML output. If the tag is left blank doxygen -# will generate a default style sheet. Note that doxygen will try to copy -# the style sheet file to the HTML output directory, so don't put your own -# stylesheet in the HTML output directory as well, or it will be erased! - -HTML_STYLESHEET = - -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. For this to work a browser that supports -# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox -# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). - -HTML_DYNAMIC_SECTIONS = NO - -# If the GENERATE_DOCSET tag is set to YES, additional index files -# will be generated that can be used as input for Apple's Xcode 3 -# integrated development environment, introduced with OSX 10.5 (Leopard). -# To create a documentation set, doxygen will generate a Makefile in the -# HTML output directory. 
Running make will produce the docset in that -# directory and running "make install" will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find -# it at startup. -# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. - -GENERATE_DOCSET = NO - -# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the -# feed. A documentation feed provides an umbrella under which multiple -# documentation sets from a single provider (such as a company or product suite) -# can be grouped. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that -# should uniquely identify the documentation set bundle. This should be a -# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen -# will append .docset to the name. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# If the GENERATE_HTMLHELP tag is set to YES, additional index files -# will be generated that can be used as input for tools like the -# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) -# of the generated HTML documentation. - -GENERATE_HTMLHELP = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can -# be used to specify the file name of the resulting .chm file. You -# can add a path in front of the file if the result should not be -# written to the html output directory. - -CHM_FILE = - -# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can -# be used to specify the location (absolute path including file name) of -# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run -# the HTML help compiler on the generated index.hhp. - -HHC_LOCATION = - -# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag -# controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). 
- -GENERATE_CHI = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING -# is used to encode HtmlHelp index (hhk), content (hhc) and project file -# content. - -CHM_INDEX_ENCODING = - -# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag -# controls whether a binary table of contents is generated (YES) or a -# normal table of contents (NO) in the .chm file. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members -# to the contents of the HTML help documentation and to the tree view. - -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER -# are set, an additional index file will be generated that can be used as input for -# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated -# HTML documentation. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can -# be used to specify the file name of the resulting .qch file. -# The path specified is relative to the HTML output folder. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating -# Qt Help Project output. For more information please see -# Qt Help Project / Namespace. - -QHP_NAMESPACE = org.doxygen.Project - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating -# Qt Help Project output. For more information please see -# Qt Help Project / Virtual Folders. - -QHP_VIRTUAL_FOLDER = doc - -# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can -# be used to specify the location of Qt's qhelpgenerator. -# If non-empty doxygen will try to run qhelpgenerator on the generated -# .qhp file . - -QHG_LOCATION = - -# The DISABLE_INDEX tag can be used to turn on/off the condensed index at -# top of each HTML page. The value NO (the default) enables the index and -# the value YES disables it. 
- -DISABLE_INDEX = NO - -# This tag can be used to set the number of enum values (range [1..20]) -# that doxygen will group on one line in the generated HTML documentation. - -ENUM_VALUES_PER_LINE = 4 - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. -# If the tag value is set to FRAME, a side panel will be generated -# containing a tree-like index structure (just like the one that -# is generated for HTML Help). For this to work a browser that supports -# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, -# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are -# probably better off using the HTML help feature. Other possible values -# for this tag are: HIERARCHIES, which will generate the Groups, Directories, -# and Class Hierarchy pages using a tree view instead of an ordered list; -# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which -# disables this behavior completely. For backwards compatibility with previous -# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE -# respectively. - -GENERATE_TREEVIEW = NO - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be -# used to set the initial width (in pixels) of the frame in which the tree -# is shown. - -TREEVIEW_WIDTH = 250 - -# Use this tag to change the font size of Latex formulas included -# as images in the HTML documentation. The default is 10. Note that -# when you change the font size after a successful doxygen run you need -# to manually remove any form_*.png images from the HTML output directory -# to force them to be regenerated. 
- -FORMULA_FONTSIZE = 10 - -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- - -# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will -# generate Latex output. - -GENERATE_LATEX = YES - -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `latex' will be used as the default path. - -LATEX_OUTPUT = latex - -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be -# invoked. If left blank `latex' will be used as the default command name. - -LATEX_CMD_NAME = latex - -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to -# generate index for LaTeX. If left blank `makeindex' will be used as the -# default command name. - -MAKEINDEX_CMD_NAME = makeindex - -# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact -# LaTeX documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_LATEX = NO - -# The PAPER_TYPE tag can be used to set the paper type that is used -# by the printer. Possible values are: a4, a4wide, letter, legal and -# executive. If left blank a4wide will be used. - -PAPER_TYPE = letter - -# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX -# packages that should be included in the LaTeX output. - -EXTRA_PACKAGES = - -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for -# the generated latex document. The header should contain everything until -# the first chapter. If it is left blank doxygen will generate a -# standard header. Notice: only use this tag if you know what you are doing! 
- -LATEX_HEADER = - -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated -# is prepared for conversion to pdf (using ps2pdf). The pdf file will -# contain links (just like the HTML output) instead of page references -# This makes the output suitable for online browsing using a pdf viewer. - -PDF_HYPERLINKS = YES - -# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of -# plain latex in the generated Makefile. Set this option to YES to get a -# higher quality PDF documentation. - -USE_PDFLATEX = YES - -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. -# command to the generated LaTeX files. This will instruct LaTeX to keep -# running if errors occur, instead of asking the user for help. -# This option is also used when generating formulas in HTML. - -LATEX_BATCHMODE = NO - -# If LATEX_HIDE_INDICES is set to YES then doxygen will not -# include the index chapters (such as File Index, Compound Index, etc.) -# in the output. - -LATEX_HIDE_INDICES = NO - -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- - -# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output -# The RTF output is optimized for Word 97 and may not look very pretty with -# other RTF readers or editors. - -GENERATE_RTF = NO - -# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `rtf' will be used as the default path. - -RTF_OUTPUT = rtf - -# If the COMPACT_RTF tag is set to YES Doxygen generates more compact -# RTF documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_RTF = NO - -# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated -# will contain hyperlink fields. 
The RTF file will -# contain links (just like the HTML output) instead of page references. -# This makes the output suitable for online browsing using WORD or other -# programs which support those fields. -# Note: wordpad (write) and others do not support links. - -RTF_HYPERLINKS = NO - -# Load stylesheet definitions from file. Syntax is similar to doxygen's -# config file, i.e. a series of assignments. You only have to provide -# replacements, missing definitions are set to their default value. - -RTF_STYLESHEET_FILE = - -# Set optional variables used in the generation of an rtf document. -# Syntax is similar to doxygen's config file. - -RTF_EXTENSIONS_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will -# generate man pages - -GENERATE_MAN = NO - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `man' will be used as the default path. - -MAN_OUTPUT = man - -# The MAN_EXTENSION tag determines the extension that is added to -# the generated man pages (default is the subroutine's section .3) - -MAN_EXTENSION = .3 - -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). These additional files -# only source the real man page, but without them the man command -# would be unable to find the correct page. The default is NO. 
- -MAN_LINKS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES Doxygen will -# generate an XML file that captures the structure of -# the code including all documentation. - -GENERATE_XML = NO - -# The XML_OUTPUT tag is used to specify where the XML pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `xml' will be used as the default path. - -XML_OUTPUT = xml - -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - -# If the XML_PROGRAMLISTING tag is set to YES Doxygen will -# dump the program listings (including syntax highlighting -# and cross-referencing information) to the XML output. Note that -# enabling this will significantly increase the size of the XML output. - -XML_PROGRAMLISTING = YES - -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will -# generate an AutoGen Definitions (see autogen.sf.net) file -# that captures the structure of the code including all -# documentation. Note that this feature is still experimental -# and incomplete at the moment. 
- -GENERATE_AUTOGEN_DEF = NO - -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- - -# If the GENERATE_PERLMOD tag is set to YES Doxygen will -# generate a Perl module file that captures the structure of -# the code including all documentation. Note that this -# feature is still experimental and incomplete at the -# moment. - -GENERATE_PERLMOD = NO - -# If the PERLMOD_LATEX tag is set to YES Doxygen will generate -# the necessary Makefile rules, Perl scripts and LaTeX code to be able -# to generate PDF and DVI output from the Perl module output. - -PERLMOD_LATEX = NO - -# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be -# nicely formatted so it can be parsed by a human reader. This is useful -# if you want to understand what is going on. On the other hand, if this -# tag is set to NO the size of the Perl module output will be much smaller -# and Perl will parse it just the same. - -PERLMOD_PRETTY = YES - -# The names of the make variables in the generated doxyrules.make file -# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. -# This is useful so different doxyrules.make files included by the same -# Makefile don't overwrite each other's variables. - -PERLMOD_MAKEVAR_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- - -# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will -# evaluate all C-preprocessor directives found in the sources and include -# files. - -ENABLE_PREPROCESSING = YES - -# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro -# names in the source code. If set to NO (the default) only conditional -# compilation will be performed. 
Macro expansion can be done in a controlled -# way by setting EXPAND_ONLY_PREDEF to YES. - -MACRO_EXPANSION = NO - -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES -# then the macro expansion is limited to the macros specified with the -# PREDEFINED and EXPAND_AS_DEFINED tags. - -EXPAND_ONLY_PREDEF = NO - -# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files -# in the INCLUDE_PATH (see below) will be search if a #include is found. - -SEARCH_INCLUDES = YES - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by -# the preprocessor. - -INCLUDE_PATH = - -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will -# be used. - -INCLUDE_FILE_PATTERNS = - -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. To prevent a macro definition from being -# undefined via #undef or recursively expanded use the := operator -# instead of the = operator. - -PREDEFINED = - -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition. - -EXPAND_AS_DEFINED = - -# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then -# doxygen's preprocessor will remove all function-like macros that are alone -# on a line, have an all uppercase name, and do not end with a semicolon. 
Such -# function macros are typically used for boiler-plate code, and will confuse -# the parser if not removed. - -SKIP_FUNCTION_MACROS = YES - -#--------------------------------------------------------------------------- -# Configuration::additions related to external references -#--------------------------------------------------------------------------- - -# The TAGFILES option can be used to specify one or more tagfiles. -# Optionally an initial location of the external documentation -# can be added for each tagfile. The format of a tag file without -# this location is as follows: -# TAGFILES = file1 file2 ... -# Adding location for the tag files is done as follows: -# TAGFILES = file1=loc1 "file2 = loc2" ... -# where "loc1" and "loc2" can be relative or absolute paths or -# URLs. If a location is present for each tag, the installdox tool -# does not have to be run to correct the links. -# Note that each tag file must have a unique name -# (where the name does NOT include the path) -# If a tag file is not located in the directory in which doxygen -# is run, you must also specify the path to the tagfile here. - -TAGFILES = - -# When a file name is specified after GENERATE_TAGFILE, doxygen will create -# a tag file that is based on the input files it reads. - -GENERATE_TAGFILE = - -# If the ALLEXTERNALS tag is set to YES all external classes will be listed -# in the class index. If set to NO only the inherited external classes -# will be listed. - -ALLEXTERNALS = NO - -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed -# in the modules index. If set to NO, only the current project's groups will -# be listed. - -EXTERNAL_GROUPS = YES - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of `which perl'). 
- -PERL_PATH = /usr/bin/perl - -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- - -# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will -# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base -# or super classes. Setting the tag to NO turns the diagrams off. Note that -# this option is superseded by the HAVE_DOT option below. This is only a -# fallback. It is recommended to install and use dot, since it yields more -# powerful graphs. - -CLASS_DIAGRAMS = NO - -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see -# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - -# If set to YES, the inheritance and collaboration graphs will hide -# inheritance and usage relations if the target is undocumented -# or is not a class. - -HIDE_UNDOC_RELATIONS = YES - -# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is -# available from the path. This tool is part of Graphviz, a graph visualization -# toolkit from AT&T and Lucent Bell Labs. The other options in this section -# have no effect if this option is set to NO (the default) - -HAVE_DOT = NO - -# By default doxygen will write a font called FreeSans.ttf to the output -# directory and reference it in all dot files that doxygen generates. This -# font does not include all possible unicode characters however, so when you need -# these (or just want a differently looking font) you can specify the font name -# using DOT_FONTNAME. 
You need need to make sure dot is able to find the font, -# which can be done by putting it in a standard location or by setting the -# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory -# containing the font. - -DOT_FONTNAME = FreeSans - -# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. -# The default size is 10pt. - -DOT_FONTSIZE = 10 - -# By default doxygen will tell dot to use the output directory to look for the -# FreeSans.ttf font (which doxygen will put there itself). If you specify a -# different font using DOT_FONTNAME you can set the path where dot -# can find it using this tag. - -DOT_FONTPATH = - -# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect inheritance relations. Setting this tag to YES will force the -# the CLASS_DIAGRAMS tag to NO. - -CLASS_GRAPH = YES - -# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect implementation dependencies (inheritance, containment, and -# class references variables) of the class with other documented classes. - -COLLABORATION_GRAPH = YES - -# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for groups, showing the direct groups dependencies - -GROUP_GRAPHS = YES - -# If the UML_LOOK tag is set to YES doxygen will generate inheritance and -# collaboration diagrams in a style similar to the OMG's Unified Modeling -# Language. - -UML_LOOK = NO - -# If set to YES, the inheritance and collaboration graphs will show the -# relations between templates and their instances. 
- -TEMPLATE_RELATIONS = NO - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT -# tags are set to YES then doxygen will generate a graph for each documented -# file showing the direct and indirect include dependencies of the file with -# other documented files. - -INCLUDE_GRAPH = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and -# HAVE_DOT tags are set to YES then doxygen will generate a graph for each -# documented header file showing the documented files that directly or -# indirectly include this file. - -INCLUDED_BY_GRAPH = YES - -# If the CALL_GRAPH and HAVE_DOT options are set to YES then -# doxygen will generate a call dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable call graphs -# for selected functions only using the \callgraph command. - -CALL_GRAPH = NO - -# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then -# doxygen will generate a caller dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable caller -# graphs for selected functions only using the \callergraph command. - -CALLER_GRAPH = NO - -# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen -# will graphical hierarchy of all classes instead of a textual one. - -GRAPHICAL_HIERARCHY = YES - -# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES -# then doxygen will show the dependencies a directory has on other directories -# in a graphical way. The dependency relations are determined by the #include -# relations between the files in the directories. - -DIRECTORY_GRAPH = YES - -# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. 
Possible values are png, jpg, or gif -# If left blank png will be used. - -DOT_IMAGE_FORMAT = png - -# The tag DOT_PATH can be used to specify the path where the dot tool can be -# found. If left blank, it is assumed the dot tool can be found in the path. - -DOT_PATH = - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the -# \dotfile command). - -DOTFILE_DIRS = - -# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of -# nodes that will be shown in the graph. If the number of nodes in a graph -# becomes larger than this value, doxygen will truncate the graph, which is -# visualized by representing a node as a red box. Note that doxygen if the -# number of direct children of the root node in a graph is already larger than -# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note -# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. - -DOT_GRAPH_MAX_NODES = 50 - -# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the -# graphs generated by dot. A depth value of 3 means that only nodes reachable -# from the root by following a path via at most 3 edges will be shown. Nodes -# that lay further from the root node will be omitted. Note that setting this -# option to 1 or 2 may greatly reduce the computation time needed for large -# code bases. Also note that the size of a graph can be further restricted by -# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. - -MAX_DOT_GRAPH_DEPTH = 0 - -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not -# seem to support this out of the box. Warning: Depending on the platform used, -# enabling this option may lead to badly anti-aliased labels on the edges of -# a graph (i.e. they become hard to read). 
- -DOT_TRANSPARENT = NO - -# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output -# files in one run (i.e. multiple -o and -T options on the command line). This -# makes dot run faster, but since only newer versions of dot (>1.8.10) -# support this, this feature is disabled by default. - -DOT_MULTI_TARGETS = NO - -# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will -# generate a legend page explaining the meaning of the various boxes and -# arrows in the dot generated graphs. - -GENERATE_LEGEND = YES - -# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will -# remove the intermediate dot files that are used to generate -# the various graphs. - -DOT_CLEANUP = YES - -#--------------------------------------------------------------------------- -# Configuration::additions related to the search engine -#--------------------------------------------------------------------------- - -# The SEARCHENGINE tag specifies whether or not a search engine should be -# used. If set to NO the values of all tags below this one will be ignored. - -SEARCHENGINE = NO +# Doxyfile 1.8.5 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a double hash (##) is considered a comment and is placed in +# front of the TAG it is preceding. +# +# All text after a single hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] +# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (\" \"). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. 
The default is UTF-8 which is also the encoding used for all text +# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv +# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv +# for the list of possible encodings. +# The default value is: UTF-8. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by +# double-quotes, unless you are using Doxywizard) that should identify the +# project for which the documentation is generated. This name is used in the +# title of most generated pages and in a few other places. +# The default value is: My Project. + +PROJECT_NAME = hypre + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. This +# could be handy for archiving the generated documentation or if some version +# control system is used. + +PROJECT_NUMBER = 2.21.0 + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give the viewer a +# quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = + +# With the PROJECT_LOGO tag one can specify a logo or icon that is included in +# the documentation. The maximum height of the logo should not exceed 55 pixels +# and the maximum width should not exceed 200 pixels. Doxygen will copy the logo +# to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path +# into which the generated documentation will be written. If a relative path is +# entered, it will be relative to the location where doxygen was started. If +# left blank the current directory will be used. + +OUTPUT_DIRECTORY = . + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub- +# directories (in 2 levels) under the output directory of each output format and +# will distribute the generated files over these directories.
Enabling this +# option can be useful when feeding doxygen a huge amount of source files, where +# putting all generated files in the same directory would otherwise cause +# performance problems for the file system. +# The default value is: NO. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# Possible values are: Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese- +# Traditional, Croatian, Czech, Danish, Dutch, English, Esperanto, Farsi, +# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en, +# Korean, Korean-en, Latvian, Norwegian, Macedonian, Persian, Polish, +# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, +# Turkish, Ukrainian and Vietnamese. +# The default value is: English. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES doxygen will include brief member +# descriptions after the members that are listed in the file and class +# documentation (similar to Javadoc). Set to NO to disable this. +# The default value is: YES. + +BRIEF_MEMBER_DESC = NO + +# If the REPEAT_BRIEF tag is set to YES doxygen will prepend the brief +# description of a member or function before the detailed description. +# +# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. +# The default value is: YES. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator that is +# used to form the text in various listings. Each string in this list, if found +# as the leading text of the brief description, will be stripped from the text +# and the result, after processing the whole list, is used as the annotated +# text. Otherwise, the brief description is used as-is.
If left blank, the +# following values are used ($name is automatically replaced with the name of +# the entity): The $name class, The $name widget, The $name file, is, provides, +# specifies, contains, represents, a, an and the. + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# doxygen will generate a detailed section even if there is only a brief +# description. +# The default value is: NO. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. +# The default value is: NO. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES doxygen will prepend the full path +# before file names in the file list and in the header files. If set to NO the +# shortest path that makes the file name unique will be used. +# The default value is: YES. + +FULL_PATH_NAMES = NO + +# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. +# Stripping is only done if one of the specified strings matches the left-hand +# part of the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the path to +# strip. +# +# Note that you can specify absolute paths here, but also relative paths, which +# will be relative from the directory where doxygen is started. +# This tag requires that the tag FULL_PATH_NAMES is set to YES. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the +# path mentioned in the documentation of a class, which tells the reader which +# header file to include in order to use a class. If left blank only the name of +# the header file containing the class definition is used.
Otherwise one should +# specify the list of include paths that are normally passed to the compiler +# using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but +# less readable) file names. This can be useful if your file system doesn't +# support long names like on DOS, Mac, or CD-ROM. +# The default value is: NO. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the +# first line (until the first dot) of a Javadoc-style comment as the brief +# description. If set to NO, the Javadoc-style will behave just like regular Qt- +# style comments (thus requiring an explicit @brief command for a brief +# description.) +# The default value is: NO. + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first +# line (until the first dot) of a Qt-style comment as the brief description. If +# set to NO, the Qt-style will behave just like regular Qt-style comments (thus +# requiring an explicit \brief command for a brief description.) +# The default value is: NO. + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a +# multi-line C++ special comment block (i.e. a block of //! or /// comments) as +# a brief description. This used to be the default behavior. The new default is +# to treat a multi-line C++ comment block as a detailed description. Set this +# tag to YES if you prefer the old behavior instead. +# +# Note that setting this tag to YES also means that rational rose comments are +# not recognized any more. +# The default value is: NO. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the +# documentation from any documented member that it re-implements. +# The default value is: YES.
+ +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce a +# new page for each member. If set to NO, the documentation of a member will be +# part of the file/class/namespace that contains it. +# The default value is: NO. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen +# uses this value to replace tabs by spaces in code fragments. +# Minimum value: 1, maximum value: 16, default value: 4. + +TAB_SIZE = 4 + +# This tag can be used to specify a number of aliases that act as commands in +# the documentation. An alias has the form: +# name=value +# For example adding +# "sideeffect=@par Side Effects:\n" +# will allow you to put the command \sideeffect (or @sideeffect) in the +# documentation, which will result in a user-defined paragraph with heading +# "Side Effects:". You can put \n's in the value part of an alias to insert +# newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding "class=itcl::class" +# will allow you to use the command class in the itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. For +# instance, some of the names that are used will be different. The list of all +# members will be omitted, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or +# Python sources only. Doxygen will then generate output that is more tailored +# for that language. For instance, namespaces will be presented as packages, +# qualified scopes will look different, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources. 
Doxygen will then generate output that is tailored for Fortran. +# The default value is: NO. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for VHDL. +# The default value is: NO. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given +# extension. Doxygen has a built-in mapping, but you can override or extend it +# using this tag. The format is ext=language, where ext is a file extension, and +# language is one of the parsers supported by doxygen: IDL, Java, Javascript, +# C#, C, C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. +# +# Note: For files without extension you can use no_extension as a placeholder. +# +# Note that for custom extensions you also need to set FILE_PATTERNS otherwise +# the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments +# according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you can +# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in +# case of backward compatibility issues. +# The default value is: YES. + +MARKDOWN_SUPPORT = YES + +# When enabled doxygen tries to link words that correspond to documented +# classes, or namespaces to their corresponding documentation. Such a link can +# be prevented in individual cases by putting a % sign in front of the word +# or globally by setting AUTOLINK_SUPPORT to NO. +# The default value is: YES.
+ +AUTOLINK_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should set this +# tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); +# versus func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. +# The default value is: NO. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. +# The default value is: NO. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: +# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# will parse them like normal C++ but will assume all classes use public instead +# of private inheritance when no explicit protection keyword is present. +# The default value is: NO. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate +# getter and setter methods for a property. Setting this option to YES will make +# doxygen replace the get and set methods by a property in the documentation. +# This will only work if the methods are indeed getting or setting a simple +# type. If this is not the case, or you want to show the methods anyway, you +# should set this option to NO. +# The default value is: YES. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. +# The default value is: NO.
+ +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES to allow class member groups of the same type +# (for instance a group of public functions) to be put as a subgroup of that +# type (e.g. under the Public Functions section). Set it to NO to prevent +# subgrouping. Alternatively, this can be done per class using the +# \nosubgrouping command. +# The default value is: YES. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions +# are shown inside the group in which they are included (e.g. using \ingroup) +# instead of on a separate page (for HTML and Man pages) or section (for LaTeX +# and RTF). +# +# Note that this feature does not work in combination with +# SEPARATE_MEMBER_PAGES. +# The default value is: NO. + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions +# with only public data fields or simple typedef fields will be shown inline in +# the documentation of the scope in which they are defined (i.e. file, +# namespace, or group documentation), provided this scope is documented. If set +# to NO, structs, classes, and unions are shown on a separate page (for HTML and +# Man pages) or section (for LaTeX and RTF). +# The default value is: NO. + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or +# enum is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically be +# useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. +# The default value is: NO. 
+ +TYPEDEF_HIDES_STRUCT = NO + +# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This +# cache is used to resolve symbols given their name and scope. Since this can be +# an expensive process and often the same symbol appears multiple times in the +# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small +# doxygen will become slower. If the cache is too large, memory is wasted. The +# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range +# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 +# symbols. At the end of a run doxygen will report the cache usage and suggest +# the optimal cache size from a speed point of view. +# Minimum value: 0, maximum value: 9, default value: 0. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. Private +# class members and static file members will be hidden unless the +# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. +# Note: This will also disable the warnings about undocumented members that are +# normally produced when WARNINGS is set to YES. +# The default value is: NO. + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will +# be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. +# The default value is: NO. + +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file will be +# included in the documentation. +# The default value is: NO. 
+ +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) defined +# locally in source files will be included in the documentation. If set to NO +# only classes defined in header files are included. Does not have any effect +# for Java sources. +# The default value is: YES. + +EXTRACT_LOCAL_CLASSES = NO + +# This flag is only useful for Objective-C code. When set to YES local methods, +# which are defined in the implementation section but not in the interface are +# included in the documentation. If set to NO only methods in the interface are +# included. +# The default value is: NO. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base name of +# the file that contains the anonymous namespace. By default anonymous namespace +# are hidden. +# The default value is: NO. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all +# undocumented members inside documented classes or files. If set to NO these +# members will be included in the various overviews, but no documentation +# section is generated. This option has no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_MEMBERS = YES + +# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. If set +# to NO these classes will be included in the various overviews. This option has +# no effect if EXTRACT_ALL is enabled. +# The default value is: NO. + +HIDE_UNDOC_CLASSES = YES + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend +# (class|struct|union) declarations. If set to NO these declarations will be +# included in the documentation. +# The default value is: NO. 
+ +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any +# documentation blocks found inside the body of a function. If set to NO these +# blocks will be appended to the function's detailed documentation block. +# The default value is: NO. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation that is typed after a +# \internal command is included. If the tag is set to NO then the documentation +# will be excluded. Set it to YES to include the internal documentation. +# The default value is: NO. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file +# names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. +# The default value is: system dependent. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with +# their full class and namespace scopes in the documentation. If set to YES the +# scope will be hidden. +# The default value is: NO. + +HIDE_SCOPE_NAMES = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of +# the files that are included by a file in the documentation of that file. +# The default value is: YES. + +SHOW_INCLUDE_FILES = NO + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include +# files with double quotes in the documentation rather than with sharp brackets. +# The default value is: NO. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the +# documentation for inline members. +# The default value is: YES. 
+ +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the +# (detailed) documentation of file and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. +# The default value is: YES. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief +# descriptions of file, namespace and class members alphabetically by member +# name. If set to NO the members will appear in declaration order. +# The default value is: NO. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the +# (brief and detailed) documentation of class members so that constructors and +# destructors are listed first. If set to NO the constructors will appear in the +# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. +# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief +# member documentation. +# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting +# detailed member documentation. +# The default value is: NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy +# of group names into alphabetical order. If set to NO the group names will +# appear in their defined order. +# The default value is: NO. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by +# fully-qualified names, including namespaces. If set to NO, the class list will +# be sorted only by class name, not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the alphabetical +# list. +# The default value is: NO. 
+ +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper +# type resolution of all parameters of a function it will reject a match between +# the prototype and the implementation of a member function even if there is +# only one candidate or it is obvious which candidate to choose by doing a +# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still +# accept a match between prototype and implementation in such cases. +# The default value is: NO. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable ( YES) or disable ( NO) the +# todo list. This list is created by putting \todo commands in the +# documentation. +# The default value is: YES. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable ( YES) or disable ( NO) the +# test list. This list is created by putting \test commands in the +# documentation. +# The default value is: YES. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable ( YES) or disable ( NO) the bug +# list. This list is created by putting \bug commands in the documentation. +# The default value is: YES. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable ( YES) or disable ( NO) +# the deprecated list. This list is created by putting \deprecated commands in +# the documentation. +# The default value is: YES. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional documentation +# sections, marked by \if <section_label> ... \endif and \cond <section_label> +# ... \endcond blocks. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the +# initial value of a variable or macro / define can have for it to appear in the +# documentation. If the initializer consists of more lines than specified here +# it will be hidden. Use a value of 0 to hide initializers completely.
The +# appearance of the value of individual variables and macros / defines can be +# controlled using \showinitializer or \hideinitializer command in the +# documentation regardless of this setting. +# Minimum value: 0, maximum value: 10000, default value: 30. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at +# the bottom of the documentation of classes and structs. If set to YES the list +# will mention the files that were used to generate the documentation. +# The default value is: YES. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This +# will remove the Files entry from the Quick Index and from the Folder Tree View +# (if specified). +# The default value is: YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces +# page. This will remove the Namespaces entry from the Quick Index and from the +# Folder Tree View (if specified). +# The default value is: YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command command input-file, where command is the value of the +# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided +# by doxygen. Whatever the program writes to standard output is used as the file +# version. For an example see the documentation. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. 
You can +# optionally specify a file name after the option, if omitted DoxygenLayout.xml +# will be used as the name of the layout file. +# +# Note that if you run doxygen from a directory containing a file called +# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE +# tag is left empty. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files containing +# the reference definitions. This must be a list of .bib files. The .bib +# extension is automatically appended if omitted. This requires the bibtex tool +# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# For LaTeX the style of the bibliography can be controlled using +# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the +# search path. Do not use file names with spaces, bibtex cannot handle them. See +# also \cite for info how to create references. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# Configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated to +# standard output by doxygen. If QUIET is set to YES this implies that the +# messages are off. +# The default value is: NO. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated to standard error ( stderr) by doxygen. If WARNINGS is set to YES +# this implies that the warnings are on. +# +# Tip: Turn warnings on while writing the documentation. +# The default value is: YES. + +WARNINGS = YES + +# If the WARN_IF_UNDOCUMENTED tag is set to YES, then doxygen will generate +# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: YES. 
+ +WARN_IF_UNDOCUMENTED = YES + +# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some parameters +# in a documented function, or documenting parameters that don't exist or using +# markup commands wrongly. +# The default value is: YES. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that +# are documented, but have no documentation for their parameters or return +# value. If set to NO doxygen will only warn about wrong or incomplete parameter +# documentation, but not about the absence of documentation. +# The default value is: NO. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that doxygen +# can produce. The string should contain the $file, $line, and $text tags, which +# will be replaced by the file and line number from which the warning originated +# and the warning text. Optionally the format may contain $version, which will +# be replaced by the version of the file (if it could be obtained via +# FILE_VERSION_FILTER) +# The default value is: $file:$line: $text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning and error +# messages should be written. If left blank the output is written to standard +# error (stderr). + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# Configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag is used to specify the files and/or directories that contain +# documented source files. You may enter file names like myfile.cpp or +# directories like /usr/src/myproject. Separate the files or directories with +# spaces. +# Note: If this tag is empty the current directory is searched. 
+ +INPUT = \ +../../struct_mv/HYPRE_struct_mv.h \ +../../sstruct_mv/HYPRE_sstruct_mv.h \ +../../IJ_mv/HYPRE_IJ_mv.h \ +../../struct_ls/HYPRE_struct_ls.h \ +../../sstruct_ls/HYPRE_sstruct_ls.h \ +../../parcsr_ls/HYPRE_parcsr_ls.h \ +../../krylov/HYPRE_krylov.h \ +../../krylov/HYPRE_lobpcg.h + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses +# libiconv (or the iconv built into libc) for the transcoding. See the libiconv +# documentation (see: http://www.gnu.org/software/libiconv) for the list of +# possible encodings. +# The default value is: UTF-8. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank the +# following patterns are tested:*.c, *.cc, *.cxx, *.cpp, *.c++, *.java, *.ii, +# *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, *.hh, *.hxx, *.hpp, +# *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, *.m, *.markdown, +# *.md, *.mm, *.dox, *.py, *.f90, *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, +# *.qsf, *.as and *.js. + +FILE_PATTERNS = HYPRE*.h + +# The RECURSIVE tag can be used to specify whether or not subdirectories should +# be searched for input files as well. +# The default value is: NO. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. 
+# The default value is: NO. + +EXCLUDE_SYMLINKS = YES + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test +# +# Note that the wildcards are matched against the file with absolute path, so to +# exclude all test directories use the pattern */test/* + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or directories +# that contain example code fragments that are included (see the \include +# command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and +# *.h) to filter out the source-files in the directories. If left blank all +# files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude commands +# irrespective of the value of the RECURSIVE tag. +# The default value is: NO. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or directories +# that contain images that are to be included in the documentation (see the +# \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. 
Doxygen will invoke the filter program +# by executing (via popen()) the command: +# +# <filter> <input-file> +# +# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the +# name of an input file. Doxygen will then use the output that the filter +# program writes to standard output. If FILTER_PATTERNS is specified, this tag +# will be ignored. +# +# Note that the filter must not add or remove lines; it is applied before the +# code is scanned, but not when the output code is generated. If lines are added +# or removed, the anchors will not be placed correctly. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: pattern=filter +# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how +# filters are used. If the FILTER_PATTERNS tag is empty or if none of the +# patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER ) will also be used to filter the input files that are used for +# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). +# The default value is: NO. + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERNS (if any) and +# it is also possible to disable source filtering for a specific pattern using +# *.ext= (so without naming a filter). +# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. + +FILTER_SOURCE_PATTERNS = + +# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that +# is part of the input, its contents will be placed on the main page +# (index.html).
This can be useful if you have a project on for instance GitHub +# and want to reuse the introduction page also for the doxygen output. + +USE_MDFILE_AS_MAINPAGE = + +#--------------------------------------------------------------------------- +# Configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will be +# generated. Documented entities will be cross-referenced with these sources. +# +# Note: To get rid of all source code in the generated output, make sure that +# also VERBATIM_HEADERS is set to NO. +# The default value is: NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body of functions, +# classes and enums directly into the documentation. +# The default value is: NO. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any +# special comment blocks from generated source code fragments. Normal C, C++ and +# Fortran comments will always remain visible. +# The default value is: YES. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES then for each documented +# function all documented functions referencing it will be listed. +# The default value is: NO. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES then for each documented function +# all documented entities called/used by that function will be listed. +# The default value is: NO. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set +# to YES, then the hyperlinks from functions in REFERENCES_RELATION and +# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will +# link to the documentation. +# The default value is: YES. 
+ +REFERENCES_LINK_SOURCE = NO + +# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the +# source code will show a tooltip with additional information such as prototype, +# brief description and links to the definition and documentation. Since this +# will make the HTML file larger and loading of large files a bit slower, you +# can opt to disable this feature. +# The default value is: YES. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +SOURCE_TOOLTIPS = YES + +# If the USE_HTAGS tag is set to YES then the references to source code will +# point to the HTML generated by the htags(1) tool instead of doxygen's built-in +# source browser. The htags tool is part of GNU's global source tagging system +# (see http://www.gnu.org/software/global/global.html). You will need version +# 4.8.6 or higher. +# +# To use it do the following: +# - Install the latest version of global +# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Make sure the INPUT points to the root of the source tree +# - Run doxygen as normal +# +# Doxygen will invoke htags (and that will in turn invoke gtags), so these +# tools must be available from the command line (i.e. in the search path). +# +# The result: instead of the source browser generated by doxygen, the links to +# source code will now point to the output of htags. +# The default value is: NO. +# This tag requires that the tag SOURCE_BROWSER is set to YES. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES then doxygen will generate a +# verbatim copy of the header file for each class for which an include is +# specified. Set to NO to disable this. +# See also: Section \class. +# The default value is: YES.
+ +VERBATIM_HEADERS = NO + +#--------------------------------------------------------------------------- +# Configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all +# compounds will be generated. Enable this if the project contains a lot of +# classes, structs, unions or interfaces. +# The default value is: YES. + +ALPHABETICAL_INDEX = YES + +# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in +# which the alphabetical index list will be split. +# Minimum value: 1, maximum value: 20, default value: 5. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all classes will +# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag +# can be used to specify a prefix (or a list of prefixes) that should be ignored +# while generating the index headers. +# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES doxygen will generate HTML output +# The default value is: YES. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a +# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of +# it. +# The default directory is: html. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each +# generated HTML page (for example: .htm, .php, .asp). +# The default value is: .html. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a user-defined HTML header file for +# each generated HTML page. If the tag is left blank doxygen will generate a +# standard header. +# +# To get valid HTML, the header file must include any scripts and style sheets +# that doxygen needs, which depend on the configuration options used (e.g. +# the setting GENERATE_TREEVIEW). It is highly recommended to start with a +# default header using +# doxygen -w html new_header.html new_footer.html new_stylesheet.css +# YourConfigFile +# and then modify the file new_header.html. See also section "Doxygen usage" +# for information on how to generate the default header that doxygen normally +# uses. +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. For a description +# of the possible markers and block names see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each +# generated HTML page. If the tag is left blank doxygen will generate a standard +# footer. See HTML_HEADER for more information on how to generate a default +# footer and what special commands can be used inside the footer. See also +# section "Doxygen usage" for information on how to generate the default footer +# that doxygen normally uses. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style +# sheet that is used by each HTML page. It can be used to fine-tune the look of +# the HTML output. If left blank doxygen will generate a default style sheet. +# See also section "Doxygen usage" for information on how to generate the style +# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as +# it is more robust and this tag (HTML_STYLESHEET) will in the future become +# obsolete. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_STYLESHEET = + +# The HTML_EXTRA_STYLESHEET tag can be used to specify an additional user- +# defined cascading style sheet that is included after the standard style sheets +# created by doxygen. Using this option one can overrule certain style aspects. +# This is preferred over using HTML_STYLESHEET since it does not replace the +# standard style sheet and is therefore more robust against future updates. +# Doxygen will copy the style sheet file to the output directory. For an example +# see the documentation. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that the +# files will be copied as-is; there are no commands or markers available. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen +# will adjust the colors in the stylesheet and background images according to +# this color. Hue is specified as an angle on a colorwheel, see +# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 +# purple, and 360 is red again. +# Minimum value: 0, maximum value: 359, default value: 220. +# This tag requires that the tag GENERATE_HTML is set to YES.
+ +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors +# in the HTML output. For a value of 0 the output will use grayscales only. A +# value of 255 will produce the most vivid colors. +# Minimum value: 0, maximum value: 255, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the +# luminance component of the colors in the HTML output. Values below 100 +# gradually make the output lighter, whereas values above 100 make the output +# darker. The value divided by 100 is the actual gamma applied, so 80 represents +# a gamma of 0.8, the value 220 represents a gamma of 2.2, and 100 does not +# change the gamma. +# Minimum value: 40, maximum value: 240, default value: 80. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting this +# to NO can help when comparing the output of multiple runs. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_TIMESTAMP = NO + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries +# shown in the various tree structured indices initially; the user can expand +# and collapse entries dynamically later on. Doxygen will expand the tree to +# such a level that at most the specified number of entries are visible (unless +# a fully collapsed tree already exceeds this amount).
So setting the number of +# entries to 1 will produce a fully collapsed tree by default. 0 is a special value +# representing an infinite number of entries and will result in a fully expanded +# tree by default. +# Minimum value: 0, maximum value: 9999, default value: 100. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files will be +# generated that can be used as input for Apple's Xcode 3 integrated development +# environment (see: http://developer.apple.com/tools/xcode/), introduced with +# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a +# Makefile in the HTML output directory. Running make will produce the docset in +# that directory and running make install will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at +# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_DOCSET = NO + +# This tag determines the name of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# The default value is: Doxygen generated docs. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# This tag specifies a string that should uniquely identify the documentation +# set bundle. This should be a reverse domain-name style string, e.g. +# com.mycompany.MyDocSet. Doxygen will append .docset to the name. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_DOCSET is set to YES.
+ +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. +# The default value is: org.doxygen.Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. +# The default value is: Publisher. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three +# additional HTML index files: index.hhp, index.hhc, and index.hhk. The +# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop +# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on +# Windows. +# +# The HTML Help Workshop contains a compiler that can convert all HTML output +# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML +# files are now used as the Windows 98 help format, and will replace the old +# Windows help format (.hlp) on all Windows platforms in the future. Compressed +# HTML files also contain an index, a table of contents, and you can search for +# words in the documentation. The HTML workshop also contains a viewer for +# compressed HTML files. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_HTMLHELP = NO + +# The CHM_FILE tag can be used to specify the file name of the resulting .chm +# file. You can add a path in front of the file if the result should not be +# written to the html output directory. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_FILE = + +# The HHC_LOCATION tag can be used to specify the location (absolute path +# including file name) of the HTML help compiler ( hhc.exe). 
If non-empty +# doxygen will try to run the HTML help compiler on the generated index.hhp. +# The file has to be specified with full path. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +HHC_LOCATION = + +# The GENERATE_CHI flag controls if a separate .chi index file is generated ( +# YES) or that it should be included in the master .chm file ( NO). +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +GENERATE_CHI = NO + +# The CHM_INDEX_ENCODING is used to encode HtmlHelp index ( hhk), content ( hhc) +# and project file content. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +CHM_INDEX_ENCODING = + +# The BINARY_TOC flag controls whether a binary table of contents is generated ( +# YES) or a normal table of contents ( NO) in the .chm file. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members to +# the table of contents of the HTML help documentation and to the tree view. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTMLHELP is set to YES. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that +# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help +# (.qch) of the generated HTML documentation. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify +# the file name of the resulting .qch file. The path specified is relative to +# the HTML output folder. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help +# Project output. 
For more information please see Qt Help Project / Namespace +# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt +# Help Project output. For more information please see Qt Help Project / Virtual +# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- +# folders). +# The default value is: doc. +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_VIRTUAL_FOLDER = doc + +# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom +# filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see Qt Help Project / Custom +# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- +# filters). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's filter section matches. Qt Help Project / Filter Attributes (see: +# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# This tag requires that the tag GENERATE_QHP is set to YES. + +QHP_SECT_FILTER_ATTRS = + +# The QHG_LOCATION tag can be used to specify the location of Qt's +# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the +# generated .qhp file. +# This tag requires that the tag GENERATE_QHP is set to YES. 
+ +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be +# generated, together with the HTML files, they form an Eclipse help plugin. To +# install this plugin and make it available under the help contents menu in +# Eclipse, the contents of the directory containing the HTML and XML files needs +# to be copied into the plugins directory of eclipse. The name of the directory +# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. +# After copying Eclipse needs to be restarted before the help appears. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the Eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have this +# name. Each documentation set should have its own identifier. +# The default value is: org.doxygen.Project. +# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# If you want full control over the layout of the generated HTML pages it might +# be necessary to disable the index and replace it with your own. The +# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top +# of each HTML page. A value of NO enables the index and the value YES disables +# it. Since the tabs in the index contain the same information as the navigation +# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. If the tag +# value is set to YES, a side panel will be generated containing a tree-like +# index structure (just like the one that is generated for HTML Help). 
For this +# to work a browser that supports JavaScript, DHTML, CSS and frames is required +# (i.e. any modern browser). Windows users are probably better off using the +# HTML help feature. Via custom stylesheets (see HTML_EXTRA_STYLESHEET) one can +# further fine-tune the look of the index. As an example, the default style +# sheet generated by doxygen has an example that shows how to put an image at +# the root of the tree instead of the PROJECT_NAME. Since the tree basically has +# the same information as the tab index, you could consider setting +# DISABLE_INDEX to YES when enabling this option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that +# doxygen will group on one line in the generated HTML documentation. +# +# Note that a value of 0 will completely suppress the enum values from appearing +# in the overview section. +# Minimum value: 0, maximum value: 20, default value: 4. +# This tag requires that the tag GENERATE_HTML is set to YES. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used +# to set the initial width (in pixels) of the frame in which the tree is shown. +# Minimum value: 0, maximum value: 1500, default value: 250. +# This tag requires that the tag GENERATE_HTML is set to YES. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open links to +# external symbols imported via tag files in a separate window. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of LaTeX formulas included as images in +# the HTML documentation. 
When you change the font size after a successful +# doxygen run you need to manually remove any form_*.png images from the HTML +# output directory to force them to be regenerated. +# Minimum value: 8, maximum value: 50, default value: 10. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are not +# supported properly for IE 6.0, but are supported on all modern browsers. +# +# Note that when changing this option you need to delete any form_*.png files in +# the HTML output directory before the changes have effect. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see +# http://www.mathjax.org) which uses client side Javascript for the rendering +# instead of using prerendered bitmaps. Use this if you do not have LaTeX +# installed or if you want the formulas to look prettier in the HTML output. When +# enabled you may also need to install MathJax separately and configure the path +# to it using the MATHJAX_RELPATH option. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +USE_MATHJAX = NO + +# When MathJax is enabled you can set the default output format to be used for +# the MathJax output. See the MathJax site (see: +# http://docs.mathjax.org/en/latest/output.html) for more details. +# Possible values are: HTML-CSS (which is slower, but has the best +# compatibility), NativeMML (i.e. MathML) and SVG. +# The default value is: HTML-CSS. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_FORMAT = HTML-CSS + +# When MathJax is enabled you need to specify the location relative to the HTML +# output directory using the MATHJAX_RELPATH option.
The destination directory +# should contain the MathJax.js script. For instance, if the mathjax directory +# is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax +# Content Delivery Network so you can quickly see the result without installing +# MathJax. However, it is strongly recommended to install a local copy of +# MathJax from http://www.mathjax.org before deployment. +# The default value is: http://cdn.mathjax.org/mathjax/latest. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax +# extension names that should be enabled during MathJax rendering. For example +# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_EXTENSIONS = + +# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces +# of code that will be used on startup of the MathJax code. See the MathJax site +# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# example see the documentation. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_CODEFILE = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for +# the HTML output. The underlying search engine uses javascript and DHTML and +# should work on any modern browser. Note that when using HTML help +# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) +# there is already a search function so this one should typically be disabled. +# For large projects the javascript based search engine can be slow, then +# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to +# search using the keyboard; to jump to the search box use + S +# (what the is depends on the OS and browser, but it is typically +# , /